Revert "Don't block quick callee saved registers for optimizing."
One libcore test is failing on x86-64, and codegen_test is failing
on ARM.
This reverts commit 6004796d6c630696127df2494dcd4f30d1367a34.
Change-Id: I20e00431fa18e11ce4c0cb6fffa91977fa8e9b4f
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 5334031..461409d 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -177,31 +177,6 @@
}
}
-void CodeGenerator::MaybeBlockPhysicalRegisters(Location loc, bool is_output) const {
- // The DCHECKS below check that a register is not specified twice in
- // the summary.
- // Note that fixed output registers are allowed to overlap with fixed input and
- // temp registers: the writer of the location summary has to make sure they
- // don't conflict with each other.
- if (loc.IsRegister()) {
- DCHECK(is_output || !blocked_core_registers_[loc.reg()]);
- blocked_core_registers_[loc.reg()] = true;
- } else if (loc.IsFpuRegister()) {
- DCHECK(is_output || !blocked_fpu_registers_[loc.reg()]);
- blocked_fpu_registers_[loc.reg()] = true;
- } else if (loc.IsFpuRegisterPair()) {
- DCHECK(is_output || !blocked_fpu_registers_[loc.AsFpuRegisterPairLow<int>()]);
- blocked_fpu_registers_[loc.AsFpuRegisterPairLow<int>()] = true;
- DCHECK(is_output || !blocked_fpu_registers_[loc.AsFpuRegisterPairHigh<int>()]);
- blocked_fpu_registers_[loc.AsFpuRegisterPairHigh<int>()] = true;
- } else if (loc.IsRegisterPair()) {
- DCHECK(is_output || !blocked_core_registers_[loc.AsRegisterPairLow<int>()]);
- blocked_core_registers_[loc.AsRegisterPairLow<int>()] = true;
- DCHECK(is_output || !blocked_core_registers_[loc.AsRegisterPairHigh<int>()]);
- blocked_core_registers_[loc.AsRegisterPairHigh<int>()] = true;
- }
-}
-
void CodeGenerator::AllocateRegistersLocally(HInstruction* instruction) const {
LocationSummary* locations = instruction->GetLocations();
if (locations == nullptr) return;
@@ -221,18 +196,43 @@
// Mark all fixed input, temp and output registers as used.
for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
Location loc = locations->InAt(i);
- MaybeBlockPhysicalRegisters(loc, false);
+ // The DCHECKS below check that a register is not specified twice in
+ // the summary.
+ if (loc.IsRegister()) {
+ DCHECK(!blocked_core_registers_[loc.reg()]);
+ blocked_core_registers_[loc.reg()] = true;
+ } else if (loc.IsFpuRegister()) {
+ DCHECK(!blocked_fpu_registers_[loc.reg()]);
+ blocked_fpu_registers_[loc.reg()] = true;
+ } else if (loc.IsFpuRegisterPair()) {
+ DCHECK(!blocked_fpu_registers_[loc.AsFpuRegisterPairLow<int>()]);
+ blocked_fpu_registers_[loc.AsFpuRegisterPairLow<int>()] = true;
+ DCHECK(!blocked_fpu_registers_[loc.AsFpuRegisterPairHigh<int>()]);
+ blocked_fpu_registers_[loc.AsFpuRegisterPairHigh<int>()] = true;
+ } else if (loc.IsRegisterPair()) {
+ DCHECK(!blocked_core_registers_[loc.AsRegisterPairLow<int>()]);
+ blocked_core_registers_[loc.AsRegisterPairLow<int>()] = true;
+ DCHECK(!blocked_core_registers_[loc.AsRegisterPairHigh<int>()]);
+ blocked_core_registers_[loc.AsRegisterPairHigh<int>()] = true;
+ }
}
for (size_t i = 0, e = locations->GetTempCount(); i < e; ++i) {
Location loc = locations->GetTemp(i);
- MaybeBlockPhysicalRegisters(loc, false);
+ // The DCHECKS below check that a register is not specified twice in
+ // the summary.
+ if (loc.IsRegister()) {
+ DCHECK(!blocked_core_registers_[loc.reg()]);
+ blocked_core_registers_[loc.reg()] = true;
+ } else if (loc.IsFpuRegister()) {
+ DCHECK(!blocked_fpu_registers_[loc.reg()]);
+ blocked_fpu_registers_[loc.reg()] = true;
+ } else {
+ DCHECK(loc.GetPolicy() == Location::kRequiresRegister
+ || loc.GetPolicy() == Location::kRequiresFpuRegister);
+ }
}
- // If the output is a fixed register, mark it as used.
- Location result_location = locations->Out();
- MaybeBlockPhysicalRegisters(result_location, true);
-
SetupBlockedRegisters();
// Allocate all unallocated input locations.
@@ -276,11 +276,9 @@
<< loc.GetPolicy();
}
locations->SetTempAt(i, loc);
- } else {
- DCHECK(loc.IsFpuRegister() || loc.IsRegister());
}
}
-
+ Location result_location = locations->Out();
if (result_location.IsUnallocated()) {
switch (result_location.GetPolicy()) {
case Location::kAny:
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index e03bc53..1d42c47 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -211,7 +211,6 @@
// Register allocation logic.
void AllocateRegistersLocally(HInstruction* instruction) const;
- void MaybeBlockPhysicalRegisters(Location loc, bool is_output) const;
// Backend specific implementation for allocating a register.
virtual Location AllocateFreeRegister(Primitive::Type type) const = 0;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index f43449d..cbe5f0c 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -38,7 +38,7 @@
static constexpr bool kExplicitStackOverflowCheck = false;
-static constexpr int kNumberOfPushedRegistersAtEntry = 1; // LR.
+static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2; // LR, R6, R7
static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 };
@@ -444,16 +444,21 @@
blocked_core_registers_[LR] = true;
blocked_core_registers_[PC] = true;
- // Reserve frame pointer register.
- blocked_core_registers_[FP] = true;
-
// Reserve thread register.
blocked_core_registers_[TR] = true;
// Reserve temp register.
blocked_core_registers_[IP] = true;
- // Reserve hard-float's callee saved registers.
+ // TODO: We currently don't use Quick's callee saved registers.
+ // We always save and restore R6 and R7 to make sure we can use three
+ // register pairs for long operations.
+ blocked_core_registers_[R4] = true;
+ blocked_core_registers_[R5] = true;
+ blocked_core_registers_[R8] = true;
+ blocked_core_registers_[R10] = true;
+ blocked_core_registers_[R11] = true;
+
blocked_fpu_registers_[S16] = true;
blocked_fpu_registers_[S17] = true;
blocked_fpu_registers_[S18] = true;
@@ -508,8 +513,8 @@
}
}
- core_spill_mask_ |= (1 << LR);
- __ Push(LR);
+ core_spill_mask_ |= (1 << LR | 1 << R6 | 1 << R7);
+ __ PushList(1 << LR | 1 << R6 | 1 << R7);
// The return PC has already been pushed on the stack.
__ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize));
@@ -518,7 +523,7 @@
void CodeGeneratorARM::GenerateFrameExit() {
__ AddConstant(SP, GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize);
- __ Pop(PC);
+ __ PopList(1 << PC | 1 << R6 | 1 << R7);
}
void CodeGeneratorARM::Bind(HBasicBlock* block) {
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index b2bb875..b048c07 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -716,8 +716,11 @@
// tr
// lr
// sp is not part of the allocatable registers, so we don't need to block it.
+ // TODO: Avoid blocking callee-saved registers, and instead preserve them
+ // where necessary.
CPURegList reserved_core_registers = vixl_reserved_core_registers;
reserved_core_registers.Combine(runtime_reserved_core_registers);
+ reserved_core_registers.Combine(quick_callee_saved_registers);
while (!reserved_core_registers.IsEmpty()) {
blocked_core_registers_[reserved_core_registers.PopLowestIndex().code()] = true;
}
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 38e2965..1d5bfb7 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -48,6 +48,9 @@
const vixl::CPURegList vixl_reserved_core_registers(vixl::ip0, vixl::ip1);
const vixl::CPURegList vixl_reserved_fp_registers(vixl::d31);
const vixl::CPURegList runtime_reserved_core_registers(tr, vixl::lr);
+const vixl::CPURegList quick_callee_saved_registers(vixl::CPURegister::kRegister,
+ vixl::kXRegSize,
+ kArm64CalleeSaveRefSpills);
Location ARM64ReturnLocation(Primitive::Type return_type);
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 18b1b32..4757235 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -42,12 +42,6 @@
static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { };
static constexpr size_t kRuntimeParameterFpuRegistersLength = 0;
-static constexpr Register kByteRegisters[] = { EAX, ECX, EDX, EBX };
-
-static ByteRegister ToByteRegister(Register reg) {
- return X86ManagedRegister::FromCpuRegister(reg).AsByteRegister();
-}
-
// Marker for places that can be updated once we don't follow the quick ABI.
static constexpr bool kFollowsQuickABI = true;
@@ -443,8 +437,11 @@
// Stack register is always reserved.
blocked_core_registers_[ESP] = true;
- // Frame register is always reserved.
+ // TODO: We currently don't use Quick's callee saved registers.
+ DCHECK(kFollowsQuickABI);
blocked_core_registers_[EBP] = true;
+ blocked_core_registers_[ESI] = true;
+ blocked_core_registers_[EDI] = true;
UpdateBlockedPairRegisters();
}
@@ -932,7 +929,7 @@
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
if (comp->NeedsMaterialization()) {
- locations->SetOut(Location::RegisterLocation(kByteRegisters[0]));
+ locations->SetOut(Location::RequiresRegister());
}
}
@@ -953,7 +950,7 @@
__ cmpl(locations->InAt(0).AsRegister<Register>(),
Address(ESP, locations->InAt(1).GetStackIndex()));
}
- __ setb(X86Condition(comp->GetCondition()), ToByteRegister(reg));
+ __ setb(X86Condition(comp->GetCondition()), reg);
}
}
@@ -1168,11 +1165,11 @@
case Primitive::kPrimShort:
case Primitive::kPrimInt:
case Primitive::kPrimNot:
- locations->SetOut(Location::RegisterLocation(EAX), Location::kNoOutputOverlap);
+ locations->SetOut(Location::RegisterLocation(EAX));
break;
case Primitive::kPrimLong:
- locations->SetOut(Location::RegisterPairLocation(EAX, EDX), Location::kNoOutputOverlap);
+ locations->SetOut(Location::RegisterPairLocation(EAX, EDX));
break;
case Primitive::kPrimVoid:
@@ -1180,7 +1177,7 @@
case Primitive::kPrimDouble:
case Primitive::kPrimFloat:
- locations->SetOut(Location::FpuRegisterLocation(XMM0), Location::kNoOutputOverlap);
+ locations->SetOut(Location::FpuRegisterLocation(XMM0));
break;
}
@@ -1350,7 +1347,7 @@
case Primitive::kPrimInt:
case Primitive::kPrimChar:
// Processing a Dex `int-to-byte' instruction.
- locations->SetInAt(0, Location::RegisterLocation(kByteRegisters[0]));
+ locations->SetInAt(0, Location::Any());
locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
break;
@@ -1545,7 +1542,15 @@
case Primitive::kPrimInt:
case Primitive::kPrimChar:
// Processing a Dex `int-to-byte' instruction.
- __ movsxb(out.AsRegister<Register>(), ToByteRegister(in.AsRegister<Register>()));
+ if (in.IsRegister()) {
+ __ movsxb(out.AsRegister<Register>(), in.AsRegister<ByteRegister>());
+ } else if (in.IsStackSlot()) {
+ __ movsxb(out.AsRegister<Register>(), Address(ESP, in.GetStackIndex()));
+ } else {
+ DCHECK(in.GetConstant()->IsIntConstant());
+ int32_t value = in.GetConstant()->AsIntConstant()->GetValue();
+ __ movl(out.AsRegister<Register>(), Immediate(static_cast<int8_t>(value)));
+ }
break;
default:
@@ -2663,16 +2668,17 @@
|| (field_type == Primitive::kPrimByte);
// The register allocator does not support multiple
// inputs that die at entry with one in a specific register.
- size_t byte_register_index = 0;
if (is_byte_type) {
- locations->SetInAt(1, Location::RegisterLocation(kByteRegisters[byte_register_index++]));
+ // Ensure the value is in a byte register.
+ locations->SetInAt(1, Location::RegisterLocation(EAX));
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
// Temporary registers for the write barrier.
if (needs_write_barrier) {
locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RegisterLocation(kByteRegisters[byte_register_index]));
+ // Ensure the card is in a byte register.
+ locations->AddTemp(Location::RegisterLocation(ECX));
}
}
@@ -2685,7 +2691,7 @@
switch (field_type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte: {
- ByteRegister value = ToByteRegister(locations->InAt(1).AsRegister<Register>());
+ ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>();
__ movb(Address(obj, offset), value);
break;
}
@@ -2735,17 +2741,15 @@
}
}
-void CodeGeneratorX86::MarkGCCard(Register temp,
- Register card,
- Register object,
- Register value) {
+void CodeGeneratorX86::MarkGCCard(Register temp, Register card, Register object, Register value) {
Label is_null;
__ testl(value, value);
__ j(kEqual, &is_null);
__ fs()->movl(card, Address::Absolute(Thread::CardTableOffset<kX86WordSize>().Int32Value()));
__ movl(temp, object);
__ shrl(temp, Immediate(gc::accounting::CardTable::kCardShift));
- __ movb(Address(temp, card, TIMES_1, 0), ToByteRegister(card));
+ __ movb(Address(temp, card, TIMES_1, 0),
+ X86ManagedRegister::FromCpuRegister(card).AsByteRegister());
__ Bind(&is_null);
}
@@ -2976,17 +2980,17 @@
// inputs that die at entry with one in a specific register.
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
- size_t byte_register_index = 0;
if (is_byte_type) {
- locations->SetInAt(2, Location::ByteRegisterOrConstant(
- kByteRegisters[byte_register_index++], instruction->InputAt(2)));
+ // Ensure the value is in a byte register.
+ locations->SetInAt(2, Location::ByteRegisterOrConstant(EAX, instruction->InputAt(2)));
} else {
locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
}
// Temporary registers for the write barrier.
if (needs_write_barrier) {
locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RegisterLocation(kByteRegisters[byte_register_index]));
+ // Ensure the card is in a byte register.
+ locations->AddTemp(Location::RegisterLocation(ECX));
}
}
}
@@ -3008,7 +3012,7 @@
if (index.IsConstant()) {
size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_1) + data_offset;
if (value.IsRegister()) {
- __ movb(Address(obj, offset), ToByteRegister(value.AsRegister<Register>()));
+ __ movb(Address(obj, offset), value.AsRegister<ByteRegister>());
} else {
__ movb(Address(obj, offset),
Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
@@ -3016,7 +3020,7 @@
} else {
if (value.IsRegister()) {
__ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset),
- ToByteRegister(value.AsRegister<Register>()));
+ value.AsRegister<ByteRegister>());
} else {
__ movb(Address(obj, index.AsRegister<Register>(), TIMES_1, data_offset),
Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
@@ -3459,16 +3463,17 @@
|| (field_type == Primitive::kPrimByte);
// The register allocator does not support multiple
// inputs that die at entry with one in a specific register.
- size_t byte_register_index = 0;
if (is_byte_type) {
- locations->SetInAt(1, Location::RegisterLocation(kByteRegisters[byte_register_index++]));
+ // Ensure the value is in a byte register.
+ locations->SetInAt(1, Location::RegisterLocation(EAX));
} else {
locations->SetInAt(1, Location::RequiresRegister());
}
// Temporary registers for the write barrier.
if (needs_write_barrier) {
locations->AddTemp(Location::RequiresRegister());
- locations->AddTemp(Location::RegisterLocation(kByteRegisters[byte_register_index]));
+ // Ensure the card is in a byte register.
+ locations->AddTemp(Location::RegisterLocation(ECX));
}
}
@@ -3481,7 +3486,7 @@
switch (field_type) {
case Primitive::kPrimBoolean:
case Primitive::kPrimByte: {
- ByteRegister value = ToByteRegister(locations->InAt(1).AsRegister<Register>());
+ ByteRegister value = locations->InAt(1).AsRegister<ByteRegister>();
__ movb(Address(cls, offset), value);
break;
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 854e93b..f8651f6 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -453,8 +453,18 @@
// Block the register used as TMP.
blocked_core_registers_[TMP] = true;
- // Block the frame pointer.
+ // TODO: We currently don't use Quick's callee saved registers.
+ blocked_core_registers_[RBX] = true;
blocked_core_registers_[RBP] = true;
+ blocked_core_registers_[R12] = true;
+ blocked_core_registers_[R13] = true;
+ blocked_core_registers_[R14] = true;
+ blocked_core_registers_[R15] = true;
+
+ blocked_fpu_registers_[XMM12] = true;
+ blocked_fpu_registers_[XMM13] = true;
+ blocked_fpu_registers_[XMM14] = true;
+ blocked_fpu_registers_[XMM15] = true;
}
void CodeGeneratorX86_64::GenerateFrameEntry() {