[optimizing] Use callee-save registers for x86
Add ESI, EDI and EBP to the set of registers available for allocation
in non-baseline mode. Ensure that they are not used when a
byte-addressable register is needed.
Change-Id: Ie7130d4084c2ae9cfcd1e47c26eb3e5dcac1ebd6
Signed-off-by: Mark Mendell <mark.p.mendell@intel.com>
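For context, a minimal standalone sketch (not ART code; the enum values follow the usual x86-32 encoding and the helpers ComputeMask/HasByteRegister are illustrative only) of the two constraints behind this change: the callee-save list is turned into a bit mask handed to the register allocator, and only EAX/ECX/EDX/EBX have 8-bit sub-registers on x86-32, so EBP/ESI/EDI must be kept away from outputs that need a byte register.

```cpp
// Standalone sketch, not ART code.
#include <cstddef>
#include <cstdint>
#include <cstdio>

enum Register { EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7 };

static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };

// OR together one bit per register to form the allocator's callee-save mask.
constexpr uint32_t ComputeMask(const Register* regs, std::size_t count) {
  uint32_t mask = 0;
  for (std::size_t i = 0; i < count; ++i) {
    mask |= 1u << regs[i];
  }
  return mask;
}

// Only EAX/ECX/EDX/EBX have 8-bit low sub-registers (AL/CL/DL/BL) in 32-bit
// mode, so setcc and byte stores cannot target EBP/ESI/EDI.
constexpr bool HasByteRegister(Register reg) {
  return reg == EAX || reg == ECX || reg == EDX || reg == EBX;
}

int main() {
  constexpr uint32_t mask =
      ComputeMask(kCoreCalleeSaves, sizeof(kCoreCalleeSaves) / sizeof(kCoreCalleeSaves[0]));
  std::printf("callee-save mask = 0x%x\n", mask);                     // 0xe0: EBP|ESI|EDI
  std::printf("ESI usable as byte reg: %d\n", HasByteRegister(ESI));  // 0
  return 0;
}
```

Assuming the standard x86 register numbering, the resulting mask is 0xe0, the value that ends up OR'ed with (1 << kFakeReturnRegister) in the constructor change below.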
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 3c8f62c..87efa6c 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -37,14 +37,13 @@
static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
static constexpr size_t kRuntimeParameterCoreRegistersLength =
arraysize(kRuntimeParameterCoreRegisters);
+static constexpr Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
static constexpr XmmRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1, XMM2, XMM3 };
static constexpr size_t kRuntimeParameterFpuRegistersLength =
arraysize(kRuntimeParameterFpuRegisters);
static constexpr int kC2ConditionMask = 0x400;
-// Marker for places that can be updated once we don't follow the quick ABI.
-static constexpr bool kFollowsQuickABI = true;
static constexpr int kFakeReturnRegister = Register(8);
class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
@@ -371,8 +370,15 @@
}
CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options)
- : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters,
- kNumberOfRegisterPairs, (1 << kFakeReturnRegister), 0, compiler_options),
+ : CodeGenerator(graph,
+ kNumberOfCpuRegisters,
+ kNumberOfXmmRegisters,
+ kNumberOfRegisterPairs,
+ ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
+ arraysize(kCoreCalleeSaves))
+ | (1 << kFakeReturnRegister),
+ 0,
+ compiler_options),
block_labels_(graph->GetArena(), 0),
location_builder_(graph, this),
instruction_visitor_(graph, this),
@@ -427,18 +433,18 @@
return Location();
}
-void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline ATTRIBUTE_UNUSED) const {
+void CodeGeneratorX86::SetupBlockedRegisters(bool is_baseline) const {
// Don't allocate the dalvik style register pair passing.
blocked_register_pairs_[ECX_EDX] = true;
// Stack register is always reserved.
blocked_core_registers_[ESP] = true;
- // TODO: We currently don't use Quick's callee saved registers.
- DCHECK(kFollowsQuickABI);
- blocked_core_registers_[EBP] = true;
- blocked_core_registers_[ESI] = true;
- blocked_core_registers_[EDI] = true;
+ if (is_baseline) {
+ blocked_core_registers_[EBP] = true;
+ blocked_core_registers_[ESI] = true;
+ blocked_core_registers_[EDI] = true;
+ }
UpdateBlockedPairRegisters();
}
@@ -470,15 +476,33 @@
RecordPcInfo(nullptr, 0);
}
- if (!HasEmptyFrame()) {
- __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
- __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
+ if (HasEmptyFrame()) {
+ return;
}
+
+ for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+ Register reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg)) {
+ __ pushl(reg);
+ }
+ }
+
+ __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
}
void CodeGeneratorX86::GenerateFrameExit() {
- if (!HasEmptyFrame()) {
- __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+ if (HasEmptyFrame()) {
+ return;
+ }
+
+ __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
+
+ for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
+ Register reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg)) {
+ __ popl(reg);
+ }
}
}
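The entry/exit hunks above are deliberately symmetric: GenerateFrameEntry pushes the allocated callee-saves in reverse array order, and GenerateFrameExit pops them in forward order, so each popl lands on the slot its matching pushl wrote. A minimal sketch of that invariant (not ART code; the allocated_registers_ filter is omitted and all three registers are assumed live):

```cpp
// Standalone sketch, not ART code: model the machine stack with a vector and
// check that reverse-order pushes followed by forward-order pops line up.
#include <cassert>
#include <cstddef>
#include <vector>

enum Register { EBP, ESI, EDI };
static const Register kCoreCalleeSaves[] = { EBP, ESI, EDI };
static const std::size_t kCount = sizeof(kCoreCalleeSaves) / sizeof(kCoreCalleeSaves[0]);

int main() {
  std::vector<Register> stack;  // back() models the top of the machine stack

  // Frame entry: walk the array backwards, i.e. pushl EDI, then ESI, then EBP.
  for (int i = static_cast<int>(kCount) - 1; i >= 0; --i) {
    stack.push_back(kCoreCalleeSaves[i]);
  }

  // Frame exit: walk the array forwards; each pop must restore exactly the
  // register named by kCoreCalleeSaves[i], which holds because the orders mirror.
  for (std::size_t i = 0; i < kCount; ++i) {
    assert(stack.back() == kCoreCalleeSaves[i]);
    stack.pop_back();
  }
  assert(stack.empty());
  return 0;
}
```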
@@ -907,7 +931,8 @@
locations->SetInAt(0, Location::RequiresRegister());
locations->SetInAt(1, Location::Any());
if (comp->NeedsMaterialization()) {
- locations->SetOut(Location::RequiresRegister());
+ // We need a byte register.
+ locations->SetOut(Location::RegisterLocation(ECX));
}
}
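The byte-register restriction above comes from the x86-32 encoding: a materialized comparison is typically produced with setcc, which writes an 8-bit register, and only EAX/ECX/EDX/EBX have 8-bit low forms. With EBP/ESI/EDI now allocatable, pinning the output to ECX keeps the result in a register setcc can actually target. A minimal sketch of that mapping (not ART code; LowByteOf is an illustrative helper):

```cpp
// Standalone sketch, not ART code.
#include <cstdio>

enum Register { EAX = 0, ECX = 1, EDX = 2, EBX = 3, ESP = 4, EBP = 5, ESI = 6, EDI = 7 };
enum ByteRegister { AL = 0, CL = 1, DL = 2, BL = 3, kNoByteRegister = -1 };

// EBP, ESI, EDI (and ESP) have no 8-bit low form in 32-bit mode, so an output
// that will be written by setcc must live in one of the first four registers.
ByteRegister LowByteOf(Register reg) {
  switch (reg) {
    case EAX: return AL;
    case ECX: return CL;
    case EDX: return DL;
    case EBX: return BL;
    default:  return kNoByteRegister;
  }
}

int main() {
  std::printf("ECX -> %d (CL)\n", LowByteOf(ECX));    // 1
  std::printf("ESI -> %d (none)\n", LowByteOf(ESI));  // -1: not a valid setcc target
  return 0;
}
```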
@@ -1345,8 +1370,10 @@
case Primitive::kPrimInt:
case Primitive::kPrimChar:
// Processing a Dex `int-to-byte' instruction.
- locations->SetInAt(0, Location::Any());
- locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+ locations->SetInAt(0, Location::ByteRegisterOrConstant(ECX, conversion->InputAt(0)));
+ // Make the output overlap to please the register allocator. This greatly simplifies
+ // the validation of the linear scan implementation.
+ locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
break;
default:
@@ -3161,15 +3188,16 @@
}
void LocationsBuilderX86::VisitArraySet(HArraySet* instruction) {
+ // This location builder might end up asking for up to four registers, which is
+ // not currently possible for baseline. The situation in which we need four
+ // registers cannot arise in baseline, though, because it does not run any
+ // optimizations.
+
Primitive::Type value_type = instruction->GetComponentType();
bool needs_write_barrier =
CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
- DCHECK(kFollowsQuickABI);
- bool not_enough_registers = needs_write_barrier
- && !instruction->GetValue()->IsConstant()
- && !instruction->GetIndex()->IsConstant();
- bool needs_runtime_call = instruction->NeedsTypeCheck() || not_enough_registers;
+ bool needs_runtime_call = instruction->NeedsTypeCheck();
LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
instruction,