| author | 2015-01-22 13:50:01 +0000 |
|---|---|
| committer | 2015-01-23 09:20:42 +0000 |
| commit | d97dc40d186aec46bfd318b6a2026a98241d7e9c (patch) |
| tree | 5cf0257eda25e2722a1adafb9de22690c06a56d8 /compiler/optimizing |
| parent | c698b78a17043d8898deb817098181595fbe734e (diff) |
Support callee save floating point registers on x64.
- Share the computation of core_spill_mask and fpu_spill_mask
between backends.
- Remove explicit stack overflow check support: the checks would need to be
  adjusted, and since they are not tested they would easily bitrot.
Change-Id: I0b619b8de4e1bdb169ea1ae7c6ede8df0d65837a
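The first item above replaces each backend's virtual FrameEntrySpillSize() with a shared computation in CodeGenerator: the spill masks are the intersection of the allocator's used registers with the backend's callee-save masks, and the frame-entry spill area is sized from their population counts. A minimal standalone sketch of that arithmetic (plain C++; `__builtin_popcount` stands in for ART's POPCOUNT macro, and the register bit positions are illustrative, not real backend encodings):

```cpp
#include <cstdint>
#include <cstdio>

// Sketch of the shared spill-mask / spill-size computation from CodeGenerator.
// word_size / fp_slot_size stand in for GetWordSize() / GetFloatingPointSpillSlotSize().
struct SpillInfo {
  uint32_t core_spill_mask;
  uint32_t fpu_spill_mask;
  uint32_t core_spill_size;
  uint32_t fpu_spill_size;
  uint32_t frame_entry_spill_size;
};

SpillInfo ComputeSpills(uint32_t allocated_core, uint32_t allocated_fpu,
                        uint32_t core_callee_save_mask, uint32_t fpu_callee_save_mask,
                        uint32_t word_size, uint32_t fp_slot_size) {
  SpillInfo s;
  // Only callee-save registers the allocator actually used need to be saved.
  s.core_spill_mask = allocated_core & core_callee_save_mask;
  s.fpu_spill_mask = allocated_fpu & fpu_callee_save_mask;
  s.core_spill_size = __builtin_popcount(s.core_spill_mask) * word_size;
  s.fpu_spill_size = __builtin_popcount(s.fpu_spill_mask) * fp_slot_size;
  s.frame_entry_spill_size = s.core_spill_size + s.fpu_spill_size;
  return s;
}

int main() {
  // Hypothetical x86-64-like example: callee saves RBX/RBP/R12-R15 plus a fake
  // return-address register at bit 16, and XMM12-XMM15 for floating point.
  uint32_t core_callee_saves = (1u << 3) | (1u << 5) | (0xFu << 12) | (1u << 16);
  uint32_t fpu_callee_saves = 0xFu << 12;
  SpillInfo s = ComputeSpills(/*allocated_core=*/(1u << 3) | (1u << 16),
                              /*allocated_fpu=*/(1u << 12),
                              core_callee_saves, fpu_callee_saves,
                              /*word_size=*/8, /*fp_slot_size=*/8);
  std::printf("core mask=0x%x fpu mask=0x%x frame-entry spill=%u bytes\n",
              s.core_spill_mask, s.fpu_spill_mask, s.frame_entry_spill_size);
  return 0;
}
```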
Diffstat (limited to 'compiler/optimizing')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | compiler/optimizing/code_generator.cc | 3 |
| -rw-r--r-- | compiler/optimizing/code_generator.h | 23 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm.cc | 55 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm.h | 2 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 50 |
| -rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 5 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 48 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86.h | 2 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 103 |
| -rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 2 |
| -rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 4 |
11 files changed, 109 insertions, 188 deletions
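In the x86-64 changes below, the floating point callee saves are no longer blocked off; the prologue stores them with `movsd` at fixed offsets above RSP, starting at GetFpuSpillStart() (frame size minus the whole frame-entry spill area). A rough sketch of that offset arithmetic, under assumed example sizes (the names and values here are illustrative, not the real codegen):

```cpp
#include <cstdint>
#include <cstdio>

// Sketch: where each callee-save XMM register lands in the new x86-64 frame.
// frame_size, core_spill_size and fpu_slot_size mirror GetFrameSize(),
// GetCoreSpillSize() and GetFloatingPointSpillSlotSize(); all values are examples.
int main() {
  const uint32_t frame_size = 96;       // hypothetical method frame
  const uint32_t core_spill_size = 16;  // e.g. fake return address + one pushed register
  const uint32_t fpu_slot_size = 8;
  const int fpu_callee_saves[] = {12, 13, 14, 15};  // XMM12..XMM15
  const uint32_t num_fpu = sizeof(fpu_callee_saves) / sizeof(fpu_callee_saves[0]);

  // After "subq rsp, frame_size - core_spill_size", the FP spill area starts at:
  const uint32_t fpu_spill_size = num_fpu * fpu_slot_size;
  const uint32_t fpu_spill_start = frame_size - (core_spill_size + fpu_spill_size);

  for (uint32_t i = 0; i < num_fpu; ++i) {
    std::printf("XMM%d -> [rsp + %u]\n", fpu_callee_saves[i],
                fpu_spill_start + fpu_slot_size * i);
  }
  return 0;
}
```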
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 0af70f9b90..43fd8bb668 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -140,6 +140,9 @@ void CodeGenerator::ComputeFrameSize(size_t number_of_spill_slots,
                                      size_t maximum_number_of_live_core_registers,
                                      size_t maximum_number_of_live_fp_registers,
                                      size_t number_of_out_slots) {
+  core_spill_mask_ = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
+  DCHECK_NE(core_spill_mask_, 0u) << "At least the return address register must be saved";
+  fpu_spill_mask_ = allocated_registers_.GetFloatingPointRegisters() & fpu_callee_save_mask_;
   first_register_slot_in_slow_path_ = (number_of_out_slots + number_of_spill_slots) * kVRegSize;
   SetFrameSize(RoundUp(
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 16080a47eb..85d18c0b43 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -117,13 +117,13 @@ class CodeGenerator {
                         size_t maximum_number_of_live_core_registers,
                         size_t maximum_number_of_live_fp_registers,
                         size_t number_of_out_slots);
-  virtual size_t FrameEntrySpillSize() const = 0;
   int32_t GetStackSlot(HLocal* local) const;
   Location GetTemporaryLocation(HTemporary* temp) const;
 
   uint32_t GetFrameSize() const { return frame_size_; }
   void SetFrameSize(uint32_t size) { frame_size_ = size; }
   uint32_t GetCoreSpillMask() const { return core_spill_mask_; }
+  uint32_t GetFpuSpillMask() const { return fpu_spill_mask_; }
 
   size_t GetNumberOfCoreRegisters() const { return number_of_core_registers_; }
   size_t GetNumberOfFloatingPointRegisters() const { return number_of_fpu_registers_; }
@@ -225,6 +225,7 @@ class CodeGenerator {
                 const CompilerOptions& compiler_options)
       : frame_size_(kUninitializedFrameSize),
         core_spill_mask_(0),
+        fpu_spill_mask_(0),
         first_register_slot_in_slow_path_(0),
         blocked_core_registers_(graph->GetArena()->AllocArray<bool>(number_of_core_registers)),
         blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers)),
@@ -254,9 +255,29 @@ class CodeGenerator {
 
   virtual ParallelMoveResolver* GetMoveResolver() = 0;
 
+  // Returns the location of the first spilled entry for floating point registers,
+  // relative to the stack pointer.
+  uint32_t GetFpuSpillStart() const {
+    DCHECK_NE(frame_size_, kUninitializedFrameSize);
+    return GetFrameSize() - FrameEntrySpillSize();
+  }
+
+  uint32_t GetFpuSpillSize() const {
+    return POPCOUNT(fpu_spill_mask_) * GetFloatingPointSpillSlotSize();
+  }
+
+  uint32_t GetCoreSpillSize() const {
+    return POPCOUNT(core_spill_mask_) * GetWordSize();
+  }
+
+  uint32_t FrameEntrySpillSize() const {
+    return GetFpuSpillSize() + GetCoreSpillSize();
+  }
+
   // Frame size required for this method.
   uint32_t frame_size_;
   uint32_t core_spill_mask_;
+  uint32_t fpu_spill_mask_;
   uint32_t first_register_slot_in_slow_path_;
 
   // Registers that were allocated during linear scan.
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index bc8858bc8c..f4e4f5a74a 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -42,7 +42,6 @@ static bool ExpectedPairLayout(Location location) {
   return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
 }
 
-static constexpr int kNumberOfPushedRegistersAtEntry = 1 + 2;  // LR, R6, R7
 static constexpr int kCurrentMethodStackOffset = 0;
 
 static constexpr Register kRuntimeParameterCoreRegisters[] = { R0, R1, R2, R3 };
@@ -113,20 +112,6 @@ class DivZeroCheckSlowPathARM : public SlowPathCodeARM {
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARM);
 };
 
-class StackOverflowCheckSlowPathARM : public SlowPathCodeARM {
- public:
-  StackOverflowCheckSlowPathARM() {}
-
-  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    __ Bind(GetEntryLabel());
-    __ LoadFromOffset(kLoadWord, PC, TR,
-        QUICK_ENTRYPOINT_OFFSET(kArmWordSize, pThrowStackOverflow).Int32Value());
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM);
-};
-
 class SuspendCheckSlowPathARM : public SlowPathCodeARM {
  public:
   SuspendCheckSlowPathARM(HSuspendCheck* instruction, HBasicBlock* successor)
@@ -390,16 +375,19 @@ CodeGeneratorARM::CodeGeneratorARM(HGraph* graph,
                                    const ArmInstructionSetFeatures& isa_features,
                                    const CompilerOptions& compiler_options)
     : CodeGenerator(graph, kNumberOfCoreRegisters, kNumberOfSRegisters,
-                    kNumberOfRegisterPairs, 0, 0, compiler_options),
+                    kNumberOfRegisterPairs, (1 << R6) | (1 << R7) | (1 << LR), 0, compiler_options),
      block_labels_(graph->GetArena(), 0),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
      move_resolver_(graph->GetArena(), this),
      assembler_(true),
-      isa_features_(isa_features) {}
-
-size_t CodeGeneratorARM::FrameEntrySpillSize() const {
-  return kNumberOfPushedRegistersAtEntry * kArmWordSize;
+      isa_features_(isa_features) {
+  // We unconditionally allocate R6 and R7 to ensure we can do long operations
+  // with baseline.
+  AddAllocatedRegister(Location::RegisterLocation(R6));
+  AddAllocatedRegister(Location::RegisterLocation(R7));
+  // Save the link register to mimic Quick.
+  AddAllocatedRegister(Location::RegisterLocation(LR));
 }
 
 Location CodeGeneratorARM::AllocateFreeRegister(Primitive::Type type) const {
@@ -516,32 +504,21 @@ InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGene
 void CodeGeneratorARM::GenerateFrameEntry() {
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
+  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
   if (!skip_overflow_check) {
-    if (GetCompilerOptions().GetImplicitStackOverflowChecks()) {
-      __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
-      __ LoadFromOffset(kLoadWord, IP, IP, 0);
-      RecordPcInfo(nullptr, 0);
-    } else {
-      SlowPathCodeARM* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM();
-      AddSlowPath(slow_path);
-
-      __ LoadFromOffset(kLoadWord, IP, TR, Thread::StackEndOffset<kArmWordSize>().Int32Value());
-      __ cmp(SP, ShifterOperand(IP));
-      __ b(slow_path->GetEntryLabel(), CC);
-    }
+    __ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
+    __ LoadFromOffset(kLoadWord, IP, IP, 0);
+    RecordPcInfo(nullptr, 0);
   }
 
-  core_spill_mask_ |= (1 << LR | 1 << R6 | 1 << R7);
-  __ PushList(1 << LR | 1 << R6 | 1 << R7);
-
-  // The return PC has already been pushed on the stack.
-  __ AddConstant(SP, -(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize));
+  __ PushList(core_spill_mask_);
+  __ AddConstant(SP, -(GetFrameSize() - FrameEntrySpillSize()));
   __ StoreToOffset(kStoreWord, R0, SP, 0);
 }
 
 void CodeGeneratorARM::GenerateFrameExit() {
-  __ AddConstant(SP, GetFrameSize() - kNumberOfPushedRegistersAtEntry * kArmWordSize);
-  __ PopList(1 << PC | 1 << R6 | 1 << R7);
+  __ AddConstant(SP, GetFrameSize() - FrameEntrySpillSize());
+  __ PopList((core_spill_mask_ & (~(1 << LR))) | 1 << PC);
 }
 
 void CodeGeneratorARM::Bind(HBasicBlock* block) {
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index f3b1ff5edc..46accfdaf0 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -179,8 +179,6 @@ class CodeGeneratorARM : public CodeGenerator {
     return kArmWordSize;
   }
 
-  size_t FrameEntrySpillSize() const OVERRIDE;
-
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
     return &location_builder_;
   }
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 21c1e9c5fc..1f561b725a 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -434,21 +434,6 @@ class NullCheckSlowPathARM64 : public SlowPathCodeARM64 {
   DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARM64);
 };
 
-class StackOverflowCheckSlowPathARM64 : public SlowPathCodeARM64 {
- public:
-  StackOverflowCheckSlowPathARM64() {}
-
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
-    __ Bind(GetEntryLabel());
-    arm64_codegen->InvokeRuntime(QUICK_ENTRY_POINT(pThrowStackOverflow), nullptr, 0);
-    CheckEntrypointTypes<kQuickThrowStackOverflow, void, void*>();
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathARM64);
-};
-
 class SuspendCheckSlowPathARM64 : public SlowPathCodeARM64 {
  public:
   explicit SuspendCheckSlowPathARM64(HSuspendCheck* instruction,
@@ -567,13 +552,16 @@ CodeGeneratorARM64::CodeGeneratorARM64(HGraph* graph, const CompilerOptions& com
                     kNumberOfAllocatableRegisters,
                     kNumberOfAllocatableFPRegisters,
                     kNumberOfAllocatableRegisterPairs,
-                    0,
+                    (1 << LR),
                     0,
                     compiler_options),
      block_labels_(nullptr),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
-      move_resolver_(graph->GetArena(), this) {}
+      move_resolver_(graph->GetArena(), this) {
+  // Save the link register (containing the return address) to mimic Quick.
+  AddAllocatedRegister(Location::RegisterLocation(LR));
+}
 
 #undef __
 #define __ GetVIXLAssembler()->
@@ -607,26 +595,15 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
   if (do_overflow_check) {
     UseScratchRegisterScope temps(GetVIXLAssembler());
     Register temp = temps.AcquireX();
-    if (GetCompilerOptions().GetImplicitStackOverflowChecks()) {
-      __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
-      __ Ldr(wzr, MemOperand(temp, 0));
-      RecordPcInfo(nullptr, 0);
-    } else {
-      SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathARM64();
-      AddSlowPath(slow_path);
-
-      __ Ldr(temp, MemOperand(tr, Thread::StackEndOffset<kArm64WordSize>().Int32Value()));
-      __ Cmp(sp, temp);
-      __ B(lo, slow_path->GetEntryLabel());
-    }
+    DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+    __ Add(temp, sp, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm64)));
+    __ Ldr(wzr, MemOperand(temp, 0));
+    RecordPcInfo(nullptr, 0);
   }
 
-  CPURegList preserved_regs = GetFramePreservedRegisters();
   int frame_size = GetFrameSize();
-  core_spill_mask_ |= preserved_regs.list();
-
   __ Str(w0, MemOperand(sp, -frame_size, PreIndex));
-  __ PokeCPURegList(preserved_regs, frame_size - preserved_regs.TotalSizeInBytes());
+  __ PokeCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize());
 
   // Stack layout:
   // sp[frame_size - 8] : lr.
@@ -638,8 +615,7 @@ void CodeGeneratorARM64::GenerateFrameExit() {
   int frame_size = GetFrameSize();
-  CPURegList preserved_regs = GetFramePreservedRegisters();
-  __ PeekCPURegList(preserved_regs, frame_size - preserved_regs.TotalSizeInBytes());
+  __ PeekCPURegList(GetFramePreservedRegisters(), frame_size - FrameEntrySpillSize());
   __ Drop(frame_size);
 }
 
@@ -690,10 +666,6 @@ void CodeGeneratorARM64::Move(HInstruction* instruction,
   }
 }
 
-size_t CodeGeneratorARM64::FrameEntrySpillSize() const {
-  return GetFramePreservedRegistersSize();
-}
-
 Location CodeGeneratorARM64::GetStackLocation(HLoadLocal* load) const {
   Primitive::Type type = load->GetType();
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index d81e481c4e..96013e55c6 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -178,9 +178,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
         vixl::CPURegList(vixl::CPURegister::kRegister, vixl::kXRegSize, vixl::lr.Bit());
     return frame_preserved_regs;
   }
-  static int GetFramePreservedRegistersSize() {
-    return GetFramePreservedRegisters().TotalSizeInBytes();
-  }
 
   void Bind(HBasicBlock* block) OVERRIDE;
 
@@ -205,8 +202,6 @@ class CodeGeneratorARM64 : public CodeGenerator {
     return block_entry_label->location();
   }
 
-  size_t FrameEntrySpillSize() const OVERRIDE;
-
   HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
   HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
   Arm64Assembler* GetAssembler() OVERRIDE { return &assembler_; }
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 9e26ddd80d..c0fdcaa8aa 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -31,7 +31,6 @@
 namespace art {
 namespace x86 {
 
-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
 static constexpr int kCurrentMethodStackOffset = 0;
 
 static constexpr Register kRuntimeParameterCoreRegisters[] = { EAX, ECX, EDX, EBX };
@@ -44,6 +43,7 @@ static constexpr int kC2ConditionMask = 0x400;
 
 // Marker for places that can be updated once we don't follow the quick ABI.
 static constexpr bool kFollowsQuickABI = true;
+static constexpr int kFakeReturnRegister = Register(8);
 
 class InvokeRuntimeCallingConvention : public CallingConvention<Register, XmmRegister> {
  public:
@@ -123,21 +123,6 @@ class DivRemMinusOneSlowPathX86 : public SlowPathCodeX86 {
   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86);
 };
 
-class StackOverflowCheckSlowPathX86 : public SlowPathCodeX86 {
- public:
-  StackOverflowCheckSlowPathX86() {}
-
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    __ Bind(GetEntryLabel());
-    __ addl(ESP,
-            Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
-    __ fs()->jmp(Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86WordSize, pThrowStackOverflow)));
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86);
-};
-
 class BoundsCheckSlowPathX86 : public SlowPathCodeX86 {
  public:
   BoundsCheckSlowPathX86(HBoundsCheck* instruction,
@@ -375,14 +360,13 @@ size_t CodeGeneratorX86::RestoreCoreRegister(size_t stack_index, uint32_t reg_id
 
 CodeGeneratorX86::CodeGeneratorX86(HGraph* graph, const CompilerOptions& compiler_options)
     : CodeGenerator(graph, kNumberOfCpuRegisters, kNumberOfXmmRegisters,
-                    kNumberOfRegisterPairs, 0, 0, compiler_options),
+                    kNumberOfRegisterPairs, (1 << kFakeReturnRegister), 0, compiler_options),
      block_labels_(graph->GetArena(), 0),
      location_builder_(graph, this),
      instruction_visitor_(graph, this),
-      move_resolver_(graph->GetArena(), this) {}
-
-size_t CodeGeneratorX86::FrameEntrySpillSize() const {
-  return kNumberOfPushedRegistersAtEntry * kX86WordSize;
+      move_resolver_(graph->GetArena(), this) {
+  // Use a fake return address register to mimic Quick.
+  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
 }
 
 Location CodeGeneratorX86::AllocateFreeRegister(Primitive::Type type) const {
@@ -464,35 +448,21 @@ InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGene
       codegen_(codegen) {}
 
 void CodeGeneratorX86::GenerateFrameEntry() {
-  // Create a fake register to mimic Quick.
-  static const int kFakeReturnRegister = 8;
-  core_spill_mask_ |= (1 << kFakeReturnRegister);
-
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
-  bool implicitStackOverflowChecks = GetCompilerOptions().GetImplicitStackOverflowChecks();
+  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
 
-  if (!skip_overflow_check && implicitStackOverflowChecks) {
+  if (!skip_overflow_check) {
     __ testl(EAX, Address(ESP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86))));
     RecordPcInfo(nullptr, 0);
   }
 
-  // The return PC has already been pushed on the stack.
-  __ subl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
-
-  if (!skip_overflow_check && !implicitStackOverflowChecks) {
-    SlowPathCodeX86* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86();
-    AddSlowPath(slow_path);
-
-    __ fs()->cmpl(ESP, Address::Absolute(Thread::StackEndOffset<kX86WordSize>()));
-    __ j(kLess, slow_path->GetEntryLabel());
-  }
-
+  __ subl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
   __ movl(Address(ESP, kCurrentMethodStackOffset), EAX);
 }
 
 void CodeGeneratorX86::GenerateFrameExit() {
-  __ addl(ESP, Immediate(GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86WordSize));
+  __ addl(ESP, Immediate(GetFrameSize() - FrameEntrySpillSize()));
 }
 
 void CodeGeneratorX86::Bind(HBasicBlock* block) {
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index dcfeb2fb99..73b647c1c4 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -178,8 +178,6 @@ class CodeGeneratorX86 : public CodeGenerator {
     return 2 * kX86WordSize;
   }
 
-  size_t FrameEntrySpillSize() const OVERRIDE;
-
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
     return &location_builder_;
   }
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 3d99695449..e60f8a5690 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -37,7 +37,6 @@ namespace x86_64 {
 // Some x86_64 instructions require a register to be available as temp.
 static constexpr Register TMP = R11;
 
-static constexpr int kNumberOfPushedRegistersAtEntry = 1;
 static constexpr int kCurrentMethodStackOffset = 0;
 
 static constexpr Register kRuntimeParameterCoreRegisters[] = { RDI, RSI, RDX };
@@ -46,7 +45,10 @@ static constexpr size_t kRuntimeParameterCoreRegistersLength =
 static constexpr FloatRegister kRuntimeParameterFpuRegisters[] = { XMM0, XMM1 };
 static constexpr size_t kRuntimeParameterFpuRegistersLength =
     arraysize(kRuntimeParameterFpuRegisters);
-static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
+static constexpr Register kFakeReturnRegister = Register(16);
+static constexpr Register kCoreCalleeSaves[] =
+    { RBX, RBP, R12, R13, R14, R15, kFakeReturnRegister };
+static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
 
 static constexpr int kC2ConditionMask = 0x400;
 
@@ -128,22 +130,6 @@ class DivRemMinusOneSlowPathX86_64 : public SlowPathCodeX86_64 {
   DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
 };
 
-class StackOverflowCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
- public:
-  StackOverflowCheckSlowPathX86_64() {}
-
-  virtual void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    __ Bind(GetEntryLabel());
-    __ addq(CpuRegister(RSP),
-            Immediate(codegen->GetFrameSize() - kNumberOfPushedRegistersAtEntry * kX86_64WordSize));
-    __ gs()->jmp(
-        Address::Absolute(QUICK_ENTRYPOINT_OFFSET(kX86_64WordSize, pThrowStackOverflow), true));
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(StackOverflowCheckSlowPathX86_64);
-};
-
 class SuspendCheckSlowPathX86_64 : public SlowPathCodeX86_64 {
  public:
   explicit SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
@@ -417,26 +403,32 @@ size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uin
   return kX86_64WordSize;
 }
 
-static uint32_t ComputeCoreCalleeSaveMask() {
+static uint32_t ComputeCalleeSaveMask(const int* registers, size_t length) {
   uint32_t mask = 0;
-  for (size_t i = 0, e = arraysize(kCoreCalleeSaves); i < e; ++i) {
-    mask |= (1 << kCoreCalleeSaves[i]);
+  for (size_t i = 0, e = length; i < e; ++i) {
+    mask |= (1 << registers[i]);
   }
   return mask;
 }
 
+static constexpr int kNumberOfCpuRegisterPairs = 0;
 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph, const CompilerOptions& compiler_options)
       : CodeGenerator(graph,
                       kNumberOfCpuRegisters,
                       kNumberOfFloatRegisters,
-                      0,
-                      ComputeCoreCalleeSaveMask(),
-                      0,
+                      kNumberOfCpuRegisterPairs,
+                      ComputeCalleeSaveMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
+                                            arraysize(kCoreCalleeSaves)),
+                      ComputeCalleeSaveMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
+                                            arraysize(kFpuCalleeSaves)),
                       compiler_options),
        block_labels_(graph->GetArena(), 0),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
-       move_resolver_(graph->GetArena(), this) {}
+       move_resolver_(graph->GetArena(), this) {
+  // Use a fake return address register to mimic Quick.
+  AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
+}
 
 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
                                                                CodeGeneratorX86_64* codegen)
@@ -470,12 +462,6 @@ Location CodeGeneratorX86_64::AllocateFreeRegister(Primitive::Type type) const {
   return Location();
 }
 
-size_t CodeGeneratorX86_64::FrameEntrySpillSize() const {
-  uint32_t mask = allocated_registers_.GetCoreRegisters() & core_callee_save_mask_;
-  return kNumberOfPushedRegistersAtEntry * kX86_64WordSize
-      + __builtin_popcount(mask) * kX86_64WordSize;
-}
-
 void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
   // Stack register is always reserved.
   blocked_core_registers_[RSP] = true;
@@ -487,57 +473,60 @@ void CodeGeneratorX86_64::SetupBlockedRegisters(bool is_baseline) const {
     for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
       blocked_core_registers_[kCoreCalleeSaves[i]] = true;
     }
+    for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+      blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
+    }
   }
-
-  // TODO: We currently don't use Quick's FP callee saved registers.
-  blocked_fpu_registers_[XMM12] = true;
-  blocked_fpu_registers_[XMM13] = true;
-  blocked_fpu_registers_[XMM14] = true;
-  blocked_fpu_registers_[XMM15] = true;
 }
 
 void CodeGeneratorX86_64::GenerateFrameEntry() {
-  // Create a fake register to mimic Quick.
-  static const int kFakeReturnRegister = 16;
-  core_spill_mask_ |= (1 << kFakeReturnRegister);
-  core_spill_mask_ |= (allocated_registers_.GetCoreRegisters() & core_callee_save_mask_);
-
   bool skip_overflow_check =
       IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
-  bool implicitStackOverflowChecks = GetCompilerOptions().GetImplicitStackOverflowChecks();
+  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
 
-  if (!skip_overflow_check && implicitStackOverflowChecks) {
+  if (!skip_overflow_check) {
     __ testq(CpuRegister(RAX), Address(
         CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
     RecordPcInfo(nullptr, 0);
   }
 
   for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
-    if (allocated_registers_.ContainsCoreRegister(kCoreCalleeSaves[i])) {
-      __ pushq(CpuRegister(kCoreCalleeSaves[i]));
+    Register reg = kCoreCalleeSaves[i];
+    if (allocated_registers_.ContainsCoreRegister(reg) && reg != kFakeReturnRegister) {
+      __ pushq(CpuRegister(reg));
     }
   }
 
-  __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - FrameEntrySpillSize()));
+  __ subq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
+  uint32_t xmm_spill_location = GetFpuSpillStart();
+  size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
 
-  if (!skip_overflow_check && !implicitStackOverflowChecks) {
-    SlowPathCodeX86_64* slow_path = new (GetGraph()->GetArena()) StackOverflowCheckSlowPathX86_64();
-    AddSlowPath(slow_path);
-
-    __ gs()->cmpq(CpuRegister(RSP),
-                  Address::Absolute(Thread::StackEndOffset<kX86_64WordSize>(), true));
-    __ j(kLess, slow_path->GetEntryLabel());
+  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
+    if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
+      __ movsd(Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)),
+               XmmRegister(kFpuCalleeSaves[i]));
+    }
   }
 
   __ movl(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
 }
 
 void CodeGeneratorX86_64::GenerateFrameExit() {
-  __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - FrameEntrySpillSize()));
+  uint32_t xmm_spill_location = GetFpuSpillStart();
+  size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
+  for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
+    if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
+      __ movsd(XmmRegister(kFpuCalleeSaves[i]),
+               Address(CpuRegister(RSP), xmm_spill_location + (xmm_spill_slot_size * i)));
+    }
+  }
+
+  __ addq(CpuRegister(RSP), Immediate(GetFrameSize() - GetCoreSpillSize()));
   for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
-    if (allocated_registers_.ContainsCoreRegister(kCoreCalleeSaves[i])) {
-      __ popq(CpuRegister(kCoreCalleeSaves[i]));
+    Register reg = kCoreCalleeSaves[i];
+    if (allocated_registers_.ContainsCoreRegister(reg) && reg != kFakeReturnRegister) {
+      __ popq(CpuRegister(reg));
     }
   }
 }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 645fb17b6c..1ac2ab76a4 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -194,8 +194,6 @@ class CodeGeneratorX86_64 : public CodeGenerator {
     return kX86_64WordSize;
   }
 
-  size_t FrameEntrySpillSize() const OVERRIDE;
-
   HGraphVisitor* GetLocationBuilder() OVERRIDE {
     return &location_builder_;
   }
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 1e0d65a945..5bca73003e 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -286,7 +286,7 @@ CompiledMethod* OptimizingCompiler::CompileOptimized(HGraph* graph,
       ArrayRef<const uint8_t>(allocator.GetMemory()),
       codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
-      0, /* FPR spill mask, unused */
+      codegen->GetFpuSpillMask(),
       ArrayRef<const uint8_t>(stack_map));
 }
 
@@ -313,7 +313,7 @@ CompiledMethod* OptimizingCompiler::CompileBaseline(
       ArrayRef<const uint8_t>(allocator.GetMemory()),
       codegen->GetFrameSize(),
       codegen->GetCoreSpillMask(),
-      0, /* FPR spill mask, unused */
+      codegen->GetFpuSpillMask(),
       &src_mapping_table,
       AlignVectorSize(mapping_table),
      AlignVectorSize(vmap_table),