author | 2019-11-27 17:42:32 +0000
---|---
committer | 2019-12-03 14:32:09 +0000
commit | a59af8aeaad8fe7d68d8f8de63eab9cf85b6ab31 (patch)
tree | 83195c74b135731cc4555254763a8f449691c1b0 /compiler
parent | 5c8cc64b5f1580faf510f27527e7e22987174963 (diff)
JIT baseline: trigger optimized compilation on hotness threshold.
- Add a new hotness count in the ProfilingInfo, so that it does not
conflict with the interpreter hotness count, which may be used for OSR.
- Add a baseline flag in the OatQuickMethodHeader to identify
baseline-compiled methods.
- Add a -Xusetieredjit flag to experiment and test.
Bug: 119800099
Test: test.py with -Xusetieredjit set to true
Change-Id: I8512853f869f1312e3edc60bf64413dee9143c52
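
The mechanism, in brief: baseline-compiled code bumps the new 16-bit ProfilingInfo counter at method entry and at loop back edges, and when the counter wraps to zero (that is, after 2^16 increments) the generated code calls the kQuickCompileOptimized entrypoint to request optimized compilation. A minimal C++ sketch of that behavior, using hypothetical stand-ins (FakeProfilingInfo, CompileOptimizedFn) rather than the real ART types:

```cpp
#include <cstdint>

// Hypothetical stand-in for the ProfilingInfo touched by this change; only
// the 16-bit counter width mirrors the diff.
struct FakeProfilingInfo {
  uint16_t baseline_hotness = 0;  // new counter, separate from ArtMethod's
};

using CompileOptimizedFn = void (*)();

// What the generated baseline prologue/back-edge code effectively does:
// increment the dedicated counter and, when it wraps to zero, call the
// kQuickCompileOptimized entrypoint.
void MaybeIncrementHotness(FakeProfilingInfo* info,
                           CompileOptimizedFn compile_optimized) {
  if (++info->baseline_hotness == 0) {  // 16-bit wrap == threshold reached
    compile_optimized();
  }
}
```

This matches the per-architecture tests below: arm64 uses `Tst(counter, 0xffff)` after the store, arm tests via `Lsls(ip, ip, 16)`, and x86/x86-64 branch on the carry flag of the 16-bit `addw`.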
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/optimizing/code_generator.cc | 3
-rw-r--r-- | compiler/optimizing/code_generator_arm64.cc | 77
-rw-r--r-- | compiler/optimizing/code_generator_arm64.h | 1
-rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.cc | 81
-rw-r--r-- | compiler/optimizing/code_generator_arm_vixl.h | 1
-rw-r--r-- | compiler/optimizing/code_generator_x86.cc | 133
-rw-r--r-- | compiler/optimizing/code_generator_x86.h | 1
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.cc | 135
-rw-r--r-- | compiler/optimizing/code_generator_x86_64.h | 2
-rw-r--r-- | compiler/optimizing/inliner.cc | 1
-rw-r--r-- | compiler/optimizing/nodes.h | 8
-rw-r--r-- | compiler/optimizing/optimizing_compiler.cc | 6
-rw-r--r-- | compiler/optimizing/stack_map_stream.cc | 5
-rw-r--r-- | compiler/optimizing/stack_map_stream.h | 4
14 files changed, 315 insertions, 143 deletions
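
A recurring detail in the per-architecture diffs below: the existing ArtMethod hotness counter must saturate at ArtMethod::MaxCounter() (0xFFFF) rather than wrap. arm64 and arm do this branch-free (`Add counter, counter, 1` followed by `Sub counter, counter, Operand(counter, LSR, 16)`), while x86/x86-64 compare against MaxCounter() and skip the increment. A sketch of the branch-free variant in plain C++ (SaturatingIncrement is an illustrative name, not an ART function):

```cpp
#include <cassert>
#include <cstdint>

// Branch-free saturating increment of a 16-bit hotness counter, mirroring
// the arm64 sequence: Add counter, counter, 1 / Sub counter, counter, LSR 16.
uint16_t SaturatingIncrement(uint16_t counter) {
  uint32_t c = static_cast<uint32_t>(counter) + 1;  // 0x10000 iff counter was 0xFFFF
  c -= c >> 16;                                     // subtract 1 only on overflow
  return static_cast<uint16_t>(c);
}

int main() {
  assert(SaturatingIncrement(41) == 42);
  assert(SaturatingIncrement(0xFFFF) == 0xFFFF);  // sticks at MaxCounter()
  return 0;
}
```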
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index bef7169da1..8406ef5504 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -395,7 +395,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
   GetStackMapStream()->BeginMethod(HasEmptyFrame() ? 0 : frame_size_,
                                    core_spill_mask_,
                                    fpu_spill_mask_,
-                                   GetGraph()->GetNumberOfVRegs());
+                                   GetGraph()->GetNumberOfVRegs(),
+                                   GetGraph()->IsCompilingBaseline());
 
   size_t frame_start = GetAssembler()->CodeSize();
   GenerateFrameEntry();
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 47c62f9366..894c7a4c59 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1061,19 +1061,66 @@ void ParallelMoveResolverARM64::EmitMove(size_t index) {
   codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
 }
 
-void CodeGeneratorARM64::GenerateFrameEntry() {
+void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
   MacroAssembler* masm = GetVIXLAssembler();
-  __ Bind(&frame_entry_label_);
-
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
     UseScratchRegisterScope temps(masm);
+    Register counter = temps.AcquireX();
+    Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX();
+    if (!is_frame_entry) {
+      __ Ldr(method, MemOperand(sp, 0));
+    }
+    __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
+    __ Add(counter, counter, 1);
+    // Subtract one if the counter would overflow.
+    __ Sub(counter, counter, Operand(counter, LSR, 16));
+    __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
+  }
+
+  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+    ScopedObjectAccess soa(Thread::Current());
+    ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+    uint32_t address = reinterpret_cast32<uint32_t>(info);
+    vixl::aarch64::Label done;
+    UseScratchRegisterScope temps(masm);
     Register temp = temps.AcquireX();
-    __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
-    __ Add(temp, temp, 1);
-    // Subtract one if the counter would overflow.
-    __ Sub(temp, temp, Operand(temp, LSR, 16));
-    __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+    Register counter = temps.AcquireW();
+    __ Mov(temp, address);
+    __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+    __ Add(counter, counter, 1);
+    __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+    __ Tst(counter, 0xffff);
+    __ B(ne, &done);
+    if (is_frame_entry) {
+      if (HasEmptyFrame()) {
+        // The entrypoint expects the method at the bottom of the stack. We
+        // claim stack space necessary for alignment.
+        __ Claim(kStackAlignment);
+        __ Stp(kArtMethodRegister, lr, MemOperand(sp, 0));
+      } else if (!RequiresCurrentMethod()) {
+        __ Str(kArtMethodRegister, MemOperand(sp, 0));
+      }
+    } else {
+      CHECK(RequiresCurrentMethod());
+    }
+    uint32_t entrypoint_offset =
+        GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
+    __ Ldr(lr, MemOperand(tr, entrypoint_offset));
+    // Note: we don't record the call here (and therefore don't generate a stack
+    // map), as the entrypoint should never be suspended.
+    __ Blr(lr);
+    if (HasEmptyFrame()) {
+      CHECK(is_frame_entry);
+      __ Ldr(lr, MemOperand(sp, 8));
+      __ Drop(kStackAlignment);
+    }
+    __ Bind(&done);
   }
+}
+
+void CodeGeneratorARM64::GenerateFrameEntry() {
+  MacroAssembler* masm = GetVIXLAssembler();
+  __ Bind(&frame_entry_label_);
 
   bool do_overflow_check =
       FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
@@ -1136,7 +1183,7 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
       __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
     }
   }
-
+  MaybeIncrementHotness(/* is_frame_entry= */ true);
   MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
 }
 
@@ -3177,17 +3224,7 @@ void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* s
     HLoopInformation* info = block->GetLoopInformation();
     if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-      if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
-        UseScratchRegisterScope temps(GetVIXLAssembler());
-        Register temp1 = temps.AcquireX();
-        Register temp2 = temps.AcquireX();
-        __ Ldr(temp1, MemOperand(sp, 0));
-        __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
-        __ Add(temp2, temp2, 1);
-        // Subtract one if the counter would overflow.
-        __ Sub(temp2, temp2, Operand(temp2, LSR, 16));
-        __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
-      }
+      codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
       GenerateSuspendCheck(info->GetSuspendCheck(), successor);
       return;
     }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 253e91505d..6b2c80529b 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -787,6 +787,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
   }
 
   void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl::aarch64::Register klass);
+  void MaybeIncrementHotness(bool is_frame_entry);
 
  private:
   // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 9100c6c547..49a608eee7 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2080,27 +2080,79 @@ void CodeGeneratorARMVIXL::ComputeSpillMask() {
   }
 }
 
-void CodeGeneratorARMVIXL::GenerateFrameEntry() {
-  bool skip_overflow_check =
-      IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
-  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
-  __ Bind(&frame_entry_label_);
-
+void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) {
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
     UseScratchRegisterScope temps(GetVIXLAssembler());
     vixl32::Register temp = temps.Acquire();
     static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
-    // Load with sign extend to set the high bits for integer overflow check.
+    if (!is_frame_entry) {
+      __ Push(vixl32::Register(kMethodRegister));
+      GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
+    }
+    // Load with zero extend to clear the high bits for integer overflow check.
     __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
     __ Add(temp, temp, 1);
     // Subtract one if the counter would overflow.
     __ Sub(temp, temp, Operand(temp, ShiftType::LSR, 16));
     __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+    if (!is_frame_entry) {
+      __ Pop(vixl32::Register(kMethodRegister));
+    }
   }
 
+  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+    ScopedObjectAccess soa(Thread::Current());
+    ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+    uint32_t address = reinterpret_cast32<uint32_t>(info);
+    vixl::aarch32::Label done;
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    temps.Exclude(ip);
+    if (!is_frame_entry) {
+      __ Push(r4);  // Will be used as temporary. For frame entry, r4 is always available.
+    }
+    __ Mov(r4, address);
+    __ Ldrh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+    __ Add(ip, ip, 1);
+    __ Strh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+    if (!is_frame_entry) {
+      __ Pop(r4);
+    }
+    __ Lsls(ip, ip, 16);
+    __ B(ne, &done);
+    uint32_t entry_point_offset =
+        GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
+    if (HasEmptyFrame()) {
+      CHECK(is_frame_entry);
+      // For leaf methods, we need to spill lr and r0. Also spill r1 and r2 for
+      // alignment.
+      uint32_t core_spill_mask =
+          (1 << lr.GetCode()) | (1 << r0.GetCode()) | (1 << r1.GetCode()) | (1 << r2.GetCode());
+      __ Push(RegisterList(core_spill_mask));
+      __ Ldr(lr, MemOperand(tr, entry_point_offset));
+      __ Blx(lr);
+      __ Pop(RegisterList(core_spill_mask));
+    } else {
+      if (!RequiresCurrentMethod()) {
+        CHECK(is_frame_entry);
+        GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
+      }
+      __ Ldr(lr, MemOperand(tr, entry_point_offset));
+      __ Blx(lr);
+    }
+    __ Bind(&done);
+  }
+}
+
+void CodeGeneratorARMVIXL::GenerateFrameEntry() {
+  bool skip_overflow_check =
+      IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
+  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+  __ Bind(&frame_entry_label_);
+
   if (HasEmptyFrame()) {
     // Ensure that the CFI opcode list is not empty.
     GetAssembler()->cfi().Nop();
+    MaybeIncrementHotness(/* is_frame_entry= */ true);
     return;
   }
@@ -2201,6 +2253,7 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
     GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
   }
 
+  MaybeIncrementHotness(/* is_frame_entry= */ true);
   MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
 }
 
@@ -2498,19 +2551,7 @@ void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock*
     HLoopInformation* info = block->GetLoopInformation();
     if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-      if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
-        UseScratchRegisterScope temps(GetVIXLAssembler());
-        vixl32::Register temp = temps.Acquire();
-        __ Push(vixl32::Register(kMethodRegister));
-        GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
-        // Load with sign extend to set the high bits for integer overflow check.
-        __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
-        __ Add(temp, temp, 1);
-        // Subtract one if the counter would overflow.
-        __ Sub(temp, temp, Operand(temp, ShiftType::LSR, 16));
-        __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
-        __ Pop(vixl32::Register(kMethodRegister));
-      }
+      codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
       GenerateSuspendCheck(info->GetSuspendCheck(), successor);
       return;
     }
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 3d4c231842..48fb0827d7 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -757,6 +757,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
   }
 
   void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl32::Register klass);
+  void MaybeIncrementHotness(bool is_frame_entry);
 
  private:
   // Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 54da87919c..3077be05ac 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1072,58 +1072,112 @@ static dwarf::Reg DWARFReg(Register reg) {
   return dwarf::Reg::X86Core(static_cast<int>(reg));
 }
 
-void CodeGeneratorX86::GenerateFrameEntry() {
-  __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
-  __ Bind(&frame_entry_label_);
-  bool skip_overflow_check =
-      IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
-  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
-
+void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+    Register reg = EAX;
+    if (is_frame_entry) {
+      reg = kMethodRegisterArgument;
+    } else {
+      __ pushl(EAX);
+      __ movl(EAX, Address(ESP, kX86WordSize));
+    }
     NearLabel overflow;
-    __ cmpw(Address(kMethodRegisterArgument,
-                    ArtMethod::HotnessCountOffset().Int32Value()),
+    __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
             Immediate(ArtMethod::MaxCounter()));
     __ j(kEqual, &overflow);
-    __ addw(Address(kMethodRegisterArgument,
-                    ArtMethod::HotnessCountOffset().Int32Value()),
+    __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
             Immediate(1));
     __ Bind(&overflow);
+    if (!is_frame_entry) {
+      __ popl(EAX);
+    }
   }
 
+  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+    ScopedObjectAccess soa(Thread::Current());
+    ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+    uint32_t address = reinterpret_cast32<uint32_t>(info);
+    NearLabel done;
+    if (HasEmptyFrame()) {
+      CHECK(is_frame_entry);
+      // Alignment
+      __ subl(ESP, Immediate(8));
+      __ cfi().AdjustCFAOffset(8);
+      // We need a temporary. The stub also expects the method at the bottom of the stack.
+      __ pushl(EAX);
+      __ cfi().AdjustCFAOffset(4);
+      __ movl(EAX, Immediate(address));
+      __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
+              Immediate(1));
+      __ j(kCarryClear, &done);
+      GenerateInvokeRuntime(
+          GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
+      __ Bind(&done);
+      // We don't strictly need to restore EAX, but this makes the generated
+      // code easier to reason about.
+      __ popl(EAX);
+      __ cfi().AdjustCFAOffset(-4);
+      __ addl(ESP, Immediate(8));
+      __ cfi().AdjustCFAOffset(-8);
+    } else {
+      if (!RequiresCurrentMethod()) {
+        CHECK(is_frame_entry);
+        __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
+      }
+      // We need a temporary.
+      __ pushl(EAX);
+      __ cfi().AdjustCFAOffset(4);
+      __ movl(EAX, Immediate(address));
+      __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
+              Immediate(1));
+      __ popl(EAX);  // Put stack as expected before exiting or calling stub.
+      __ cfi().AdjustCFAOffset(-4);
+      __ j(kCarryClear, &done);
+      GenerateInvokeRuntime(
+          GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
+      __ Bind(&done);
+    }
+  }
+}
+
+void CodeGeneratorX86::GenerateFrameEntry() {
+  __ cfi().SetCurrentCFAOffset(kX86WordSize);  // return address
+  __ Bind(&frame_entry_label_);
+  bool skip_overflow_check =
+      IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
+  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+
   if (!skip_overflow_check) {
     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
     __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
     RecordPcInfo(nullptr, 0);
   }
 
-  if (HasEmptyFrame()) {
-    return;
-  }
+  if (!HasEmptyFrame()) {
+    for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+      Register reg = kCoreCalleeSaves[i];
+      if (allocated_registers_.ContainsCoreRegister(reg)) {
+        __ pushl(reg);
+        __ cfi().AdjustCFAOffset(kX86WordSize);
+        __ cfi().RelOffset(DWARFReg(reg), 0);
+      }
+    }
 
-  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
-    Register reg = kCoreCalleeSaves[i];
-    if (allocated_registers_.ContainsCoreRegister(reg)) {
-      __ pushl(reg);
-      __ cfi().AdjustCFAOffset(kX86WordSize);
-      __ cfi().RelOffset(DWARFReg(reg), 0);
+    int adjust = GetFrameSize() - FrameEntrySpillSize();
+    __ subl(ESP, Immediate(adjust));
+    __ cfi().AdjustCFAOffset(adjust);
+    // Save the current method if we need it. Note that we do not
+    // do this in HCurrentMethod, as the instruction might have been removed
+    // in the SSA graph.
+    if (RequiresCurrentMethod()) {
+      __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
     }
-  }
 
-  int adjust = GetFrameSize() - FrameEntrySpillSize();
-  __ subl(ESP, Immediate(adjust));
-  __ cfi().AdjustCFAOffset(adjust);
-  // Save the current method if we need it. Note that we do not
-  // do this in HCurrentMethod, as the instruction might have been removed
-  // in the SSA graph.
-  if (RequiresCurrentMethod()) {
-    __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
+    if (GetGraph()->HasShouldDeoptimizeFlag()) {
+      // Initialize should_deoptimize flag to 0.
+      __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
+    }
   }
 
-  if (GetGraph()->HasShouldDeoptimizeFlag()) {
-    // Initialize should_deoptimize flag to 0.
-    __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
-  }
+  MaybeIncrementHotness(/* is_frame_entry= */ true);
 }
 
 void CodeGeneratorX86::GenerateFrameExit() {
@@ -1391,18 +1445,7 @@ void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* suc
     HLoopInformation* info = block->GetLoopInformation();
     if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-      if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
-        __ pushl(EAX);
-        __ movl(EAX, Address(ESP, kX86WordSize));
-        NearLabel overflow;
-        __ cmpw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()),
-                Immediate(ArtMethod::MaxCounter()));
-        __ j(kEqual, &overflow);
-        __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()),
-                Immediate(1));
-        __ Bind(&overflow);
-        __ popl(EAX);
-      }
+      codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
       GenerateSuspendCheck(info->GetSuspendCheck(), successor);
       return;
     }
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index e305b50c8e..16446ce561 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -625,6 +625,7 @@ class CodeGeneratorX86 : public CodeGenerator {
   void GenerateExplicitNullCheck(HNullCheck* instruction) override;
 
   void MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass);
+  void MaybeIncrementHotness(bool is_frame_entry);
 
   // When we don't know the proper offset for the value, we use kDummy32BitOffset.
   // The correct value will be inserted when processing Assembler fixups.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 48a3d90f6f..dd3a4f4dc0 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1346,70 +1346,108 @@ static dwarf::Reg DWARFReg(FloatRegister reg) {
   return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
 }
 
-void CodeGeneratorX86_64::GenerateFrameEntry() {
-  __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
-  __ Bind(&frame_entry_label_);
-  bool skip_overflow_check = IsLeafMethod()
-      && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
-  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
-
+void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
   if (GetCompilerOptions().CountHotnessInCompiledCode()) {
     NearLabel overflow;
-    __ cmpw(Address(CpuRegister(kMethodRegisterArgument),
-                    ArtMethod::HotnessCountOffset().Int32Value()),
+    Register method = kMethodRegisterArgument;
+    if (!is_frame_entry) {
+      CHECK(RequiresCurrentMethod());
+      method = TMP;
+      __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
+    }
+    __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
            Immediate(ArtMethod::MaxCounter()));
     __ j(kEqual, &overflow);
-    __ addw(Address(CpuRegister(kMethodRegisterArgument),
-                    ArtMethod::HotnessCountOffset().Int32Value()),
+    __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
            Immediate(1));
     __ Bind(&overflow);
   }
 
+  if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+    ScopedObjectAccess soa(Thread::Current());
+    ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+    uint64_t address = reinterpret_cast64<uint64_t>(info);
+    NearLabel done;
+    __ movq(CpuRegister(TMP), Immediate(address));
+    __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
+            Immediate(1));
+    __ j(kCarryClear, &done);
+    if (HasEmptyFrame()) {
+      CHECK(is_frame_entry);
+      // Frame alignment, and the stub expects the method on the stack.
+      __ pushq(CpuRegister(RDI));
+      __ cfi().AdjustCFAOffset(kX86_64WordSize);
+      __ cfi().RelOffset(DWARFReg(RDI), 0);
+    } else if (!RequiresCurrentMethod()) {
+      CHECK(is_frame_entry);
+      __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
+    }
+    GenerateInvokeRuntime(
+        GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
+    if (HasEmptyFrame()) {
+      __ popq(CpuRegister(RDI));
+      __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
+      __ cfi().Restore(DWARFReg(RDI));
+    }
+    __ Bind(&done);
+  }
+}
+
+void CodeGeneratorX86_64::GenerateFrameEntry() {
+  __ cfi().SetCurrentCFAOffset(kX86_64WordSize);  // return address
+  __ Bind(&frame_entry_label_);
+  bool skip_overflow_check = IsLeafMethod()
+      && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
+  DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+
   if (!skip_overflow_check) {
     size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
     __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
     RecordPcInfo(nullptr, 0);
   }
 
-  if (HasEmptyFrame()) {
-    return;
-  }
-
-  for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
-    Register reg = kCoreCalleeSaves[i];
-    if (allocated_registers_.ContainsCoreRegister(reg)) {
-      __ pushq(CpuRegister(reg));
-      __ cfi().AdjustCFAOffset(kX86_64WordSize);
-      __ cfi().RelOffset(DWARFReg(reg), 0);
+  if (!HasEmptyFrame()) {
+    for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+      Register reg = kCoreCalleeSaves[i];
+      if (allocated_registers_.ContainsCoreRegister(reg)) {
+        __ pushq(CpuRegister(reg));
+        __ cfi().AdjustCFAOffset(kX86_64WordSize);
+        __ cfi().RelOffset(DWARFReg(reg), 0);
+      }
     }
-  }
 
-  int adjust = GetFrameSize() - GetCoreSpillSize();
-  __ subq(CpuRegister(RSP), Immediate(adjust));
-  __ cfi().AdjustCFAOffset(adjust);
-  uint32_t xmm_spill_location = GetFpuSpillStart();
-  size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
+    int adjust = GetFrameSize() - GetCoreSpillSize();
+    __ subq(CpuRegister(RSP), Immediate(adjust));
+    __ cfi().AdjustCFAOffset(adjust);
+    uint32_t xmm_spill_location = GetFpuSpillStart();
+    size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
+
+    for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
+      if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
+        int offset = xmm_spill_location + (xmm_spill_slot_size * i);
+        __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
+        __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
+      }
+    }
 
-  for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
-    if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
-      int offset = xmm_spill_location + (xmm_spill_slot_size * i);
-      __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
-      __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
+    // Save the current method if we need it. Note that we do not
+    // do this in HCurrentMethod, as the instruction might have been removed
+    // in the SSA graph.
+    if (RequiresCurrentMethod()) {
+      CHECK(!HasEmptyFrame());
+      __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
+              CpuRegister(kMethodRegisterArgument));
     }
-  }
 
-  // Save the current method if we need it. Note that we do not
-  // do this in HCurrentMethod, as the instruction might have been removed
-  // in the SSA graph.
-  if (RequiresCurrentMethod()) {
-    __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
-            CpuRegister(kMethodRegisterArgument));
+    if (GetGraph()->HasShouldDeoptimizeFlag()) {
+      CHECK(!HasEmptyFrame());
+      // Initialize should_deoptimize flag to 0.
+      __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
+    }
   }
 
-  if (GetGraph()->HasShouldDeoptimizeFlag()) {
-    // Initialize should_deoptimize flag to 0.
-    __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
-  }
+  MaybeIncrementHotness(/* is_frame_entry= */ true);
 }
 
 void CodeGeneratorX86_64::GenerateFrameExit() {
@@ -1556,16 +1594,7 @@ void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock*
     HLoopInformation* info = block->GetLoopInformation();
     if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
-      if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
-        __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0));
-        NearLabel overflow;
-        __ cmpw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
-                Immediate(ArtMethod::MaxCounter()));
-        __ j(kEqual, &overflow);
-        __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
-                Immediate(1));
-        __ Bind(&overflow);
-      }
+      codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
       GenerateSuspendCheck(info->GetSuspendCheck(), successor);
       return;
     }
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 5537a4a4d9..2e8d9b3315 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -603,6 +603,8 @@ class CodeGeneratorX86_64 : public CodeGenerator {
 
   void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);
 
+  void MaybeIncrementHotness(bool is_frame_entry);
+
   // When we don't know the proper offset for the value, we use kDummy32BitOffset.
   // We will fix this up in the linker later to have the right value.
   static constexpr int32_t kDummy32BitOffset = 256;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index ff7ed349a3..ecaedc7cd8 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -1813,6 +1813,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
       graph_->IsDebuggable(),
       /* osr= */ false,
       /* is_shared_jit_code= */ graph_->IsCompilingForSharedJitCode(),
+      /* baseline= */ graph_->IsCompilingBaseline(),
       /* start_instruction_id= */ caller_instruction_counter);
   callee_graph->SetArtMethod(resolved_method);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 09ae6fab84..043338466f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -322,6 +322,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
          bool debuggable = false,
          bool osr = false,
          bool is_shared_jit_code = false,
+         bool baseline = false,
          int start_instruction_id = 0)
       : allocator_(allocator),
         arena_stack_(arena_stack),
@@ -358,6 +359,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
         art_method_(nullptr),
         inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
         osr_(osr),
+        baseline_(baseline),
        cha_single_implementation_list_(allocator->Adapter(kArenaAllocCHA)),
         is_shared_jit_code_(is_shared_jit_code) {
     blocks_.reserve(kDefaultNumberOfBlocks);
@@ -589,6 +591,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
 
   bool IsCompilingOsr() const { return osr_; }
 
+  bool IsCompilingBaseline() const { return baseline_; }
+
   bool IsCompilingForSharedJitCode() const {
     return is_shared_jit_code_;
   }
@@ -786,6 +790,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   // compiled code entries which the interpreter can directly jump to.
   const bool osr_;
 
+  // Whether we are compiling baseline (not running optimizations). This affects
+  // the code being generated.
+  const bool baseline_;
+
   // List of methods that are assumed to have single implementation.
   ArenaSet<ArtMethod*> cha_single_implementation_list_;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 3f11170c66..f8eae2fb70 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -851,7 +851,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
       dead_reference_safe,
       compiler_options.GetDebuggable(),
       /* osr= */ osr,
-      /* is_shared_jit_code= */ is_shared_jit_code);
+      /* is_shared_jit_code= */ is_shared_jit_code,
+      /* baseline= */ baseline);
 
   if (method != nullptr) {
     graph->SetArtMethod(method);
@@ -1174,7 +1175,8 @@ static ScopedArenaVector<uint8_t> CreateJniStackMap(ScopedArenaAllocator* alloca
       jni_compiled_method.GetFrameSize(),
       jni_compiled_method.GetCoreSpillMask(),
       jni_compiled_method.GetFpSpillMask(),
-      /* num_dex_registers= */ 0);
+      /* num_dex_registers= */ 0,
+      /* baseline= */ false);
   stack_map_stream->EndMethod();
   return stack_map_stream->Encode();
 }
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 3f6010d7af..dd6d1a2959 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -42,7 +42,8 @@ void StackMapStream::SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offs
 void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
                                  size_t core_spill_mask,
                                  size_t fp_spill_mask,
-                                 uint32_t num_dex_registers) {
+                                 uint32_t num_dex_registers,
+                                 bool baseline) {
   DCHECK(!in_method_) << "Mismatched Begin/End calls";
   in_method_ = true;
   DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called";
@@ -52,6 +53,7 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
   core_spill_mask_ = core_spill_mask;
   fp_spill_mask_ = fp_spill_mask;
   num_dex_registers_ = num_dex_registers;
+  baseline_ = baseline;
 
   if (kVerifyStackMaps) {
     dchecks_.emplace_back([=](const CodeInfo& code_info) {
@@ -299,6 +301,7 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() {
   DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls";
 
   uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0;
+  flags |= baseline_ ? CodeInfo::kIsBaseline : 0;
   uint32_t bit_table_flags = 0;
   ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) {
     if (bit_table->size() != 0) {  // Record which bit-tables are stored.
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index f45e3d720e..67f716ce70 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -61,7 +61,8 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
   void BeginMethod(size_t frame_size_in_bytes,
                    size_t core_spill_mask,
                    size_t fp_spill_mask,
-                   uint32_t num_dex_registers);
+                   uint32_t num_dex_registers,
+                   bool baseline = false);
   void EndMethod();
 
   void BeginStackMapEntry(uint32_t dex_pc,
@@ -119,6 +120,7 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
   uint32_t core_spill_mask_ = 0;
   uint32_t fp_spill_mask_ = 0;
   uint32_t num_dex_registers_ = 0;
+  bool baseline_;
   BitTableBuilder<StackMap> stack_maps_;
   BitTableBuilder<RegisterMask> register_masks_;
   BitmapTableBuilder stack_masks_;
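
A note on the stack_map_stream changes above: the baseline property travels from HGraph through StackMapStream::BeginMethod into the encoded CodeInfo, where it becomes a flag bit next to kHasInlineInfo. A minimal sketch of that flag plumbing; the two bit names come from the diff, but the enum values and the EncodeFlags helper are assumptions for illustration, not the real ART API:

```cpp
#include <cstdint>
#include <cstdio>

// Assumed flag bits; only the names kHasInlineInfo/kIsBaseline appear in the diff.
enum CodeInfoFlags : uint32_t {
  kHasInlineInfo = 1u << 0,
  kIsBaseline    = 1u << 1,  // new: marks baseline-compiled methods
};

uint32_t EncodeFlags(bool has_inline_info, bool baseline) {
  uint32_t flags = has_inline_info ? kHasInlineInfo : 0u;
  flags |= baseline ? kIsBaseline : 0u;  // mirrors the line added to StackMapStream::Encode()
  return flags;
}

int main() {
  uint32_t flags = EncodeFlags(/* has_inline_info= */ false, /* baseline= */ true);
  // The runtime can test this bit on a method's CodeInfo to tell baseline
  // code apart from optimized code (see the OatQuickMethodHeader bullet in
  // the commit message).
  std::printf("baseline: %s\n", (flags & kIsBaseline) ? "yes" : "no");
  return 0;
}
```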