34 files changed, 440 insertions, 154 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index bef7169da1..8406ef5504 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -395,7 +395,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) { GetStackMapStream()->BeginMethod(HasEmptyFrame() ? 0 : frame_size_, core_spill_mask_, fpu_spill_mask_, - GetGraph()->GetNumberOfVRegs()); + GetGraph()->GetNumberOfVRegs(), + GetGraph()->IsCompilingBaseline()); size_t frame_start = GetAssembler()->CodeSize(); GenerateFrameEntry(); diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc index 47c62f9366..894c7a4c59 100644 --- a/compiler/optimizing/code_generator_arm64.cc +++ b/compiler/optimizing/code_generator_arm64.cc @@ -1061,19 +1061,66 @@ void ParallelMoveResolverARM64::EmitMove(size_t index) { codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid); } -void CodeGeneratorARM64::GenerateFrameEntry() { +void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) { MacroAssembler* masm = GetVIXLAssembler(); - __ Bind(&frame_entry_label_); - if (GetCompilerOptions().CountHotnessInCompiledCode()) { UseScratchRegisterScope temps(masm); + Register counter = temps.AcquireX(); + Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX(); + if (!is_frame_entry) { + __ Ldr(method, MemOperand(sp, 0)); + } + __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value())); + __ Add(counter, counter, 1); + // Subtract one if the counter would overflow. + __ Sub(counter, counter, Operand(counter, LSR, 16)); + __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value())); + } + + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + ScopedObjectAccess soa(Thread::Current()); + ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); + uint32_t address = reinterpret_cast32<uint32_t>(info); + vixl::aarch64::Label done; + UseScratchRegisterScope temps(masm); Register temp = temps.AcquireX(); - __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); - __ Add(temp, temp, 1); - // Subtract one if the counter would overflow. - __ Sub(temp, temp, Operand(temp, LSR, 16)); - __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + Register counter = temps.AcquireW(); + __ Mov(temp, address); + __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); + __ Add(counter, counter, 1); + __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); + __ Tst(counter, 0xffff); + __ B(ne, &done); + if (is_frame_entry) { + if (HasEmptyFrame()) { + // The entrypoint expects the method at the bottom of the stack. We + // claim stack space necessary for alignment. + __ Claim(kStackAlignment); + __ Stp(kArtMethodRegister, lr, MemOperand(sp, 0)); + } else if (!RequiresCurrentMethod()) { + __ Str(kArtMethodRegister, MemOperand(sp, 0)); + } + } else { + CHECK(RequiresCurrentMethod()); + } + uint32_t entrypoint_offset = + GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value(); + __ Ldr(lr, MemOperand(tr, entrypoint_offset)); + // Note: we don't record the call here (and therefore don't generate a stack + // map), as the entrypoint should never be suspended.
+ __ Blr(lr); + if (HasEmptyFrame()) { + CHECK(is_frame_entry); + __ Ldr(lr, MemOperand(sp, 8)); + __ Drop(kStackAlignment); + } + __ Bind(&done); } +} + +void CodeGeneratorARM64::GenerateFrameEntry() { + MacroAssembler* masm = GetVIXLAssembler(); + __ Bind(&frame_entry_label_); bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod(); @@ -1136,7 +1183,7 @@ void CodeGeneratorARM64::GenerateFrameEntry() { __ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag())); } } - + MaybeIncrementHotness(/* is_frame_entry= */ true); MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__); } @@ -3177,17 +3224,7 @@ void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* s HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - Register temp1 = temps.AcquireX(); - Register temp2 = temps.AcquireX(); - __ Ldr(temp1, MemOperand(sp, 0)); - __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); - __ Add(temp2, temp2, 1); - // Subtract one if the counter would overflow. - __ Sub(temp2, temp2, Operand(temp2, LSR, 16)); - __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value())); - } + codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h index 253e91505d..6b2c80529b 100644 --- a/compiler/optimizing/code_generator_arm64.h +++ b/compiler/optimizing/code_generator_arm64.h @@ -787,6 +787,7 @@ class CodeGeneratorARM64 : public CodeGenerator { } void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl::aarch64::Register klass); + void MaybeIncrementHotness(bool is_frame_entry); private: // Encoding of thunk type and data for link-time generated thunks for Baker read barriers. diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc index 9100c6c547..49a608eee7 100644 --- a/compiler/optimizing/code_generator_arm_vixl.cc +++ b/compiler/optimizing/code_generator_arm_vixl.cc @@ -2080,27 +2080,79 @@ void CodeGeneratorARMVIXL::ComputeSpillMask() { } } -void CodeGeneratorARMVIXL::GenerateFrameEntry() { - bool skip_overflow_check = - IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); - DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); - __ Bind(&frame_entry_label_); - +void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) { if (GetCompilerOptions().CountHotnessInCompiledCode()) { UseScratchRegisterScope temps(GetVIXLAssembler()); vixl32::Register temp = temps.Acquire(); static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong"); - // Load with sign extend to set the high bits for integer overflow check. + if (!is_frame_entry) { + __ Push(vixl32::Register(kMethodRegister)); + GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize); + } + // Load with zero extend to clear the high bits for integer overflow check. __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); __ Add(temp, temp, 1); // Subtract one if the counter would overflow. 
__ Sub(temp, temp, Operand(temp, ShiftType::LSR, 16)); __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); + if (!is_frame_entry) { + __ Pop(vixl32::Register(kMethodRegister)); + } } + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + ScopedObjectAccess soa(Thread::Current()); + ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); + uint32_t address = reinterpret_cast32<uint32_t>(info); + vixl::aarch32::Label done; + UseScratchRegisterScope temps(GetVIXLAssembler()); + temps.Exclude(ip); + if (!is_frame_entry) { + __ Push(r4); // Will be used as temporary. For frame entry, r4 is always available. + } + __ Mov(r4, address); + __ Ldrh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); + __ Add(ip, ip, 1); + __ Strh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value())); + if (!is_frame_entry) { + __ Pop(r4); + } + __ Lsls(ip, ip, 16); + __ B(ne, &done); + uint32_t entry_point_offset = + GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value(); + if (HasEmptyFrame()) { + CHECK(is_frame_entry); + // For leaf methods, we need to spill lr and r0. Also spill r1 and r2 for + // alignment. + uint32_t core_spill_mask = + (1 << lr.GetCode()) | (1 << r0.GetCode()) | (1 << r1.GetCode()) | (1 << r2.GetCode()); + __ Push(RegisterList(core_spill_mask)); + __ Ldr(lr, MemOperand(tr, entry_point_offset)); + __ Blx(lr); + __ Pop(RegisterList(core_spill_mask)); + } else { + if (!RequiresCurrentMethod()) { + CHECK(is_frame_entry); + GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0); + } + __ Ldr(lr, MemOperand(tr, entry_point_offset)); + __ Blx(lr); + } + __ Bind(&done); + } +} + +void CodeGeneratorARMVIXL::GenerateFrameEntry() { + bool skip_overflow_check = + IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm); + DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + __ Bind(&frame_entry_label_); + if (HasEmptyFrame()) { // Ensure that the CFI opcode list is not empty. GetAssembler()->cfi().Nop(); + MaybeIncrementHotness(/* is_frame_entry= */ true); return; } @@ -2201,6 +2253,7 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() { GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag()); } + MaybeIncrementHotness(/* is_frame_entry= */ true); MaybeGenerateMarkingRegisterCheck(/* code= */ 1); } @@ -2498,19 +2551,7 @@ void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { - UseScratchRegisterScope temps(GetVIXLAssembler()); - vixl32::Register temp = temps.Acquire(); - __ Push(vixl32::Register(kMethodRegister)); - GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize); - // Load with sign extend to set the high bits for integer overflow check. - __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); - __ Add(temp, temp, 1); - // Subtract one if the counter would overflow. 
- __ Sub(temp, temp, Operand(temp, ShiftType::LSR, 16)); - __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value())); - __ Pop(vixl32::Register(kMethodRegister)); - } + codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h index 3d4c231842..48fb0827d7 100644 --- a/compiler/optimizing/code_generator_arm_vixl.h +++ b/compiler/optimizing/code_generator_arm_vixl.h @@ -757,6 +757,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator { } void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl32::Register klass); + void MaybeIncrementHotness(bool is_frame_entry); private: // Encoding of thunk type and data for link-time generated thunks for Baker read barriers. diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc index 54da87919c..3077be05ac 100644 --- a/compiler/optimizing/code_generator_x86.cc +++ b/compiler/optimizing/code_generator_x86.cc @@ -1072,58 +1072,112 @@ static dwarf::Reg DWARFReg(Register reg) { return dwarf::Reg::X86Core(static_cast<int>(reg)); } -void CodeGeneratorX86::GenerateFrameEntry() { - __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address - __ Bind(&frame_entry_label_); - bool skip_overflow_check = - IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); - DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); - +void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) { if (GetCompilerOptions().CountHotnessInCompiledCode()) { + Register reg = EAX; + if (is_frame_entry) { + reg = kMethodRegisterArgument; + } else { + __ pushl(EAX); + __ movl(EAX, Address(ESP, kX86WordSize)); + } NearLabel overflow; - __ cmpw(Address(kMethodRegisterArgument, - ArtMethod::HotnessCountOffset().Int32Value()), + __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(ArtMethod::MaxCounter())); __ j(kEqual, &overflow); - __ addw(Address(kMethodRegisterArgument, - ArtMethod::HotnessCountOffset().Int32Value()), + __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()), Immediate(1)); __ Bind(&overflow); + if (!is_frame_entry) { + __ popl(EAX); + } } + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + ScopedObjectAccess soa(Thread::Current()); + ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); + uint32_t address = reinterpret_cast32<uint32_t>(info); + NearLabel done; + if (HasEmptyFrame()) { + CHECK(is_frame_entry); + // Alignment + __ subl(ESP, Immediate(8)); + __ cfi().AdjustCFAOffset(8); + // We need a temporary. The stub also expects the method at bottom of stack. + __ pushl(EAX); + __ cfi().AdjustCFAOffset(4); + __ movl(EAX, Immediate(address)); + __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()), Immediate(1)); + __ j(kCarryClear, &done); + GenerateInvokeRuntime( + GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value()); + __ Bind(&done); + // We don't strictly require to restore EAX, but this makes the generated + // code easier to reason about. + __ popl(EAX); + __ cfi().AdjustCFAOffset(-4); + __ addl(ESP, Immediate(8)); + __ cfi().AdjustCFAOffset(-8); + } else { + if (!RequiresCurrentMethod()) { + CHECK(is_frame_entry); + __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument); + } + // We need a temporary. 
+ __ pushl(EAX); + __ cfi().AdjustCFAOffset(4); + __ movl(EAX, Immediate(address)); + __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()), Immediate(1)); + __ popl(EAX); // Put stack as expected before exiting or calling stub. + __ cfi().AdjustCFAOffset(-4); + __ j(kCarryClear, &done); + GenerateInvokeRuntime( + GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value()); + __ Bind(&done); + } + } +} + +void CodeGeneratorX86::GenerateFrameEntry() { + __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address + __ Bind(&frame_entry_label_); + bool skip_overflow_check = + IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86); + DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + if (!skip_overflow_check) { size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86); __ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes))); RecordPcInfo(nullptr, 0); } - if (HasEmptyFrame()) { - return; - } + if (!HasEmptyFrame()) { + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { + Register reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + __ pushl(reg); + __ cfi().AdjustCFAOffset(kX86WordSize); + __ cfi().RelOffset(DWARFReg(reg), 0); + } + } - for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { - Register reg = kCoreCalleeSaves[i]; - if (allocated_registers_.ContainsCoreRegister(reg)) { - __ pushl(reg); - __ cfi().AdjustCFAOffset(kX86WordSize); - __ cfi().RelOffset(DWARFReg(reg), 0); + int adjust = GetFrameSize() - FrameEntrySpillSize(); + __ subl(ESP, Immediate(adjust)); + __ cfi().AdjustCFAOffset(adjust); + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. + if (RequiresCurrentMethod()) { + __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument); } - } - int adjust = GetFrameSize() - FrameEntrySpillSize(); - __ subl(ESP, Immediate(adjust)); - __ cfi().AdjustCFAOffset(adjust); - // Save the current method if we need it. Note that we do not - // do this in HCurrentMethod, as the instruction might have been removed - // in the SSA graph. - if (RequiresCurrentMethod()) { - __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument); + if (GetGraph()->HasShouldDeoptimizeFlag()) { + // Initialize should_deoptimize flag to 0. + __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); + } } - if (GetGraph()->HasShouldDeoptimizeFlag()) { - // Initialize should_deoptimize flag to 0. 
- __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); - } + MaybeIncrementHotness(/* is_frame_entry= */ true); } void CodeGeneratorX86::GenerateFrameExit() { @@ -1391,18 +1445,7 @@ void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* suc HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { - __ pushl(EAX); - __ movl(EAX, Address(ESP, kX86WordSize)); - NearLabel overflow; - __ cmpw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()), - Immediate(ArtMethod::MaxCounter())); - __ j(kEqual, &overflow); - __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()), - Immediate(1)); - __ Bind(&overflow); - __ popl(EAX); - } + codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h index e305b50c8e..16446ce561 100644 --- a/compiler/optimizing/code_generator_x86.h +++ b/compiler/optimizing/code_generator_x86.h @@ -625,6 +625,7 @@ class CodeGeneratorX86 : public CodeGenerator { void GenerateExplicitNullCheck(HNullCheck* instruction) override; void MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass); + void MaybeIncrementHotness(bool is_frame_entry); // When we don't know the proper offset for the value, we use kDummy32BitOffset. // The correct value will be inserted when processing Assembler fixups. diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc index 48a3d90f6f..dd3a4f4dc0 100644 --- a/compiler/optimizing/code_generator_x86_64.cc +++ b/compiler/optimizing/code_generator_x86_64.cc @@ -1346,70 +1346,108 @@ static dwarf::Reg DWARFReg(FloatRegister reg) { return dwarf::Reg::X86_64Fp(static_cast<int>(reg)); } -void CodeGeneratorX86_64::GenerateFrameEntry() { - __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address - __ Bind(&frame_entry_label_); - bool skip_overflow_check = IsLeafMethod() - && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); - DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); - +void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) { if (GetCompilerOptions().CountHotnessInCompiledCode()) { NearLabel overflow; - __ cmpw(Address(CpuRegister(kMethodRegisterArgument), - ArtMethod::HotnessCountOffset().Int32Value()), + Register method = kMethodRegisterArgument; + if (!is_frame_entry) { + CHECK(RequiresCurrentMethod()); + method = TMP; + __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset)); + } + __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()), Immediate(ArtMethod::MaxCounter())); __ j(kEqual, &overflow); - __ addw(Address(CpuRegister(kMethodRegisterArgument), - ArtMethod::HotnessCountOffset().Int32Value()), + __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()), Immediate(1)); __ Bind(&overflow); } + if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) { + ScopedObjectAccess soa(Thread::Current()); + ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize); + uint64_t address = reinterpret_cast64<uint64_t>(info); + NearLabel done; + __ movq(CpuRegister(TMP), Immediate(address)); + __ addw(Address(CpuRegister(TMP), 
ProfilingInfo::BaselineHotnessCountOffset().Int32Value()), + Immediate(1)); + __ j(kCarryClear, &done); + if (HasEmptyFrame()) { + CHECK(is_frame_entry); + // Frame alignment, and the stub expects the method on the stack. + __ pushq(CpuRegister(RDI)); + __ cfi().AdjustCFAOffset(kX86_64WordSize); + __ cfi().RelOffset(DWARFReg(RDI), 0); + } else if (!RequiresCurrentMethod()) { + CHECK(is_frame_entry); + __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI)); + } + GenerateInvokeRuntime( + GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value()); + if (HasEmptyFrame()) { + __ popq(CpuRegister(RDI)); + __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize)); + __ cfi().Restore(DWARFReg(RDI)); + } + __ Bind(&done); + } +} + +void CodeGeneratorX86_64::GenerateFrameEntry() { + __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address + __ Bind(&frame_entry_label_); + bool skip_overflow_check = IsLeafMethod() + && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64); + DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks()); + + if (!skip_overflow_check) { size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64); __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes))); RecordPcInfo(nullptr, 0); } - if (HasEmptyFrame()) { - return; - } - - for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { - Register reg = kCoreCalleeSaves[i]; - if (allocated_registers_.ContainsCoreRegister(reg)) { - __ pushq(CpuRegister(reg)); - __ cfi().AdjustCFAOffset(kX86_64WordSize); - __ cfi().RelOffset(DWARFReg(reg), 0); + if (!HasEmptyFrame()) { + for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) { + Register reg = kCoreCalleeSaves[i]; + if (allocated_registers_.ContainsCoreRegister(reg)) { + __ pushq(CpuRegister(reg)); + __ cfi().AdjustCFAOffset(kX86_64WordSize); + __ cfi().RelOffset(DWARFReg(reg), 0); + } } - } - int adjust = GetFrameSize() - GetCoreSpillSize(); - __ subq(CpuRegister(RSP), Immediate(adjust)); - __ cfi().AdjustCFAOffset(adjust); - uint32_t xmm_spill_location = GetFpuSpillStart(); - size_t xmm_spill_slot_size = GetCalleePreservedFPWidth(); + int adjust = GetFrameSize() - GetCoreSpillSize(); + __ subq(CpuRegister(RSP), Immediate(adjust)); + __ cfi().AdjustCFAOffset(adjust); + uint32_t xmm_spill_location = GetFpuSpillStart(); + size_t xmm_spill_slot_size = GetCalleePreservedFPWidth(); + + for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { + if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { + int offset = xmm_spill_location + (xmm_spill_slot_size * i); + __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i])); + __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset); + } + } - for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) { - if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) { - int offset = xmm_spill_location + (xmm_spill_slot_size * i); - __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i])); - __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset); + // Save the current method if we need it. Note that we do not + // do this in HCurrentMethod, as the instruction might have been removed + // in the SSA graph. + if (RequiresCurrentMethod()) { + CHECK(!HasEmptyFrame()); + __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), + CpuRegister(kMethodRegisterArgument)); } - } - // Save the current method if we need it. 
Note that we do not - // do this in HCurrentMethod, as the instruction might have been removed - // in the SSA graph. - if (RequiresCurrentMethod()) { - __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), - CpuRegister(kMethodRegisterArgument)); + if (GetGraph()->HasShouldDeoptimizeFlag()) { + CHECK(!HasEmptyFrame()); + // Initialize should_deoptimize flag to 0. + __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); + } } - if (GetGraph()->HasShouldDeoptimizeFlag()) { - // Initialize should_deoptimize flag to 0. - __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0)); - } + MaybeIncrementHotness(/* is_frame_entry= */ true); } void CodeGeneratorX86_64::GenerateFrameExit() { @@ -1556,16 +1594,7 @@ void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* HLoopInformation* info = block->GetLoopInformation(); if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) { - if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) { - __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0)); - NearLabel overflow; - __ cmpw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()), - Immediate(ArtMethod::MaxCounter())); - __ j(kEqual, &overflow); - __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()), - Immediate(1)); - __ Bind(&overflow); - } + codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false); GenerateSuspendCheck(info->GetSuspendCheck(), successor); return; } diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h index 5537a4a4d9..2e8d9b3315 100644 --- a/compiler/optimizing/code_generator_x86_64.h +++ b/compiler/optimizing/code_generator_x86_64.h @@ -603,6 +603,8 @@ class CodeGeneratorX86_64 : public CodeGenerator { void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls); + void MaybeIncrementHotness(bool is_frame_entry); + // When we don't know the proper offset for the value, we use kDummy32BitOffset. // We will fix this up in the linker later to have the right value. 
static constexpr int32_t kDummy32BitOffset = 256; diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index ff7ed349a3..ecaedc7cd8 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -1813,6 +1813,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction, graph_->IsDebuggable(), /* osr= */ false, /* is_shared_jit_code= */ graph_->IsCompilingForSharedJitCode(), + /* baseline= */ graph_->IsCompilingBaseline(), /* start_instruction_id= */ caller_instruction_counter); callee_graph->SetArtMethod(resolved_method); diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index 09ae6fab84..043338466f 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -322,6 +322,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool debuggable = false, bool osr = false, bool is_shared_jit_code = false, + bool baseline = false, int start_instruction_id = 0) : allocator_(allocator), arena_stack_(arena_stack), @@ -358,6 +359,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { art_method_(nullptr), inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()), osr_(osr), + baseline_(baseline), cha_single_implementation_list_(allocator->Adapter(kArenaAllocCHA)), is_shared_jit_code_(is_shared_jit_code) { blocks_.reserve(kDefaultNumberOfBlocks); @@ -589,6 +591,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { bool IsCompilingOsr() const { return osr_; } + bool IsCompilingBaseline() const { return baseline_; } + bool IsCompilingForSharedJitCode() const { return is_shared_jit_code_; } @@ -786,6 +790,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // compiled code entries which the interpreter can directly jump to. const bool osr_; + // Whether we are compiling baseline (not running optimizations). This affects + // the code being generated. + const bool baseline_; + // List of methods that are assumed to have single implementation. 
ArenaSet<ArtMethod*> cha_single_implementation_list_; diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index 3f11170c66..f8eae2fb70 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -851,7 +851,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator, dead_reference_safe, compiler_options.GetDebuggable(), /* osr= */ osr, - /* is_shared_jit_code= */ is_shared_jit_code); + /* is_shared_jit_code= */ is_shared_jit_code, + /* baseline= */ baseline); if (method != nullptr) { graph->SetArtMethod(method); @@ -1174,7 +1175,8 @@ static ScopedArenaVector<uint8_t> CreateJniStackMap(ScopedArenaAllocator* alloca jni_compiled_method.GetFrameSize(), jni_compiled_method.GetCoreSpillMask(), jni_compiled_method.GetFpSpillMask(), - /* num_dex_registers= */ 0); + /* num_dex_registers= */ 0, + /* baseline= */ false); stack_map_stream->EndMethod(); return stack_map_stream->Encode(); } diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc index 3f6010d7af..dd6d1a2959 100644 --- a/compiler/optimizing/stack_map_stream.cc +++ b/compiler/optimizing/stack_map_stream.cc @@ -42,7 +42,8 @@ void StackMapStream::SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offs void StackMapStream::BeginMethod(size_t frame_size_in_bytes, size_t core_spill_mask, size_t fp_spill_mask, - uint32_t num_dex_registers) { + uint32_t num_dex_registers, + bool baseline) { DCHECK(!in_method_) << "Mismatched Begin/End calls"; in_method_ = true; DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called"; @@ -52,6 +53,7 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes, core_spill_mask_ = core_spill_mask; fp_spill_mask_ = fp_spill_mask; num_dex_registers_ = num_dex_registers; + baseline_ = baseline; if (kVerifyStackMaps) { dchecks_.emplace_back([=](const CodeInfo& code_info) { @@ -299,6 +301,7 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() { DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls"; uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0; + flags |= baseline_ ? CodeInfo::kIsBaseline : 0; uint32_t bit_table_flags = 0; ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) { if (bit_table->size() != 0) { // Record which bit-tables are stored. 
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h index f45e3d720e..67f716ce70 100644 --- a/compiler/optimizing/stack_map_stream.h +++ b/compiler/optimizing/stack_map_stream.h @@ -61,7 +61,8 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { void BeginMethod(size_t frame_size_in_bytes, size_t core_spill_mask, size_t fp_spill_mask, - uint32_t num_dex_registers); + uint32_t num_dex_registers, + bool baseline = false); void EndMethod(); void BeginStackMapEntry(uint32_t dex_pc, @@ -119,6 +120,7 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> { uint32_t core_spill_mask_ = 0; uint32_t fp_spill_mask_ = 0; uint32_t num_dex_registers_ = 0; + bool baseline_; BitTableBuilder<StackMap> stack_maps_; BitTableBuilder<RegisterMask> register_masks_; BitmapTableBuilder stack_masks_; diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc index 319e3590a5..0a43bfc4c7 100644 --- a/dex2oat/linker/oat_writer_test.cc +++ b/dex2oat/linker/oat_writer_test.cc @@ -471,7 +471,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) { EXPECT_EQ(56U, sizeof(OatHeader)); EXPECT_EQ(4U, sizeof(OatMethodOffsets)); EXPECT_EQ(8U, sizeof(OatQuickMethodHeader)); - EXPECT_EQ(168 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), + EXPECT_EQ(169 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)), sizeof(QuickEntryPoints)); } diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index bad37bcab2..8b1fc9e91c 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -2803,3 +2803,15 @@ ENTRY art_quick_update_inline_cache .Ldone: blx lr END art_quick_update_inline_cache + +// On entry, method is at the bottom of the stack. +ENTRY art_quick_compile_optimized + SETUP_SAVE_EVERYTHING_FRAME r0 + ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod + mov r1, rSELF @ pass Thread::Current + bl artCompileOptimized @ (ArtMethod*, Thread*) + RESTORE_SAVE_EVERYTHING_FRAME + // We don't need to restore the marking register here, as + // artCompileOptimized doesn't allow thread suspension. + blx lr +END art_quick_compile_optimized diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 6e9b533663..e0094e6f3d 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -2927,3 +2927,15 @@ ENTRY art_quick_update_inline_cache .Ldone: ret END art_quick_update_inline_cache + +// On entry, method is at the bottom of the stack. +ENTRY art_quick_compile_optimized + SETUP_SAVE_EVERYTHING_FRAME + ldr x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING] // pass ArtMethod + mov x1, xSELF // pass Thread::Current + bl artCompileOptimized // (ArtMethod*, Thread*) + RESTORE_SAVE_EVERYTHING_FRAME + // We don't need to restore the marking register here, as + // artCompileOptimized doesn't allow thread suspension. + ret +END art_quick_compile_optimized diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index 2bf82d0b6c..794ee89848 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -2570,3 +2570,20 @@ END_FUNCTION art_quick_update_inline_cache // TODO: implement these! UNIMPLEMENTED art_quick_memcmp16 + +// On entry, the method is at the bottom of the stack. 
+DEFINE_FUNCTION art_quick_compile_optimized + SETUP_SAVE_EVERYTHING_FRAME ebx, ebx + mov FRAME_SIZE_SAVE_EVERYTHING(%esp), %eax // Fetch ArtMethod + sub LITERAL(8), %esp // Alignment padding + CFI_ADJUST_CFA_OFFSET(8) + pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current() + CFI_ADJUST_CFA_OFFSET(4) + pushl %eax + CFI_ADJUST_CFA_OFFSET(4) + call SYMBOL(artCompileOptimized) // (ArtMethod*, Thread*) + addl LITERAL(16), %esp // Pop arguments. + CFI_ADJUST_CFA_OFFSET(-16) + RESTORE_SAVE_EVERYTHING_FRAME + ret +END_FUNCTION art_quick_compile_optimized diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 22d0ce4edb..3b30c37309 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -2511,3 +2511,13 @@ DEFINE_FUNCTION art_quick_update_inline_cache .Ldone: ret END_FUNCTION art_quick_update_inline_cache + +// On entry, method is at the bottom of the stack. +DEFINE_FUNCTION art_quick_compile_optimized + SETUP_SAVE_EVERYTHING_FRAME + movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod + movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current() + call SYMBOL(artCompileOptimized) // (ArtMethod*, Thread*) + RESTORE_SAVE_EVERYTHING_FRAME // restore frame up to return address + ret +END_FUNCTION art_quick_compile_optimized diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h index d41f9a0a2f..048deb4803 100644 --- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h +++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h @@ -125,8 +125,9 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qp // StringBuilder append qpoints->pStringBuilderAppend = art_quick_string_builder_append; - // InlineCache update + // Tiered JIT support qpoints->pUpdateInlineCache = art_quick_update_inline_cache; + qpoints->pCompileOptimized = art_quick_compile_optimized; } } // namespace art diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h index efab7c25a5..e031b21fb3 100644 --- a/runtime/entrypoints/quick/quick_entrypoints_list.h +++ b/runtime/entrypoints/quick/quick_entrypoints_list.h @@ -172,6 +172,7 @@ V(StringBuilderAppend, void*, uint32_t) \ \ V(UpdateInlineCache, void, void) \ + V(CompileOptimized, void, ArtMethod*, Thread*) \ \ V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \ V(ReadBarrierMarkReg00, mirror::Object*, mirror::Object*) \ diff --git a/runtime/entrypoints/quick/quick_thread_entrypoints.cc b/runtime/entrypoints/quick/quick_thread_entrypoints.cc index 0838059714..64be926853 100644 --- a/runtime/entrypoints/quick/quick_thread_entrypoints.cc +++ b/runtime/entrypoints/quick/quick_thread_entrypoints.cc @@ -15,6 +15,8 @@ */ #include "callee_save_frame.h" +#include "jit/jit.h" +#include "runtime.h" #include "thread-inl.h" namespace art { @@ -25,4 +27,11 @@ extern "C" void artTestSuspendFromCode(Thread* self) REQUIRES_SHARED(Locks::muta self->CheckSuspend(); } +extern "C" void artCompileOptimized(ArtMethod* method, Thread* self) + REQUIRES_SHARED(Locks::mutator_lock_) { + ScopedQuickEntrypointChecks sqec(self); + ScopedAssertNoThreadSuspension sants("Enqueuing optimized compilation"); + Runtime::Current()->GetJit()->EnqueueOptimizedCompilation(method, self); +} + } // namespace art diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h 
b/runtime/entrypoints/runtime_asm_entrypoints.h index 3f4e91ed91..740629a400 100644 --- a/runtime/entrypoints/runtime_asm_entrypoints.h +++ b/runtime/entrypoints/runtime_asm_entrypoints.h @@ -88,6 +88,7 @@ static inline const void* GetQuickInstrumentationExitPc() { } extern "C" void* art_quick_string_builder_append(uint32_t format); +extern "C" void art_quick_compile_optimized(ArtMethod*, Thread*); } // namespace art diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc index 210d85162e..36f5b398e8 100644 --- a/runtime/entrypoints_order_test.cc +++ b/runtime/entrypoints_order_test.cc @@ -338,7 +338,9 @@ class EntrypointsOrderTest : public CommonRuntimeTest { sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pStringBuilderAppend, pUpdateInlineCache, sizeof(void*)); - EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pUpdateInlineCache, pReadBarrierJni, + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pUpdateInlineCache, pCompileOptimized, + sizeof(void*)); + EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCompileOptimized, pReadBarrierJni, sizeof(void*)); EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierMarkReg00, sizeof(void*)); diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc index 8c7d657918..ffcee4b8d0 100644 --- a/runtime/jit/jit.cc +++ b/runtime/jit/jit.cc @@ -89,6 +89,8 @@ JitCompilerInterface* (*Jit::jit_load_)(void) = nullptr; JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) { auto* jit_options = new JitOptions; jit_options->use_jit_compilation_ = options.GetOrDefault(RuntimeArgumentMap::UseJitCompilation); + jit_options->use_tiered_jit_compilation_ = + options.GetOrDefault(RuntimeArgumentMap::UseTieredJitCompilation); jit_options->code_cache_initial_capacity_ = options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheInitialCapacity); @@ -318,13 +320,14 @@ bool Jit::CompileMethod(ArtMethod* method, Thread* self, bool baseline, bool osr // If we get a request to compile a proxy method, we pass the actual Java method // of that proxy method, as the compiler does not expect a proxy method. ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(kRuntimePointerSize); - if (!code_cache_->NotifyCompilationOf(method_to_compile, self, osr, prejit, region)) { + if (!code_cache_->NotifyCompilationOf(method_to_compile, self, osr, prejit, baseline, region)) { return false; } VLOG(jit) << "Compiling method " << ArtMethod::PrettyMethod(method_to_compile) - << " osr=" << std::boolalpha << osr; + << " osr=" << std::boolalpha << osr + << " baseline=" << std::boolalpha << baseline; bool success = jit_compiler_->CompileMethod(self, region, method_to_compile, baseline, osr); code_cache_->DoneCompiling(method_to_compile, self, osr); if (!success) { @@ -1449,7 +1452,10 @@ bool Jit::MaybeCompileMethod(Thread* self, if (old_count < HotMethodThreshold() && new_count >= HotMethodThreshold()) { if (!code_cache_->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) { DCHECK(thread_pool_ != nullptr); - thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::TaskKind::kCompile)); + JitCompileTask::TaskKind kind = options_->UseTieredJitCompilation() + ? 
JitCompileTask::TaskKind::kCompileBaseline + : JitCompileTask::TaskKind::kCompile; + thread_pool_->AddTask(self, new JitCompileTask(method, kind)); } } if (old_count < OSRMethodThreshold() && new_count >= OSRMethodThreshold()) { @@ -1467,6 +1473,11 @@ bool Jit::MaybeCompileMethod(Thread* self, return true; } +void Jit::EnqueueOptimizedCompilation(ArtMethod* method, Thread* self) { + thread_pool_->AddTask( + self, new JitCompileTask(method, JitCompileTask::TaskKind::kCompile)); +} + class ScopedSetRuntimeThread { public: explicit ScopedSetRuntimeThread(Thread* self) diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h index e5b77c2c7e..42adf6ba7c 100644 --- a/runtime/jit/jit.h +++ b/runtime/jit/jit.h @@ -114,6 +114,10 @@ class JitOptions { return use_jit_compilation_; } + bool UseTieredJitCompilation() const { + return use_tiered_jit_compilation_; + } + void SetUseJitCompilation(bool b) { use_jit_compilation_ = b; } @@ -137,6 +141,7 @@ class JitOptions { static uint32_t RoundUpThreshold(uint32_t threshold); bool use_jit_compilation_; + bool use_tiered_jit_compilation_; size_t code_cache_initial_capacity_; size_t code_cache_max_capacity_; uint32_t compile_threshold_; @@ -383,6 +388,8 @@ class Jit { // class path methods. void NotifyZygoteCompilationDone(); + void EnqueueOptimizedCompilation(ArtMethod* method, Thread* self); + private: Jit(JitCodeCache* code_cache, JitOptions* options); diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index c02a699eed..519655d843 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -1581,9 +1581,19 @@ bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr, bool prejit, + bool baseline, JitMemoryRegion* region) { - if (!osr && ContainsPc(method->GetEntryPointFromQuickCompiledCode())) { - return false; + const void* existing_entry_point = method->GetEntryPointFromQuickCompiledCode(); + if (!osr && ContainsPc(existing_entry_point)) { + OatQuickMethodHeader* method_header = + OatQuickMethodHeader::FromEntryPoint(existing_entry_point); + if (CodeInfo::IsBaseline(method_header->GetOptimizedCodeInfoPtr()) == baseline) { + VLOG(jit) << "Not compiling " + << method->PrettyMethod() + << " because it has already been compiled" + << " baseline=" << std::boolalpha << baseline; + return false; + } } if (NeedsClinitCheckBeforeCall(method) && !prejit) { diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index 58cf0e36a7..61fee34a6e 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -184,6 +184,7 @@ class JitCodeCache { Thread* self, bool osr, bool prejit, + bool baseline, JitMemoryRegion* region) REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::jit_lock_); diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc index 2cb569c61a..8c88760158 100644 --- a/runtime/jit/profiling_info.cc +++ b/runtime/jit/profiling_info.cc @@ -26,7 +26,8 @@ namespace art { ProfilingInfo::ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries) - : method_(method), + : baseline_hotness_count_(0), + method_(method), saved_entry_point_(nullptr), number_of_inline_caches_(entries.size()), current_inline_uses_(0), diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h index d4dc49867b..ada103658a 100644 --- a/runtime/jit/profiling_info.h +++ b/runtime/jit/profiling_info.h @@ -126,9 +126,18 @@ class ProfilingInfo { (current_inline_uses_ > 0); } + static constexpr MemberOffset BaselineHotnessCountOffset() { + return 
MemberOffset(OFFSETOF_MEMBER(ProfilingInfo, baseline_hotness_count_)); + } + private: ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries); + // Hotness count for methods compiled with the JIT baseline compiler. Once + // a threshold is hit (currently the maximum value of uint16_t), we will + // JIT compile the method with the optimizing compiler. + uint16_t baseline_hotness_count_; + // Method this profiling info is for. // Not 'const' as JVMTI introduces obsolete methods that we implement by creating new ArtMethods. // See JitCodeCache::MoveObsoleteMethod. diff --git a/runtime/oat.h b/runtime/oat.h index 3b20ea1777..6c739b2380 100644 --- a/runtime/oat.h +++ b/runtime/oat.h @@ -32,8 +32,8 @@ class InstructionSetFeatures; class PACKED(4) OatHeader { public: static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } }; - // Last oat version changed reason: pUpdateInlineCache entrypoint. - static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '7', '\0' } }; + // Last oat version changed reason: pCompileOptimized entrypoint. + static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '8', '\0' } }; static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline"; static constexpr const char* kDebuggableKey = "debuggable"; diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc index bfedfa9c1b..7ef1e6d52b 100644 --- a/runtime/parsed_options.cc +++ b/runtime/parsed_options.cc @@ -201,6 +201,10 @@ std::unique_ptr<RuntimeParser> ParsedOptions::MakeParser(bool ignore_unrecognize .WithType<bool>() .WithValueMap({{"false", false}, {"true", true}}) .IntoKey(M::UseJitCompilation) + .Define("-Xusetieredjit:_") + .WithType<bool>() + .WithValueMap({{"false", false}, {"true", true}}) + .IntoKey(M::UseTieredJitCompilation) .Define("-Xjitinitialsize:_") .WithType<MemoryKiB>() .IntoKey(M::JITCodeCacheInitialCapacity) diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def index 037167ebcc..5db5a9058a 100644 --- a/runtime/runtime_options.def +++ b/runtime/runtime_options.def @@ -75,6 +75,7 @@ RUNTIME_OPTIONS_KEY (Unit, LowMemoryMode) RUNTIME_OPTIONS_KEY (bool, UseTLAB, (kUseTlab || kUseReadBarrier)) RUNTIME_OPTIONS_KEY (bool, EnableHSpaceCompactForOOM, true) RUNTIME_OPTIONS_KEY (bool, UseJitCompilation, true) +RUNTIME_OPTIONS_KEY (bool, UseTieredJitCompilation, false) RUNTIME_OPTIONS_KEY (bool, DumpNativeStackOnSigQuit, true) RUNTIME_OPTIONS_KEY (bool, MadviseRandomAccess, false) RUNTIME_OPTIONS_KEY (JniIdType, OpaqueJniIds, JniIdType::kDefault) // -Xopaque-jni-ids:{true, false} diff --git a/runtime/stack_map.h b/runtime/stack_map.h index 598f3e4473..2065a79dd5 100644 --- a/runtime/stack_map.h +++ b/runtime/stack_map.h @@ -429,6 +429,10 @@ class CodeInfo { return (*code_info_data & kHasInlineInfo) != 0; } + ALWAYS_INLINE static bool IsBaseline(const uint8_t* code_info_data) { + return (*code_info_data & kIsBaseline) != 0; + } + private: // Scan backward to determine dex register locations at given stack map. void DecodeDexRegisterMap(uint32_t stack_map_index, @@ -472,6 +476,7 @@ class CodeInfo { enum Flags { kHasInlineInfo = 1 << 0, + kIsBaseline = 1 << 1, }; // The CodeInfo starts with sequence of variable-length bit-encoded integers.
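A note on the hotness update the ARM and ARM64 back ends now emit from MaybeIncrementHotness(): the ArtMethod counter saturates at 0xFFFF instead of wrapping, because the Sub with an LSR #16 operand takes exactly one back off whenever the preceding Add carried into bit 16. A minimal C++ sketch of that arithmetic (SaturatingBump is an illustrative name, not an ART function):

#include <cassert>
#include <cstdint>

// Mirrors the emitted sequence: Ldrh (zero-extend), Add #1,
// Sub counter, counter, counter LSR #16, then Strh of the low 16 bits.
// If the Add reaches 0x10000, the shifted value is 1 and the Sub brings
// the counter back to 0xFFFF; otherwise the shift contributes 0.
uint16_t SaturatingBump(uint16_t hotness) {
  uint32_t counter = hotness;             // Ldrh zero-extends into a wider register.
  counter += 1;                           // Add counter, counter, #1
  counter -= counter >> 16;               // Sub counter, counter, counter, LSR #16
  return static_cast<uint16_t>(counter);  // Strh keeps the low 16 bits.
}

int main() {
  assert(SaturatingBump(0x0000) == 0x0001);
  assert(SaturatingBump(0xFFFE) == 0xFFFF);
  assert(SaturatingBump(0xFFFF) == 0xFFFF);  // Saturates instead of wrapping.
  return 0;
}

The x86 and x86-64 back ends reach the same result differently: they compare the counter against ArtMethod::MaxCounter() first and skip the addw once it is already saturated.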
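The baseline counter added to ProfilingInfo behaves differently: it is allowed to wrap, and the wrap is the trigger. Baseline-compiled code bumps baseline_hotness_count_ on method entry and on loop back edges, and only when the 16-bit value rolls over to zero (carry set after the addw on x86/x86-64, (counter & 0xffff) == 0 after the Add on ARM64) does it call the new kQuickCompileOptimized entrypoint, which reaches artCompileOptimized and enqueues the optimized compilation. A rough C++ model of that trigger; the struct, the stubbed slow path, and OnBaselineEntryOrBackEdge are stand-ins, not ART APIs:

#include <cstdint>
#include <cstdio>

// Stand-in for the runtime-side ProfilingInfo field the generated code pokes.
struct FakeProfilingInfo { uint16_t baseline_hotness_count = 0; };

// Stub for the slow path: in the change above this is
// art_quick_compile_optimized -> artCompileOptimized ->
// Jit::EnqueueOptimizedCompilation, which adds a JitCompileTask of kind kCompile.
void EnqueueOptimizedCompilation() { std::puts("enqueue optimized compilation"); }

// Models a baseline prologue or loop back edge: bump the 16-bit counter and
// take the slow path only on the increment that wraps it back to zero,
// i.e. once every 65536 executions until optimized code replaces the method.
void OnBaselineEntryOrBackEdge(FakeProfilingInfo* info) {
  info->baseline_hotness_count += 1;        // addw(..., Immediate(1)) / Add + Strh
  if (info->baseline_hotness_count == 0) {  // wrap detected
    EnqueueOptimizedCompilation();
  }
}

int main() {
  FakeProfilingInfo info;
  for (int i = 0; i < (1 << 16); ++i) {
    OnBaselineEntryOrBackEdge(&info);  // The 65536th bump fires the enqueue once.
  }
  return 0;
}

The generated call deliberately skips RecordPcInfo: artCompileOptimized runs under ScopedAssertNoThreadSuspension and only enqueues work, so no stack map is needed at that call site.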
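Tiering itself stays opt-in: UseTieredJitCompilation defaults to false in runtime_options.def and is flipped with the new -Xusetieredjit:true runtime option. With it enabled, Jit::MaybeCompileMethod queues a kCompileBaseline task once the hot-method threshold is reached, the wrap of baseline_hotness_count_ later promotes the method through EnqueueOptimizedCompilation, and JitCodeCache::NotifyCompilationOf consults CodeInfo::IsBaseline on the already-installed code so the optimized recompilation is not rejected as a duplicate.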