-rw-r--r--  compiler/optimizing/code_generator.cc  3
-rw-r--r--  compiler/optimizing/code_generator_arm64.cc  77
-rw-r--r--  compiler/optimizing/code_generator_arm64.h  1
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.cc  81
-rw-r--r--  compiler/optimizing/code_generator_arm_vixl.h  1
-rw-r--r--  compiler/optimizing/code_generator_x86.cc  133
-rw-r--r--  compiler/optimizing/code_generator_x86.h  1
-rw-r--r--  compiler/optimizing/code_generator_x86_64.cc  135
-rw-r--r--  compiler/optimizing/code_generator_x86_64.h  2
-rw-r--r--  compiler/optimizing/inliner.cc  1
-rw-r--r--  compiler/optimizing/nodes.h  8
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc  6
-rw-r--r--  compiler/optimizing/stack_map_stream.cc  5
-rw-r--r--  compiler/optimizing/stack_map_stream.h  4
-rw-r--r--  dex2oat/linker/oat_writer_test.cc  2
-rw-r--r--  runtime/arch/arm/quick_entrypoints_arm.S  12
-rw-r--r--  runtime/arch/arm64/quick_entrypoints_arm64.S  12
-rw-r--r--  runtime/arch/x86/quick_entrypoints_x86.S  17
-rw-r--r--  runtime/arch/x86_64/quick_entrypoints_x86_64.S  10
-rw-r--r--  runtime/entrypoints/quick/quick_default_init_entrypoints.h  3
-rw-r--r--  runtime/entrypoints/quick/quick_entrypoints_list.h  1
-rw-r--r--  runtime/entrypoints/quick/quick_thread_entrypoints.cc  9
-rw-r--r--  runtime/entrypoints/runtime_asm_entrypoints.h  1
-rw-r--r--  runtime/entrypoints_order_test.cc  4
-rw-r--r--  runtime/jit/jit.cc  17
-rw-r--r--  runtime/jit/jit.h  7
-rw-r--r--  runtime/jit/jit_code_cache.cc  14
-rw-r--r--  runtime/jit/jit_code_cache.h  1
-rw-r--r--  runtime/jit/profiling_info.cc  3
-rw-r--r--  runtime/jit/profiling_info.h  9
-rw-r--r--  runtime/oat.h  4
-rw-r--r--  runtime/parsed_options.cc  4
-rw-r--r--  runtime/runtime_options.def  1
-rw-r--r--  runtime/stack_map.h  5
34 files changed, 440 insertions, 154 deletions
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index bef7169da1..8406ef5504 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -395,7 +395,8 @@ void CodeGenerator::Compile(CodeAllocator* allocator) {
GetStackMapStream()->BeginMethod(HasEmptyFrame() ? 0 : frame_size_,
core_spill_mask_,
fpu_spill_mask_,
- GetGraph()->GetNumberOfVRegs());
+ GetGraph()->GetNumberOfVRegs(),
+ GetGraph()->IsCompilingBaseline());
size_t frame_start = GetAssembler()->CodeSize();
GenerateFrameEntry();
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 47c62f9366..894c7a4c59 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -1061,19 +1061,66 @@ void ParallelMoveResolverARM64::EmitMove(size_t index) {
codegen_->MoveLocation(move->GetDestination(), move->GetSource(), DataType::Type::kVoid);
}
-void CodeGeneratorARM64::GenerateFrameEntry() {
+void CodeGeneratorARM64::MaybeIncrementHotness(bool is_frame_entry) {
MacroAssembler* masm = GetVIXLAssembler();
- __ Bind(&frame_entry_label_);
-
if (GetCompilerOptions().CountHotnessInCompiledCode()) {
UseScratchRegisterScope temps(masm);
+ Register counter = temps.AcquireX();
+ Register method = is_frame_entry ? kArtMethodRegister : temps.AcquireX();
+ if (!is_frame_entry) {
+ __ Ldr(method, MemOperand(sp, 0));
+ }
+ __ Ldrh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
+ __ Add(counter, counter, 1);
+ // Subtract one if the counter would overflow.
+ __ Sub(counter, counter, Operand(counter, LSR, 16));
+ __ Strh(counter, MemOperand(method, ArtMethod::HotnessCountOffset().Int32Value()));
+ }
+
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ ScopedObjectAccess soa(Thread::Current());
+ ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+ uint32_t address = reinterpret_cast32<uint32_t>(info);
+ vixl::aarch64::Label done;
+ UseScratchRegisterScope temps(masm);
Register temp = temps.AcquireX();
- __ Ldrh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
- __ Add(temp, temp, 1);
- // Subtract one if the counter would overflow.
- __ Sub(temp, temp, Operand(temp, LSR, 16));
- __ Strh(temp, MemOperand(kArtMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+ Register counter = temps.AcquireW();
+ __ Mov(temp, address);
+ __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+ __ Add(counter, counter, 1);
+ __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+ __ Tst(counter, 0xffff);
+ __ B(ne, &done);
+ if (is_frame_entry) {
+ if (HasEmptyFrame()) {
+ // The entrypoint expects the method at the bottom of the stack. We
+ // claim stack space necessary for alignment.
+ __ Claim(kStackAlignment);
+ __ Stp(kArtMethodRegister, lr, MemOperand(sp, 0));
+ } else if (!RequiresCurrentMethod()) {
+ __ Str(kArtMethodRegister, MemOperand(sp, 0));
+ }
+ } else {
+ CHECK(RequiresCurrentMethod());
+ }
+ uint32_t entrypoint_offset =
+ GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
+ __ Ldr(lr, MemOperand(tr, entrypoint_offset));
+ // Note: we don't record the call here (and therefore don't generate a stack
+ // map), as the entrypoint should never be suspended.
+ __ Blr(lr);
+ if (HasEmptyFrame()) {
+ CHECK(is_frame_entry);
+ __ Ldr(lr, MemOperand(sp, 8));
+ __ Drop(kStackAlignment);
+ }
+ __ Bind(&done);
}
+}
+
+void CodeGeneratorARM64::GenerateFrameEntry() {
+ MacroAssembler* masm = GetVIXLAssembler();
+ __ Bind(&frame_entry_label_);
bool do_overflow_check =
FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm64) || !IsLeafMethod();
@@ -1136,7 +1183,7 @@ void CodeGeneratorARM64::GenerateFrameEntry() {
__ Str(wzr, MemOperand(sp, GetStackOffsetOfShouldDeoptimizeFlag()));
}
}
-
+ MaybeIncrementHotness(/* is_frame_entry= */ true);
MaybeGenerateMarkingRegisterCheck(/* code= */ __LINE__);
}
@@ -3177,17 +3224,7 @@ void InstructionCodeGeneratorARM64::HandleGoto(HInstruction* got, HBasicBlock* s
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
- UseScratchRegisterScope temps(GetVIXLAssembler());
- Register temp1 = temps.AcquireX();
- Register temp2 = temps.AcquireX();
- __ Ldr(temp1, MemOperand(sp, 0));
- __ Ldrh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
- __ Add(temp2, temp2, 1);
- // Subtract one if the counter would overflow.
- __ Sub(temp2, temp2, Operand(temp2, LSR, 16));
- __ Strh(temp2, MemOperand(temp1, ArtMethod::HotnessCountOffset().Int32Value()));
- }
+ codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
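
Note: the ArtMethod hotness update above saturates its 16-bit counter without a branch. After the increment, counter LSR 16 is 1 only when the value has overflowed past 0xFFFF, so subtracting it clamps the counter at the maximum. A minimal C++ sketch of that arithmetic, for illustration only (not ART code):

#include <cassert>
#include <cstdint>

// Mirrors the arm64 sequence: Ldrh; Add #1; Sub counter, counter, counter LSR #16; Strh.
uint32_t IncrementSaturated(uint32_t counter) {  // counter holds a zero-extended 16-bit value
  counter += 1;              // 0xFFFF becomes 0x10000 on overflow
  counter -= counter >> 16;  // subtract 1 only in the overflow case
  return counter;            // always fits back into 16 bits
}

int main() {
  assert(IncrementSaturated(41) == 42);
  assert(IncrementSaturated(0xFFFF) == 0xFFFF);  // saturates instead of wrapping
}
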
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 253e91505d..6b2c80529b 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -787,6 +787,7 @@ class CodeGeneratorARM64 : public CodeGenerator {
}
void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl::aarch64::Register klass);
+ void MaybeIncrementHotness(bool is_frame_entry);
private:
// Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 9100c6c547..49a608eee7 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -2080,27 +2080,79 @@ void CodeGeneratorARMVIXL::ComputeSpillMask() {
}
}
-void CodeGeneratorARMVIXL::GenerateFrameEntry() {
- bool skip_overflow_check =
- IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
- DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
- __ Bind(&frame_entry_label_);
-
+void CodeGeneratorARMVIXL::MaybeIncrementHotness(bool is_frame_entry) {
if (GetCompilerOptions().CountHotnessInCompiledCode()) {
UseScratchRegisterScope temps(GetVIXLAssembler());
vixl32::Register temp = temps.Acquire();
static_assert(ArtMethod::MaxCounter() == 0xFFFF, "asm is probably wrong");
- // Load with sign extend to set the high bits for integer overflow check.
+ if (!is_frame_entry) {
+ __ Push(vixl32::Register(kMethodRegister));
+ GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
+ }
+ // Load with zero extend to clear the high bits for integer overflow check.
__ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
__ Add(temp, temp, 1);
// Subtract one if the counter would overflow.
__ Sub(temp, temp, Operand(temp, ShiftType::LSR, 16));
__ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
+ if (!is_frame_entry) {
+ __ Pop(vixl32::Register(kMethodRegister));
+ }
}
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ ScopedObjectAccess soa(Thread::Current());
+ ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+ uint32_t address = reinterpret_cast32<uint32_t>(info);
+ vixl::aarch32::Label done;
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ temps.Exclude(ip);
+ if (!is_frame_entry) {
+ __ Push(r4); // Will be used as a temporary. For frame entry, r4 is always available.
+ }
+ __ Mov(r4, address);
+ __ Ldrh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+ __ Add(ip, ip, 1);
+ __ Strh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+ if (!is_frame_entry) {
+ __ Pop(r4);
+ }
+ __ Lsls(ip, ip, 16);
+ __ B(ne, &done);
+ uint32_t entry_point_offset =
+ GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
+ if (HasEmptyFrame()) {
+ CHECK(is_frame_entry);
+ // For leaf methods, we need to spill lr and r0. Also spill r1 and r2 for
+ // alignment.
+ uint32_t core_spill_mask =
+ (1 << lr.GetCode()) | (1 << r0.GetCode()) | (1 << r1.GetCode()) | (1 << r2.GetCode());
+ __ Push(RegisterList(core_spill_mask));
+ __ Ldr(lr, MemOperand(tr, entry_point_offset));
+ __ Blx(lr);
+ __ Pop(RegisterList(core_spill_mask));
+ } else {
+ if (!RequiresCurrentMethod()) {
+ CHECK(is_frame_entry);
+ GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
+ }
+ __ Ldr(lr, MemOperand(tr, entry_point_offset));
+ __ Blx(lr);
+ }
+ __ Bind(&done);
+ }
+}
+
+void CodeGeneratorARMVIXL::GenerateFrameEntry() {
+ bool skip_overflow_check =
+ IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
+ DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+ __ Bind(&frame_entry_label_);
+
if (HasEmptyFrame()) {
// Ensure that the CFI opcode list is not empty.
GetAssembler()->cfi().Nop();
+ MaybeIncrementHotness(/* is_frame_entry= */ true);
return;
}
@@ -2201,6 +2253,7 @@ void CodeGeneratorARMVIXL::GenerateFrameEntry() {
GetAssembler()->StoreToOffset(kStoreWord, temp, sp, GetStackOffsetOfShouldDeoptimizeFlag());
}
+ MaybeIncrementHotness(/* is_frame_entry= */ true);
MaybeGenerateMarkingRegisterCheck(/* code= */ 1);
}
@@ -2498,19 +2551,7 @@ void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock*
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
- UseScratchRegisterScope temps(GetVIXLAssembler());
- vixl32::Register temp = temps.Acquire();
- __ Push(vixl32::Register(kMethodRegister));
- GetAssembler()->LoadFromOffset(kLoadWord, kMethodRegister, sp, kArmWordSize);
- // Load with sign extend to set the high bits for integer overflow check.
- __ Ldrh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
- __ Add(temp, temp, 1);
- // Subtract one if the counter would overflow.
- __ Sub(temp, temp, Operand(temp, ShiftType::LSR, 16));
- __ Strh(temp, MemOperand(kMethodRegister, ArtMethod::HotnessCountOffset().Int32Value()));
- __ Pop(vixl32::Register(kMethodRegister));
- }
+ codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
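
Note: the baseline path uses a different trick for the ProfilingInfo counter. It is allowed to wrap, and the call to the kQuickCompileOptimized entrypoint is taken exactly when the 16-bit value wraps to zero, i.e. once every 65536 executions: on arm this is detected via Lsls #16 setting the Z flag, on arm64 via Tst against 0xffff, and on x86/x86-64 via the carry flag of the 16-bit addw. A minimal C++ model of the trigger condition, for illustration only (not ART code):

#include <cstdint>
#include <cstdio>

// Returns true when the 16-bit baseline counter wraps to zero, the condition
// under which the generated code calls the kQuickCompileOptimized stub.
bool IncrementAndCheckWrap(uint16_t* counter) {
  *counter = static_cast<uint16_t>(*counter + 1);
  return *counter == 0;  // arm/arm64: Z flag set; x86/x86-64: carry set by addw
}

int main() {
  uint16_t baseline_hotness = 0xFFFF;
  printf("%d\n", IncrementAndCheckWrap(&baseline_hotness));  // 1: wrapped, would request optimized compilation
  printf("%d\n", IncrementAndCheckWrap(&baseline_hotness));  // 0: no call until the next 65536 executions
}
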
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 3d4c231842..48fb0827d7 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -757,6 +757,7 @@ class CodeGeneratorARMVIXL : public CodeGenerator {
}
void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl32::Register klass);
+ void MaybeIncrementHotness(bool is_frame_entry);
private:
// Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 54da87919c..3077be05ac 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -1072,58 +1072,112 @@ static dwarf::Reg DWARFReg(Register reg) {
return dwarf::Reg::X86Core(static_cast<int>(reg));
}
-void CodeGeneratorX86::GenerateFrameEntry() {
- __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
- __ Bind(&frame_entry_label_);
- bool skip_overflow_check =
- IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
- DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
-
+void CodeGeneratorX86::MaybeIncrementHotness(bool is_frame_entry) {
if (GetCompilerOptions().CountHotnessInCompiledCode()) {
+ Register reg = EAX;
+ if (is_frame_entry) {
+ reg = kMethodRegisterArgument;
+ } else {
+ __ pushl(EAX);
+ __ movl(EAX, Address(ESP, kX86WordSize));
+ }
NearLabel overflow;
- __ cmpw(Address(kMethodRegisterArgument,
- ArtMethod::HotnessCountOffset().Int32Value()),
+ __ cmpw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
Immediate(ArtMethod::MaxCounter()));
__ j(kEqual, &overflow);
- __ addw(Address(kMethodRegisterArgument,
- ArtMethod::HotnessCountOffset().Int32Value()),
+ __ addw(Address(reg, ArtMethod::HotnessCountOffset().Int32Value()),
Immediate(1));
__ Bind(&overflow);
+ if (!is_frame_entry) {
+ __ popl(EAX);
+ }
}
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ ScopedObjectAccess soa(Thread::Current());
+ ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+ uint32_t address = reinterpret_cast32<uint32_t>(info);
+ NearLabel done;
+ if (HasEmptyFrame()) {
+ CHECK(is_frame_entry);
+ // Alignment
+ __ subl(ESP, Immediate(8));
+ __ cfi().AdjustCFAOffset(8);
+ // We need a temporary. The stub also expects the method at the bottom of the stack.
+ __ pushl(EAX);
+ __ cfi().AdjustCFAOffset(4);
+ __ movl(EAX, Immediate(address));
+ __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()), Immediate(1));
+ __ j(kCarryClear, &done);
+ GenerateInvokeRuntime(
+ GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
+ __ Bind(&done);
+ // We don't strictly need to restore EAX, but this makes the generated
+ // code easier to reason about.
+ __ popl(EAX);
+ __ cfi().AdjustCFAOffset(-4);
+ __ addl(ESP, Immediate(8));
+ __ cfi().AdjustCFAOffset(-8);
+ } else {
+ if (!RequiresCurrentMethod()) {
+ CHECK(is_frame_entry);
+ __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
+ }
+ // We need a temporary.
+ __ pushl(EAX);
+ __ cfi().AdjustCFAOffset(4);
+ __ movl(EAX, Immediate(address));
+ __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()), Immediate(1));
+ __ popl(EAX); // Put stack as expected before exiting or calling stub.
+ __ cfi().AdjustCFAOffset(-4);
+ __ j(kCarryClear, &done);
+ GenerateInvokeRuntime(
+ GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
+ __ Bind(&done);
+ }
+ }
+}
+
+void CodeGeneratorX86::GenerateFrameEntry() {
+ __ cfi().SetCurrentCFAOffset(kX86WordSize); // return address
+ __ Bind(&frame_entry_label_);
+ bool skip_overflow_check =
+ IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
+ DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+
if (!skip_overflow_check) {
size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86);
__ testl(EAX, Address(ESP, -static_cast<int32_t>(reserved_bytes)));
RecordPcInfo(nullptr, 0);
}
- if (HasEmptyFrame()) {
- return;
- }
+ if (!HasEmptyFrame()) {
+ for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+ Register reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg)) {
+ __ pushl(reg);
+ __ cfi().AdjustCFAOffset(kX86WordSize);
+ __ cfi().RelOffset(DWARFReg(reg), 0);
+ }
+ }
- for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
- Register reg = kCoreCalleeSaves[i];
- if (allocated_registers_.ContainsCoreRegister(reg)) {
- __ pushl(reg);
- __ cfi().AdjustCFAOffset(kX86WordSize);
- __ cfi().RelOffset(DWARFReg(reg), 0);
+ int adjust = GetFrameSize() - FrameEntrySpillSize();
+ __ subl(ESP, Immediate(adjust));
+ __ cfi().AdjustCFAOffset(adjust);
+ // Save the current method if we need it. Note that we do not
+ // do this in HCurrentMethod, as the instruction might have been removed
+ // in the SSA graph.
+ if (RequiresCurrentMethod()) {
+ __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
}
- }
- int adjust = GetFrameSize() - FrameEntrySpillSize();
- __ subl(ESP, Immediate(adjust));
- __ cfi().AdjustCFAOffset(adjust);
- // Save the current method if we need it. Note that we do not
- // do this in HCurrentMethod, as the instruction might have been removed
- // in the SSA graph.
- if (RequiresCurrentMethod()) {
- __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
+ if (GetGraph()->HasShouldDeoptimizeFlag()) {
+ // Initialize should_deoptimize flag to 0.
+ __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
+ }
}
- if (GetGraph()->HasShouldDeoptimizeFlag()) {
- // Initialize should_deoptimize flag to 0.
- __ movl(Address(ESP, GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
- }
+ MaybeIncrementHotness(/* is_frame_entry= */ true);
}
void CodeGeneratorX86::GenerateFrameExit() {
@@ -1391,18 +1445,7 @@ void InstructionCodeGeneratorX86::HandleGoto(HInstruction* got, HBasicBlock* suc
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
- __ pushl(EAX);
- __ movl(EAX, Address(ESP, kX86WordSize));
- NearLabel overflow;
- __ cmpw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()),
- Immediate(ArtMethod::MaxCounter()));
- __ j(kEqual, &overflow);
- __ addw(Address(EAX, ArtMethod::HotnessCountOffset().Int32Value()),
- Immediate(1));
- __ Bind(&overflow);
- __ popl(EAX);
- }
+ codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index e305b50c8e..16446ce561 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -625,6 +625,7 @@ class CodeGeneratorX86 : public CodeGenerator {
void GenerateExplicitNullCheck(HNullCheck* instruction) override;
void MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass);
+ void MaybeIncrementHotness(bool is_frame_entry);
// When we don't know the proper offset for the value, we use kDummy32BitOffset.
// The correct value will be inserted when processing Assembler fixups.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 48a3d90f6f..dd3a4f4dc0 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -1346,70 +1346,108 @@ static dwarf::Reg DWARFReg(FloatRegister reg) {
return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
}
-void CodeGeneratorX86_64::GenerateFrameEntry() {
- __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
- __ Bind(&frame_entry_label_);
- bool skip_overflow_check = IsLeafMethod()
- && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
- DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
-
+void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
if (GetCompilerOptions().CountHotnessInCompiledCode()) {
NearLabel overflow;
- __ cmpw(Address(CpuRegister(kMethodRegisterArgument),
- ArtMethod::HotnessCountOffset().Int32Value()),
+ Register method = kMethodRegisterArgument;
+ if (!is_frame_entry) {
+ CHECK(RequiresCurrentMethod());
+ method = TMP;
+ __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
+ }
+ __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
Immediate(ArtMethod::MaxCounter()));
__ j(kEqual, &overflow);
- __ addw(Address(CpuRegister(kMethodRegisterArgument),
- ArtMethod::HotnessCountOffset().Int32Value()),
+ __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
Immediate(1));
__ Bind(&overflow);
}
+ if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ ScopedObjectAccess soa(Thread::Current());
+ ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+ uint64_t address = reinterpret_cast64<uint64_t>(info);
+ NearLabel done;
+ __ movq(CpuRegister(TMP), Immediate(address));
+ __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
+ Immediate(1));
+ __ j(kCarryClear, &done);
+ if (HasEmptyFrame()) {
+ CHECK(is_frame_entry);
+ // Frame alignment, and the stub expects the method on the stack.
+ __ pushq(CpuRegister(RDI));
+ __ cfi().AdjustCFAOffset(kX86_64WordSize);
+ __ cfi().RelOffset(DWARFReg(RDI), 0);
+ } else if (!RequiresCurrentMethod()) {
+ CHECK(is_frame_entry);
+ __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
+ }
+ GenerateInvokeRuntime(
+ GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
+ if (HasEmptyFrame()) {
+ __ popq(CpuRegister(RDI));
+ __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
+ __ cfi().Restore(DWARFReg(RDI));
+ }
+ __ Bind(&done);
+ }
+}
+
+void CodeGeneratorX86_64::GenerateFrameEntry() {
+ __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
+ __ Bind(&frame_entry_label_);
+ bool skip_overflow_check = IsLeafMethod()
+ && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
+ DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+
+
if (!skip_overflow_check) {
size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
__ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
RecordPcInfo(nullptr, 0);
}
- if (HasEmptyFrame()) {
- return;
- }
-
- for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
- Register reg = kCoreCalleeSaves[i];
- if (allocated_registers_.ContainsCoreRegister(reg)) {
- __ pushq(CpuRegister(reg));
- __ cfi().AdjustCFAOffset(kX86_64WordSize);
- __ cfi().RelOffset(DWARFReg(reg), 0);
+ if (!HasEmptyFrame()) {
+ for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
+ Register reg = kCoreCalleeSaves[i];
+ if (allocated_registers_.ContainsCoreRegister(reg)) {
+ __ pushq(CpuRegister(reg));
+ __ cfi().AdjustCFAOffset(kX86_64WordSize);
+ __ cfi().RelOffset(DWARFReg(reg), 0);
+ }
}
- }
- int adjust = GetFrameSize() - GetCoreSpillSize();
- __ subq(CpuRegister(RSP), Immediate(adjust));
- __ cfi().AdjustCFAOffset(adjust);
- uint32_t xmm_spill_location = GetFpuSpillStart();
- size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
+ int adjust = GetFrameSize() - GetCoreSpillSize();
+ __ subq(CpuRegister(RSP), Immediate(adjust));
+ __ cfi().AdjustCFAOffset(adjust);
+ uint32_t xmm_spill_location = GetFpuSpillStart();
+ size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
+
+ for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
+ if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
+ int offset = xmm_spill_location + (xmm_spill_slot_size * i);
+ __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
+ __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
+ }
+ }
- for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
- if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
- int offset = xmm_spill_location + (xmm_spill_slot_size * i);
- __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
- __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
+ // Save the current method if we need it. Note that we do not
+ // do this in HCurrentMethod, as the instruction might have been removed
+ // in the SSA graph.
+ if (RequiresCurrentMethod()) {
+ CHECK(!HasEmptyFrame());
+ __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
+ CpuRegister(kMethodRegisterArgument));
}
- }
- // Save the current method if we need it. Note that we do not
- // do this in HCurrentMethod, as the instruction might have been removed
- // in the SSA graph.
- if (RequiresCurrentMethod()) {
- __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
- CpuRegister(kMethodRegisterArgument));
+ if (GetGraph()->HasShouldDeoptimizeFlag()) {
+ CHECK(!HasEmptyFrame());
+ // Initialize should_deoptimize flag to 0.
+ __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
+ }
}
- if (GetGraph()->HasShouldDeoptimizeFlag()) {
- // Initialize should_deoptimize flag to 0.
- __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
- }
+ MaybeIncrementHotness(/* is_frame_entry= */ true);
}
void CodeGeneratorX86_64::GenerateFrameExit() {
@@ -1556,16 +1594,7 @@ void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock*
HLoopInformation* info = block->GetLoopInformation();
if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
- if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
- __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0));
- NearLabel overflow;
- __ cmpw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
- Immediate(ArtMethod::MaxCounter()));
- __ j(kEqual, &overflow);
- __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
- Immediate(1));
- __ Bind(&overflow);
- }
+ codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
GenerateSuspendCheck(info->GetSuspendCheck(), successor);
return;
}
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 5537a4a4d9..2e8d9b3315 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -603,6 +603,8 @@ class CodeGeneratorX86_64 : public CodeGenerator {
void MaybeGenerateInlineCacheCheck(HInstruction* instruction, CpuRegister cls);
+ void MaybeIncrementHotness(bool is_frame_entry);
+
// When we don't know the proper offset for the value, we use kDummy32BitOffset.
// We will fix this up in the linker later to have the right value.
static constexpr int32_t kDummy32BitOffset = 256;
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index ff7ed349a3..ecaedc7cd8 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -1813,6 +1813,7 @@ bool HInliner::TryBuildAndInlineHelper(HInvoke* invoke_instruction,
graph_->IsDebuggable(),
/* osr= */ false,
/* is_shared_jit_code= */ graph_->IsCompilingForSharedJitCode(),
+ /* baseline= */ graph_->IsCompilingBaseline(),
/* start_instruction_id= */ caller_instruction_counter);
callee_graph->SetArtMethod(resolved_method);
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 09ae6fab84..043338466f 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -322,6 +322,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool debuggable = false,
bool osr = false,
bool is_shared_jit_code = false,
+ bool baseline = false,
int start_instruction_id = 0)
: allocator_(allocator),
arena_stack_(arena_stack),
@@ -358,6 +359,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
art_method_(nullptr),
inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
osr_(osr),
+ baseline_(baseline),
cha_single_implementation_list_(allocator->Adapter(kArenaAllocCHA)),
is_shared_jit_code_(is_shared_jit_code) {
blocks_.reserve(kDefaultNumberOfBlocks);
@@ -589,6 +591,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
bool IsCompilingOsr() const { return osr_; }
+ bool IsCompilingBaseline() const { return baseline_; }
+
bool IsCompilingForSharedJitCode() const {
return is_shared_jit_code_;
}
@@ -786,6 +790,10 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
// compiled code entries which the interpreter can directly jump to.
const bool osr_;
+ // Whether we are compiling baseline (not running optimizations). This affects
+ // the code being generated.
+ const bool baseline_;
+
// List of methods that are assumed to have single implementation.
ArenaSet<ArtMethod*> cha_single_implementation_list_;
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 3f11170c66..f8eae2fb70 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -851,7 +851,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* allocator,
dead_reference_safe,
compiler_options.GetDebuggable(),
/* osr= */ osr,
- /* is_shared_jit_code= */ is_shared_jit_code);
+ /* is_shared_jit_code= */ is_shared_jit_code,
+ /* baseline= */ baseline);
if (method != nullptr) {
graph->SetArtMethod(method);
@@ -1174,7 +1175,8 @@ static ScopedArenaVector<uint8_t> CreateJniStackMap(ScopedArenaAllocator* alloca
jni_compiled_method.GetFrameSize(),
jni_compiled_method.GetCoreSpillMask(),
jni_compiled_method.GetFpSpillMask(),
- /* num_dex_registers= */ 0);
+ /* num_dex_registers= */ 0,
+ /* baseline= */ false);
stack_map_stream->EndMethod();
return stack_map_stream->Encode();
}
diff --git a/compiler/optimizing/stack_map_stream.cc b/compiler/optimizing/stack_map_stream.cc
index 3f6010d7af..dd6d1a2959 100644
--- a/compiler/optimizing/stack_map_stream.cc
+++ b/compiler/optimizing/stack_map_stream.cc
@@ -42,7 +42,8 @@ void StackMapStream::SetStackMapNativePcOffset(size_t i, uint32_t native_pc_offs
void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
size_t core_spill_mask,
size_t fp_spill_mask,
- uint32_t num_dex_registers) {
+ uint32_t num_dex_registers,
+ bool baseline) {
DCHECK(!in_method_) << "Mismatched Begin/End calls";
in_method_ = true;
DCHECK_EQ(packed_frame_size_, 0u) << "BeginMethod was already called";
@@ -52,6 +53,7 @@ void StackMapStream::BeginMethod(size_t frame_size_in_bytes,
core_spill_mask_ = core_spill_mask;
fp_spill_mask_ = fp_spill_mask;
num_dex_registers_ = num_dex_registers;
+ baseline_ = baseline;
if (kVerifyStackMaps) {
dchecks_.emplace_back([=](const CodeInfo& code_info) {
@@ -299,6 +301,7 @@ ScopedArenaVector<uint8_t> StackMapStream::Encode() {
DCHECK(in_inline_info_ == false) << "Mismatched Begin/End calls";
uint32_t flags = (inline_infos_.size() > 0) ? CodeInfo::kHasInlineInfo : 0;
+ flags |= baseline_ ? CodeInfo::kIsBaseline : 0;
uint32_t bit_table_flags = 0;
ForEachBitTable([&bit_table_flags](size_t i, auto bit_table) {
if (bit_table->size() != 0) { // Record which bit-tables are stored.
diff --git a/compiler/optimizing/stack_map_stream.h b/compiler/optimizing/stack_map_stream.h
index f45e3d720e..67f716ce70 100644
--- a/compiler/optimizing/stack_map_stream.h
+++ b/compiler/optimizing/stack_map_stream.h
@@ -61,7 +61,8 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
void BeginMethod(size_t frame_size_in_bytes,
size_t core_spill_mask,
size_t fp_spill_mask,
- uint32_t num_dex_registers);
+ uint32_t num_dex_registers,
+ bool baseline = false);
void EndMethod();
void BeginStackMapEntry(uint32_t dex_pc,
@@ -119,6 +120,7 @@ class StackMapStream : public DeletableArenaObject<kArenaAllocStackMapStream> {
uint32_t core_spill_mask_ = 0;
uint32_t fp_spill_mask_ = 0;
uint32_t num_dex_registers_ = 0;
+ bool baseline_;
BitTableBuilder<StackMap> stack_maps_;
BitTableBuilder<RegisterMask> register_masks_;
BitmapTableBuilder stack_masks_;
diff --git a/dex2oat/linker/oat_writer_test.cc b/dex2oat/linker/oat_writer_test.cc
index 319e3590a5..0a43bfc4c7 100644
--- a/dex2oat/linker/oat_writer_test.cc
+++ b/dex2oat/linker/oat_writer_test.cc
@@ -471,7 +471,7 @@ TEST_F(OatTest, OatHeaderSizeCheck) {
EXPECT_EQ(56U, sizeof(OatHeader));
EXPECT_EQ(4U, sizeof(OatMethodOffsets));
EXPECT_EQ(8U, sizeof(OatQuickMethodHeader));
- EXPECT_EQ(168 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+ EXPECT_EQ(169 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
sizeof(QuickEntryPoints));
}
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index bad37bcab2..8b1fc9e91c 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -2803,3 +2803,15 @@ ENTRY art_quick_update_inline_cache
.Ldone:
blx lr
END art_quick_update_inline_cache
+
+// On entry, the method is at the bottom of the stack.
+ENTRY art_quick_compile_optimized
+ SETUP_SAVE_EVERYTHING_FRAME r0
+ ldr r0, [sp, FRAME_SIZE_SAVE_EVERYTHING] @ pass ArtMethod
+ mov r1, rSELF @ pass Thread::Current
+ bl artCompileOptimized @ (ArtMethod*, Thread*)
+ RESTORE_SAVE_EVERYTHING_FRAME
+ // We don't need to restore the marking register here, as
+ // artCompileOptimized doesn't allow thread suspension.
+ blx lr
+END art_quick_compile_optimized
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 6e9b533663..e0094e6f3d 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -2927,3 +2927,15 @@ ENTRY art_quick_update_inline_cache
.Ldone:
ret
END art_quick_update_inline_cache
+
+// On entry, the method is at the bottom of the stack.
+ENTRY art_quick_compile_optimized
+ SETUP_SAVE_EVERYTHING_FRAME
+ ldr x0, [sp, #FRAME_SIZE_SAVE_EVERYTHING] // pass ArtMethod
+ mov x1, xSELF // pass Thread::Current
+ bl artCompileOptimized // (ArtMethod*, Thread*)
+ RESTORE_SAVE_EVERYTHING_FRAME
+ // We don't need to restore the marking register here, as
+ // artCompileOptimized doesn't allow thread suspension.
+ ret
+END art_quick_compile_optimized
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 2bf82d0b6c..794ee89848 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -2570,3 +2570,20 @@ END_FUNCTION art_quick_update_inline_cache
// TODO: implement these!
UNIMPLEMENTED art_quick_memcmp16
+
+// On entry, the method is at the bottom of the stack.
+DEFINE_FUNCTION art_quick_compile_optimized
+ SETUP_SAVE_EVERYTHING_FRAME ebx, ebx
+ mov FRAME_SIZE_SAVE_EVERYTHING(%esp), %eax // Fetch ArtMethod
+ sub LITERAL(8), %esp // Alignment padding
+ CFI_ADJUST_CFA_OFFSET(8)
+ pushl %fs:THREAD_SELF_OFFSET // pass Thread::Current()
+ CFI_ADJUST_CFA_OFFSET(4)
+ pushl %eax
+ CFI_ADJUST_CFA_OFFSET(4)
+ call SYMBOL(artCompileOptimized) // (ArtMethod*, Thread*)
+ addl LITERAL(16), %esp // Pop arguments.
+ CFI_ADJUST_CFA_OFFSET(-16)
+ RESTORE_SAVE_EVERYTHING_FRAME
+ ret
+END_FUNCTION art_quick_compile_optimized
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 22d0ce4edb..3b30c37309 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -2511,3 +2511,13 @@ DEFINE_FUNCTION art_quick_update_inline_cache
.Ldone:
ret
END_FUNCTION art_quick_update_inline_cache
+
+// On entry, the method is at the bottom of the stack.
+DEFINE_FUNCTION art_quick_compile_optimized
+ SETUP_SAVE_EVERYTHING_FRAME
+ movq FRAME_SIZE_SAVE_EVERYTHING(%rsp), %rdi // pass ArtMethod
+ movq %gs:THREAD_SELF_OFFSET, %rsi // pass Thread::Current()
+ call SYMBOL(artCompileOptimized) // (ArtMethod*, Thread*)
+ RESTORE_SAVE_EVERYTHING_FRAME // restore frame up to return address
+ ret
+END_FUNCTION art_quick_compile_optimized
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index d41f9a0a2f..048deb4803 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -125,8 +125,9 @@ static void DefaultInitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qp
// StringBuilder append
qpoints->pStringBuilderAppend = art_quick_string_builder_append;
- // InlineCache update
+ // Tiered JIT support
qpoints->pUpdateInlineCache = art_quick_update_inline_cache;
+ qpoints->pCompileOptimized = art_quick_compile_optimized;
}
} // namespace art
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index efab7c25a5..e031b21fb3 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -172,6 +172,7 @@
V(StringBuilderAppend, void*, uint32_t) \
\
V(UpdateInlineCache, void, void) \
+ V(CompileOptimized, void, ArtMethod*, Thread*) \
\
V(ReadBarrierJni, void, mirror::CompressedReference<mirror::Object>*, Thread*) \
V(ReadBarrierMarkReg00, mirror::Object*, mirror::Object*) \
diff --git a/runtime/entrypoints/quick/quick_thread_entrypoints.cc b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
index 0838059714..64be926853 100644
--- a/runtime/entrypoints/quick/quick_thread_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_thread_entrypoints.cc
@@ -15,6 +15,8 @@
*/
#include "callee_save_frame.h"
+#include "jit/jit.h"
+#include "runtime.h"
#include "thread-inl.h"
namespace art {
@@ -25,4 +27,11 @@ extern "C" void artTestSuspendFromCode(Thread* self) REQUIRES_SHARED(Locks::muta
self->CheckSuspend();
}
+extern "C" void artCompileOptimized(ArtMethod* method, Thread* self)
+ REQUIRES_SHARED(Locks::mutator_lock_) {
+ ScopedQuickEntrypointChecks sqec(self);
+ ScopedAssertNoThreadSuspension sants("Enqueuing optimized compilation");
+ Runtime::Current()->GetJit()->EnqueueOptimizedCompilation(method, self);
+}
+
} // namespace art
diff --git a/runtime/entrypoints/runtime_asm_entrypoints.h b/runtime/entrypoints/runtime_asm_entrypoints.h
index 3f4e91ed91..740629a400 100644
--- a/runtime/entrypoints/runtime_asm_entrypoints.h
+++ b/runtime/entrypoints/runtime_asm_entrypoints.h
@@ -88,6 +88,7 @@ static inline const void* GetQuickInstrumentationExitPc() {
}
extern "C" void* art_quick_string_builder_append(uint32_t format);
+extern "C" void art_quick_compile_optimized(ArtMethod*, Thread*);
} // namespace art
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 210d85162e..36f5b398e8 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -338,7 +338,9 @@ class EntrypointsOrderTest : public CommonRuntimeTest {
sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pStringBuilderAppend, pUpdateInlineCache,
sizeof(void*));
- EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pUpdateInlineCache, pReadBarrierJni,
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pUpdateInlineCache, pCompileOptimized,
+ sizeof(void*));
+ EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCompileOptimized, pReadBarrierJni,
sizeof(void*));
EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pReadBarrierJni, pReadBarrierMarkReg00,
sizeof(void*));
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index 8c7d657918..ffcee4b8d0 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -89,6 +89,8 @@ JitCompilerInterface* (*Jit::jit_load_)(void) = nullptr;
JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& options) {
auto* jit_options = new JitOptions;
jit_options->use_jit_compilation_ = options.GetOrDefault(RuntimeArgumentMap::UseJitCompilation);
+ jit_options->use_tiered_jit_compilation_ =
+ options.GetOrDefault(RuntimeArgumentMap::UseTieredJitCompilation);
jit_options->code_cache_initial_capacity_ =
options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheInitialCapacity);
@@ -318,13 +320,14 @@ bool Jit::CompileMethod(ArtMethod* method, Thread* self, bool baseline, bool osr
// If we get a request to compile a proxy method, we pass the actual Java method
// of that proxy method, as the compiler does not expect a proxy method.
ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
- if (!code_cache_->NotifyCompilationOf(method_to_compile, self, osr, prejit, region)) {
+ if (!code_cache_->NotifyCompilationOf(method_to_compile, self, osr, prejit, baseline, region)) {
return false;
}
VLOG(jit) << "Compiling method "
<< ArtMethod::PrettyMethod(method_to_compile)
- << " osr=" << std::boolalpha << osr;
+ << " osr=" << std::boolalpha << osr
+ << " baseline=" << std::boolalpha << baseline;
bool success = jit_compiler_->CompileMethod(self, region, method_to_compile, baseline, osr);
code_cache_->DoneCompiling(method_to_compile, self, osr);
if (!success) {
@@ -1449,7 +1452,10 @@ bool Jit::MaybeCompileMethod(Thread* self,
if (old_count < HotMethodThreshold() && new_count >= HotMethodThreshold()) {
if (!code_cache_->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
DCHECK(thread_pool_ != nullptr);
- thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::TaskKind::kCompile));
+ JitCompileTask::TaskKind kind = options_->UseTieredJitCompilation()
+ ? JitCompileTask::TaskKind::kCompileBaseline
+ : JitCompileTask::TaskKind::kCompile;
+ thread_pool_->AddTask(self, new JitCompileTask(method, kind));
}
}
if (old_count < OSRMethodThreshold() && new_count >= OSRMethodThreshold()) {
@@ -1467,6 +1473,11 @@ bool Jit::MaybeCompileMethod(Thread* self,
return true;
}
+void Jit::EnqueueOptimizedCompilation(ArtMethod* method, Thread* self) {
+ thread_pool_->AddTask(
+ self, new JitCompileTask(method, JitCompileTask::TaskKind::kCompile));
+}
+
class ScopedSetRuntimeThread {
public:
explicit ScopedSetRuntimeThread(Thread* self)
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index e5b77c2c7e..42adf6ba7c 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -114,6 +114,10 @@ class JitOptions {
return use_jit_compilation_;
}
+ bool UseTieredJitCompilation() const {
+ return use_tiered_jit_compilation_;
+ }
+
void SetUseJitCompilation(bool b) {
use_jit_compilation_ = b;
}
@@ -137,6 +141,7 @@ class JitOptions {
static uint32_t RoundUpThreshold(uint32_t threshold);
bool use_jit_compilation_;
+ bool use_tiered_jit_compilation_;
size_t code_cache_initial_capacity_;
size_t code_cache_max_capacity_;
uint32_t compile_threshold_;
@@ -383,6 +388,8 @@ class Jit {
// class path methods.
void NotifyZygoteCompilationDone();
+ void EnqueueOptimizedCompilation(ArtMethod* method, Thread* self);
+
private:
Jit(JitCodeCache* code_cache, JitOptions* options);
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index c02a699eed..519655d843 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -1581,9 +1581,19 @@ bool JitCodeCache::NotifyCompilationOf(ArtMethod* method,
Thread* self,
bool osr,
bool prejit,
+ bool baseline,
JitMemoryRegion* region) {
- if (!osr && ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
- return false;
+ const void* existing_entry_point = method->GetEntryPointFromQuickCompiledCode();
+ if (!osr && ContainsPc(existing_entry_point)) {
+ OatQuickMethodHeader* method_header =
+ OatQuickMethodHeader::FromEntryPoint(existing_entry_point);
+ if (CodeInfo::IsBaseline(method_header->GetOptimizedCodeInfoPtr()) == baseline) {
+ VLOG(jit) << "Not compiling "
+ << method->PrettyMethod()
+ << " because it has already been compiled"
+ << " baseline=" << std::boolalpha << baseline;
+ return false;
+ }
}
if (NeedsClinitCheckBeforeCall(method) && !prejit) {
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 58cf0e36a7..61fee34a6e 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -184,6 +184,7 @@ class JitCodeCache {
Thread* self,
bool osr,
bool prejit,
+ bool baseline,
JitMemoryRegion* region)
REQUIRES_SHARED(Locks::mutator_lock_)
REQUIRES(!Locks::jit_lock_);
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 2cb569c61a..8c88760158 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -26,7 +26,8 @@
namespace art {
ProfilingInfo::ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
- : method_(method),
+ : baseline_hotness_count_(0),
+ method_(method),
saved_entry_point_(nullptr),
number_of_inline_caches_(entries.size()),
current_inline_uses_(0),
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index d4dc49867b..ada103658a 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -126,9 +126,18 @@ class ProfilingInfo {
(current_inline_uses_ > 0);
}
+ static constexpr MemberOffset BaselineHotnessCountOffset() {
+ return MemberOffset(OFFSETOF_MEMBER(ProfilingInfo, baseline_hotness_count_));
+ }
+
private:
ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries);
+ // Hotness count for methods compiled with the JIT baseline compiler. Once
+ // a threshold is hit (currently the maximum value of uint16_t), we will
+ // JIT-compile the method with the optimizing compiler.
+ uint16_t baseline_hotness_count_;
+
// Method this profiling info is for.
// Not 'const' as JVMTI introduces obsolete methods that we implement by creating new ArtMethods.
// See JitCodeCache::MoveObsoleteMethod.
diff --git a/runtime/oat.h b/runtime/oat.h
index 3b20ea1777..6c739b2380 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,8 +32,8 @@ class InstructionSetFeatures;
class PACKED(4) OatHeader {
public:
static constexpr std::array<uint8_t, 4> kOatMagic { { 'o', 'a', 't', '\n' } };
- // Last oat version changed reason: pUpdateInlineCache entrypoint.
- static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '7', '\0' } };
+ // Last oat version changed reason: pCompileOptimized entrypoint.
+ static constexpr std::array<uint8_t, 4> kOatVersion { { '1', '7', '8', '\0' } };
static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
static constexpr const char* kDebuggableKey = "debuggable";
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index bfedfa9c1b..7ef1e6d52b 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -201,6 +201,10 @@ std::unique_ptr<RuntimeParser> ParsedOptions::MakeParser(bool ignore_unrecognize
.WithType<bool>()
.WithValueMap({{"false", false}, {"true", true}})
.IntoKey(M::UseJitCompilation)
+ .Define("-Xusetieredjit:_")
+ .WithType<bool>()
+ .WithValueMap({{"false", false}, {"true", true}})
+ .IntoKey(M::UseTieredJitCompilation)
.Define("-Xjitinitialsize:_")
.WithType<MemoryKiB>()
.IntoKey(M::JITCodeCacheInitialCapacity)
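
Note: the new option mirrors the existing -Xusejit flag, so tiered JIT compilation can be toggled at startup with -Xusetieredjit:true or -Xusetieredjit:false; per the runtime_options.def change below it defaults to false, making the behavior opt-in.
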
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index 037167ebcc..5db5a9058a 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -75,6 +75,7 @@ RUNTIME_OPTIONS_KEY (Unit, LowMemoryMode)
RUNTIME_OPTIONS_KEY (bool, UseTLAB, (kUseTlab || kUseReadBarrier))
RUNTIME_OPTIONS_KEY (bool, EnableHSpaceCompactForOOM, true)
RUNTIME_OPTIONS_KEY (bool, UseJitCompilation, true)
+RUNTIME_OPTIONS_KEY (bool, UseTieredJitCompilation, false)
RUNTIME_OPTIONS_KEY (bool, DumpNativeStackOnSigQuit, true)
RUNTIME_OPTIONS_KEY (bool, MadviseRandomAccess, false)
RUNTIME_OPTIONS_KEY (JniIdType, OpaqueJniIds, JniIdType::kDefault) // -Xopaque-jni-ids:{true, false}
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 598f3e4473..2065a79dd5 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -429,6 +429,10 @@ class CodeInfo {
return (*code_info_data & kHasInlineInfo) != 0;
}
+ ALWAYS_INLINE static bool IsBaseline(const uint8_t* code_info_data) {
+ return (*code_info_data & kIsBaseline) != 0;
+ }
+
private:
// Scan backward to determine dex register locations at given stack map.
void DecodeDexRegisterMap(uint32_t stack_map_index,
@@ -472,6 +476,7 @@ class CodeInfo {
enum Flags {
kHasInlineInfo = 1 << 0,
+ kIsBaseline = 1 << 1,
};
// The CodeInfo starts with sequence of variable-length bit-encoded integers.
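
Note: consumers classify compiled code by testing the new flag bit in the leading byte of the encoded CodeInfo, which is how JitCodeCache::NotifyCompilationOf above distinguishes baseline from optimized code. A self-contained C++ sketch of that flag test, ignoring the variable-length integer encoding the real CodeInfo header uses:

#include <cassert>
#include <cstdint>

enum Flags : uint8_t {
  kHasInlineInfo = 1 << 0,
  kIsBaseline    = 1 << 1,
};

// Same shape as the new CodeInfo::IsBaseline accessor: read the flag bits and test kIsBaseline.
bool IsBaseline(const uint8_t* code_info_data) {
  return (*code_info_data & kIsBaseline) != 0;
}

int main() {
  uint8_t baseline_header[] = { kIsBaseline };
  uint8_t optimized_header[] = { kHasInlineInfo };
  assert(IsBaseline(baseline_header));
  assert(!IsBaseline(optimized_header));
}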