Simplify hotness count in baseline compiled code.
- Always require the current ArtMethod, which also removes the need to
handle empty frames.
- Remove the use of some temporary registers.
- Require a ProfilingInfo when compiling baseline.
- Add a slow path for requesting an optimized compilation.
- Make the counter decrement instead of increment.
A follow-up CL will make the hotness threshold configurable through --jitthreshold.
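
For reviewers, the scheme in the generated code now amounts to the following
standalone C++ model (not ART code: kThreshold, FakeProfilingInfo,
RequestOptimizedCompilation and OnEntryOrBackEdge are illustrative stand-ins
for interpreter::kTieredHotnessMask, ProfilingInfo, the kQuickCompileOptimized
entrypoint and the emitted prologue/back-edge checks):

  #include <cstdint>
  #include <cstdio>

  // Placeholder for interpreter::kTieredHotnessMask, the value the counter
  // is initialized to and reset to (the real value lives in nterp).
  constexpr uint16_t kThreshold = 0xFFFF;

  // Stand-in for the relevant part of ProfilingInfo.
  struct FakeProfilingInfo {
    uint16_t baseline_hotness_count = kThreshold;
  };

  // Models the CompileOptimizedSlowPath: call into the runtime, which
  // (see Jit::EnqueueOptimizedCompilation below) resets the counter so the
  // baseline code does not keep hitting the slow path.
  void RequestOptimizedCompilation(FakeProfilingInfo& info) {
    std::printf("enqueue optimized compilation\n");
    info.baseline_hotness_count = kThreshold;
  }

  // Models the check emitted at method entry and loop back edges: load,
  // test against zero, decrement, store. The exact compare/decrement
  // ordering differs per architecture in the diff below.
  void OnEntryOrBackEdge(FakeProfilingInfo& info) {
    uint16_t counter = info.baseline_hotness_count;
    if (counter == 0) {
      RequestOptimizedCompilation(info);
    } else {
      info.baseline_hotness_count = static_cast<uint16_t>(counter - 1);
    }
  }

Counting down also lets JitCodeCache::DoCollection test whether a method ran
since the last reset with a single ProfilingInfo::CounterHasChanged()
comparison against the initial value, instead of comparing against zero.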
Test: test.py
Bug: 146423102
Change-Id: I1485f66401d6ed218456fe2849eb05fa77479668
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index ced94f4..27eabaf 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -1081,6 +1081,11 @@
}
}
}
+ if (GetGraph()->IsCompilingBaseline()) {
+ // We need the current method in case we reach the hotness threshold. As a
+ // side effect this makes the frame non-empty.
+ SetRequiresCurrentMethod();
+ }
}
CodeGenerator::~CodeGenerator() {}
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index bcb5ac5..933e270 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -848,6 +848,29 @@
DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARM64);
};
+class CompileOptimizedSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+ CompileOptimizedSlowPathARM64() : SlowPathCodeARM64(/* instruction= */ nullptr) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ uint32_t entrypoint_offset =
+ GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
+ __ Bind(GetEntryLabel());
+ __ Ldr(lr, MemOperand(tr, entrypoint_offset));
+ // Note: we don't record the call here (and therefore don't generate a stack
+ // map), as the entrypoint should never be suspended.
+ __ Blr(lr);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const override {
+ return "CompileOptimizedSlowPath";
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARM64);
+};
+
#undef __
Location InvokeDexCallingConventionVisitorARM64::GetNextLocation(DataType::Type type) {
@@ -1199,46 +1222,22 @@
}
if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
- ScopedProfilingInfoUse spiu(
- Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
- ProfilingInfo* info = spiu.GetProfilingInfo();
- if (info != nullptr) {
- uint64_t address = reinterpret_cast64<uint64_t>(info);
- vixl::aarch64::Label done;
- UseScratchRegisterScope temps(masm);
- Register temp = temps.AcquireX();
- Register counter = temps.AcquireW();
- __ Mov(temp, address);
- __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
- __ Add(counter, counter, 1);
- __ And(counter, counter, interpreter::kTieredHotnessMask);
- __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
- __ Cbnz(counter, &done);
- if (is_frame_entry) {
- if (HasEmptyFrame()) {
- // The entrypoint expects the method at the bottom of the stack. We
- // claim stack space necessary for alignment.
- IncreaseFrame(kStackAlignment);
- __ Stp(kArtMethodRegister, lr, MemOperand(sp, 0));
- } else if (!RequiresCurrentMethod()) {
- __ Str(kArtMethodRegister, MemOperand(sp, 0));
- }
- } else {
- CHECK(RequiresCurrentMethod());
- }
- uint32_t entrypoint_offset =
- GetThreadOffset<kArm64PointerSize>(kQuickCompileOptimized).Int32Value();
- __ Ldr(lr, MemOperand(tr, entrypoint_offset));
- // Note: we don't record the call here (and therefore don't generate a stack
- // map), as the entrypoint should never be suspended.
- __ Blr(lr);
- if (HasEmptyFrame()) {
- CHECK(is_frame_entry);
- __ Ldr(lr, MemOperand(sp, 8));
- DecreaseFrame(kStackAlignment);
- }
- __ Bind(&done);
- }
+ SlowPathCodeARM64* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARM64();
+ AddSlowPath(slow_path);
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ DCHECK(!HasEmptyFrame());
+ uint64_t address = reinterpret_cast64<uint64_t>(info);
+ vixl::aarch64::Label done;
+ UseScratchRegisterScope temps(masm);
+ Register temp = temps.AcquireX();
+ Register counter = temps.AcquireW();
+ __ Ldr(temp, DeduplicateUint64Literal(address));
+ __ Ldrh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+ __ Cbz(counter, slow_path->GetEntryLabel());
+ __ Add(counter, counter, -1);
+ __ Strh(counter, MemOperand(temp, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+ __ Bind(slow_path->GetExitLabel());
}
}
@@ -4458,21 +4457,18 @@
GetGraph()->IsCompilingBaseline() &&
!Runtime::Current()->IsAotCompiler()) {
DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
- ScopedProfilingInfoUse spiu(
- Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
- ProfilingInfo* info = spiu.GetProfilingInfo();
- if (info != nullptr) {
- InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
- uint64_t address = reinterpret_cast64<uint64_t>(cache);
- vixl::aarch64::Label done;
- __ Mov(x8, address);
- __ Ldr(x9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value()));
- // Fast path for a monomorphic cache.
- __ Cmp(klass, x9);
- __ B(eq, &done);
- InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
- __ Bind(&done);
- }
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
+ uint64_t address = reinterpret_cast64<uint64_t>(cache);
+ vixl::aarch64::Label done;
+ __ Mov(x8, address);
+ __ Ldr(x9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value()));
+ // Fast path for a monomorphic cache.
+ __ Cmp(klass, x9);
+ __ B(eq, &done);
+ InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
+ __ Bind(&done);
}
}
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index aa06c5a..c514c22 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -997,6 +997,29 @@
DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathARMVIXL);
};
+class CompileOptimizedSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+ CompileOptimizedSlowPathARMVIXL() : SlowPathCodeARMVIXL(/* instruction= */ nullptr) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ uint32_t entry_point_offset =
+ GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
+ __ Bind(GetEntryLabel());
+ __ Ldr(lr, MemOperand(tr, entry_point_offset));
+ // Note: we don't record the call here (and therefore don't generate a stack
+ // map), as the entrypoint should never be suspended.
+ __ Blx(lr);
+ __ B(GetExitLabel());
+ }
+
+ const char* GetDescription() const override {
+ return "CompileOptimizedSlowPath";
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathARMVIXL);
+};
+
inline vixl32::Condition ARMCondition(IfCondition cond) {
switch (cond) {
case kCondEQ: return eq;
@@ -2200,54 +2223,20 @@
}
if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
- ScopedProfilingInfoUse spiu(
- Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
- ProfilingInfo* info = spiu.GetProfilingInfo();
- if (info != nullptr) {
- uint32_t address = reinterpret_cast32<uint32_t>(info);
- vixl::aarch32::Label done;
- UseScratchRegisterScope temps(GetVIXLAssembler());
- temps.Exclude(ip);
- if (!is_frame_entry) {
- __ Push(r4); // Will be used as temporary. For frame entry, r4 is always available.
- GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize);
- }
- __ Mov(r4, address);
- __ Ldrh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
- __ Add(ip, ip, 1);
- instruction_visitor_.GenerateAndConst(ip, ip, interpreter::kTieredHotnessMask);
- __ Strh(ip, MemOperand(r4, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
- if (!is_frame_entry) {
- __ Pop(r4);
- GetAssembler()->cfi().AdjustCFAOffset(-static_cast<int>(kArmWordSize));
- }
- __ Lsls(ip, ip, 16);
- __ B(ne, &done);
- uint32_t entry_point_offset =
- GetThreadOffset<kArmPointerSize>(kQuickCompileOptimized).Int32Value();
- if (HasEmptyFrame()) {
- CHECK(is_frame_entry);
- // For leaf methods, we need to spill lr and r0. Also spill r1 and r2 for
- // alignment.
- uint32_t core_spill_mask =
- (1 << lr.GetCode()) | (1 << r0.GetCode()) | (1 << r1.GetCode()) | (1 << r2.GetCode());
- __ Push(RegisterList(core_spill_mask));
- GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(core_spill_mask));
- __ Ldr(lr, MemOperand(tr, entry_point_offset));
- __ Blx(lr);
- __ Pop(RegisterList(core_spill_mask));
- GetAssembler()->cfi().AdjustCFAOffset(
- -static_cast<int>(kArmWordSize) * POPCOUNT(core_spill_mask));
- } else {
- if (!RequiresCurrentMethod()) {
- CHECK(is_frame_entry);
- GetAssembler()->StoreToOffset(kStoreWord, kMethodRegister, sp, 0);
- }
- __ Ldr(lr, MemOperand(tr, entry_point_offset));
- __ Blx(lr);
- }
- __ Bind(&done);
- }
+ SlowPathCodeARMVIXL* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathARMVIXL();
+ AddSlowPath(slow_path);
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ DCHECK(!HasEmptyFrame());
+ uint32_t address = reinterpret_cast32<uint32_t>(info);
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ vixl32::Register tmp = temps.Acquire();
+ __ Mov(lr, address);
+ __ Ldrh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+ __ Adds(tmp, tmp, -1);
+ __ B(cc, slow_path->GetEntryLabel());
+ __ Strh(tmp, MemOperand(lr, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()));
+ __ Bind(slow_path->GetExitLabel());
}
}
@@ -3535,23 +3524,20 @@
GetGraph()->IsCompilingBaseline() &&
!Runtime::Current()->IsAotCompiler()) {
DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
- ScopedProfilingInfoUse spiu(
- Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
- ProfilingInfo* info = spiu.GetProfilingInfo();
- if (info != nullptr) {
- InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
- uint32_t address = reinterpret_cast32<uint32_t>(cache);
- vixl32::Label done;
- UseScratchRegisterScope temps(GetVIXLAssembler());
- temps.Exclude(ip);
- __ Mov(r4, address);
- __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
- // Fast path for a monomorphic cache.
- __ Cmp(klass, ip);
- __ B(eq, &done, /* is_far_target= */ false);
- InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
- __ Bind(&done);
- }
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
+ uint32_t address = reinterpret_cast32<uint32_t>(cache);
+ vixl32::Label done;
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ temps.Exclude(ip);
+ __ Mov(r4, address);
+ __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
+ // Fast path for a monomorphic cache.
+ __ Cmp(klass, ip);
+ __ B(eq, &done, /* is_far_target= */ false);
+ InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
+ __ Bind(&done);
}
}
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 758a471..f19eaae 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -967,6 +967,26 @@
DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86);
};
+class CompileOptimizedSlowPathX86 : public SlowPathCode {
+ public:
+ CompileOptimizedSlowPathX86() : SlowPathCode(/* instruction= */ nullptr) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+ __ Bind(GetEntryLabel());
+ x86_codegen->GenerateInvokeRuntime(
+ GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const override {
+ return "CompileOptimizedSlowPath";
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86);
+};
+
#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(GetAssembler())-> // NOLINT
@@ -1210,52 +1230,19 @@
}
if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
- ScopedProfilingInfoUse spiu(
- Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
- ProfilingInfo* info = spiu.GetProfilingInfo();
- if (info != nullptr) {
- uint32_t address = reinterpret_cast32<uint32_t>(info);
- NearLabel done;
- if (HasEmptyFrame()) {
- CHECK(is_frame_entry);
- // Alignment
- IncreaseFrame(8);
- // We need a temporary. The stub also expects the method at bottom of stack.
- __ pushl(EAX);
- __ cfi().AdjustCFAOffset(4);
- __ movl(EAX, Immediate(address));
- __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
- Immediate(1));
- __ andw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
- Immediate(interpreter::kTieredHotnessMask));
- __ j(kNotZero, &done);
- GenerateInvokeRuntime(
- GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
- __ Bind(&done);
- // We don't strictly require to restore EAX, but this makes the generated
- // code easier to reason about.
- __ popl(EAX);
- __ cfi().AdjustCFAOffset(-4);
- DecreaseFrame(8);
- } else {
- if (!RequiresCurrentMethod()) {
- CHECK(is_frame_entry);
- __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
- }
- // We need a temporary.
- __ pushl(EAX);
- __ cfi().AdjustCFAOffset(4);
- __ movl(EAX, Immediate(address));
- __ addw(Address(EAX, ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
- Immediate(1));
- __ popl(EAX); // Put stack as expected before exiting or calling stub.
- __ cfi().AdjustCFAOffset(-4);
- __ j(kCarryClear, &done);
- GenerateInvokeRuntime(
- GetThreadOffset<kX86PointerSize>(kQuickCompileOptimized).Int32Value());
- __ Bind(&done);
- }
- }
+ SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86();
+ AddSlowPath(slow_path);
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ uint32_t address = reinterpret_cast32<uint32_t>(info) +
+ ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
+ DCHECK(!HasEmptyFrame());
+ // With multiple threads, this can wrap around. That is OK: we will eventually see
+ // the counter reach 0 again. Also, at this point we have no register available to
+ // look at the counter directly.
+ __ addw(Address::Absolute(address), Immediate(-1));
+ __ j(kEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
}
@@ -2669,25 +2656,22 @@
GetGraph()->IsCompilingBaseline() &&
!Runtime::Current()->IsAotCompiler()) {
DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
- ScopedProfilingInfoUse spiu(
- Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
- ProfilingInfo* info = spiu.GetProfilingInfo();
- if (info != nullptr) {
- InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
- uint32_t address = reinterpret_cast32<uint32_t>(cache);
- if (kIsDebugBuild) {
- uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
- CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
- }
- Register temp = EBP;
- NearLabel done;
- __ movl(temp, Immediate(address));
- // Fast path for a monomorphic cache.
- __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
- __ j(kEqual, &done);
- GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
- __ Bind(&done);
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
+ uint32_t address = reinterpret_cast32<uint32_t>(cache);
+ if (kIsDebugBuild) {
+ uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
+ CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
}
+ Register temp = EBP;
+ NearLabel done;
+ __ movl(temp, Immediate(address));
+ // Fast path for a monomorphic cache.
+ __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
+ __ j(kEqual, &done);
+ GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
+ __ Bind(&done);
}
}
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index c402e83..b0bdffe 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -991,6 +991,26 @@
DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64);
};
+class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
+ public:
+ CompileOptimizedSlowPathX86_64() : SlowPathCode(/* instruction= */ nullptr) {}
+
+ void EmitNativeCode(CodeGenerator* codegen) override {
+ CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+ __ Bind(GetEntryLabel());
+ x86_64_codegen->GenerateInvokeRuntime(
+ GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
+ __ jmp(GetExitLabel());
+ }
+
+ const char* GetDescription() const override {
+ return "CompileOptimizedSlowPath";
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86_64);
+};
+
#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
@@ -1602,37 +1622,22 @@
}
if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
- ScopedProfilingInfoUse spiu(
- Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
- ProfilingInfo* info = spiu.GetProfilingInfo();
- if (info != nullptr) {
- uint64_t address = reinterpret_cast64<uint64_t>(info);
- NearLabel done;
- __ movq(CpuRegister(TMP), Immediate(address));
- __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
- Immediate(1));
- __ andw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
- Immediate(interpreter::kTieredHotnessMask));
- __ j(kNotZero, &done);
- if (HasEmptyFrame()) {
- CHECK(is_frame_entry);
- // Frame alignment, and the stub expects the method on the stack.
- __ pushq(CpuRegister(RDI));
- __ cfi().AdjustCFAOffset(kX86_64WordSize);
- __ cfi().RelOffset(DWARFReg(RDI), 0);
- } else if (!RequiresCurrentMethod()) {
- CHECK(is_frame_entry);
- __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset), CpuRegister(RDI));
- }
- GenerateInvokeRuntime(
- GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
- if (HasEmptyFrame()) {
- __ popq(CpuRegister(RDI));
- __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
- __ cfi().Restore(DWARFReg(RDI));
- }
- __ Bind(&done);
- }
+ SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64();
+ AddSlowPath(slow_path);
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ CHECK(!HasEmptyFrame());
+ uint64_t address = reinterpret_cast64<uint64_t>(info);
+ // Note: if the address was in the 32bit range, we could use
+ // Address::Absolute and avoid this movq.
+ __ movq(CpuRegister(TMP), Immediate(address));
+ // With multiple threads, this can wrap around. That is OK: we will eventually see
+ // the counter reach 0 again. Also, at this point we have no register available to
+ // look at the counter directly.
+ __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
+ Immediate(-1));
+ __ j(kEqual, slow_path->GetEntryLabel());
+ __ Bind(slow_path->GetExitLabel());
}
}
@@ -2903,21 +2908,18 @@
if (!instruction->GetLocations()->Intrinsified() &&
GetGraph()->IsCompilingBaseline() &&
!Runtime::Current()->IsAotCompiler()) {
- ScopedProfilingInfoUse spiu(
- Runtime::Current()->GetJit(), GetGraph()->GetArtMethod(), Thread::Current());
- ProfilingInfo* info = spiu.GetProfilingInfo();
- if (info != nullptr) {
- InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
- uint64_t address = reinterpret_cast64<uint64_t>(cache);
- NearLabel done;
- __ movq(CpuRegister(TMP), Immediate(address));
- // Fast path for a monomorphic cache.
- __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
- __ j(kEqual, &done);
- GenerateInvokeRuntime(
- GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
- __ Bind(&done);
- }
+ ProfilingInfo* info = GetGraph()->GetProfilingInfo();
+ DCHECK(info != nullptr);
+ InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
+ uint64_t address = reinterpret_cast64<uint64_t>(cache);
+ NearLabel done;
+ __ movq(CpuRegister(TMP), Immediate(address));
+ // Fast path for a monomorphic cache.
+ __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
+ __ j(kEqual, &done);
+ GenerateInvokeRuntime(
+ GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
+ __ Bind(&done);
}
}
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index a40218d..ac71ce9 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -621,9 +621,7 @@
ArtMethod* caller = graph_->GetArtMethod();
// Under JIT, we should always know the caller.
DCHECK(caller != nullptr);
- ScopedProfilingInfoUse spiu(Runtime::Current()->GetJit(), caller, Thread::Current());
- ProfilingInfo* profiling_info = spiu.GetProfilingInfo();
-
+ ProfilingInfo* profiling_info = graph_->GetProfilingInfo();
if (profiling_info == nullptr) {
return kInlineCacheNoData;
}
@@ -1995,6 +1993,11 @@
/* start_instruction_id= */ caller_instruction_counter);
callee_graph->SetArtMethod(resolved_method);
+ ScopedProfilingInfoUse spiu(Runtime::Current()->GetJit(), resolved_method, Thread::Current());
+ if (Runtime::Current()->GetJit() != nullptr) {
+ callee_graph->SetProfilingInfo(spiu.GetProfilingInfo());
+ }
+
// When they are needed, allocate `inline_stats_` on the Arena instead
// of on the stack, as Clang might produce a stack frame too large
// for this function, that would not fit the requirements of the
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 16e26dc..c50e047 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -75,6 +75,7 @@
class FieldInfo;
class LiveInterval;
class LocationSummary;
+class ProfilingInfo;
class SlowPathCode;
class SsaBuilder;
@@ -704,6 +705,9 @@
ArtMethod* GetArtMethod() const { return art_method_; }
void SetArtMethod(ArtMethod* method) { art_method_ = method; }
+ void SetProfilingInfo(ProfilingInfo* info) { profiling_info_ = info; }
+ ProfilingInfo* GetProfilingInfo() const { return profiling_info_; }
+
// Returns an instruction with the opposite Boolean value from 'cond'.
// The instruction has been inserted into the graph, either as a constant, or
// before cursor.
@@ -870,6 +874,9 @@
// (such as when the superclass could not be found).
ArtMethod* art_method_;
+ // The `ProfilingInfo` associated with the method being compiled.
+ ProfilingInfo* profiling_info_;
+
// How we are compiling the graph: either optimized, osr, or baseline.
// For osr, we will make all loops seen as irreducible and emit special
// stack maps to mark compiled code entries which the interpreter can
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index c4dd31d..10b59d2 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -812,6 +812,14 @@
graph->SetArtMethod(method);
}
+ jit::Jit* jit = Runtime::Current()->GetJit();
+ if (jit != nullptr) {
+ ProfilingInfo* info = jit->GetCodeCache()->GetProfilingInfo(method, Thread::Current());
+ DCHECK(compilation_kind != CompilationKind::kBaseline || info != nullptr)
+ << "A baseline compilation should always have a ProfilingInfo";
+ graph->SetProfilingInfo(info);
+ }
+
std::unique_ptr<CodeGenerator> codegen(
CodeGenerator::Create(graph,
compiler_options,
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index d070339..b964b7c 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -1467,6 +1467,10 @@
}
void Jit::EnqueueOptimizedCompilation(ArtMethod* method, Thread* self) {
+ // Reset the hotness counter so the baseline compiled code doesn't call this
+ // method repeatedly.
+ GetCodeCache()->ResetHotnessCounter(method, self);
+
if (thread_pool_ == nullptr) {
return;
}
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 5cf08f9..047f7a4 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -1157,7 +1157,7 @@
// Start polling the liveness of compiled code to prepare for the next full collection.
if (next_collection_will_be_full) {
for (auto it : profiling_infos_) {
- it.second->SetBaselineHotnessCount(0);
+ it.second->ResetCounter();
}
// Change entry points of native methods back to the GenericJNI entrypoint.
@@ -1280,19 +1280,38 @@
ContainsElement(current_baseline_compilations_, method);
}
+ProfilingInfo* JitCodeCache::GetProfilingInfo(ArtMethod* method, Thread* self) {
+ MutexLock mu(self, *Locks::jit_lock_);
+ DCHECK(IsMethodBeingCompiled(method))
+ << "GetProfilingInfo should only be called when the method is being compiled";
+ auto it = profiling_infos_.find(method);
+ if (it == profiling_infos_.end()) {
+ return nullptr;
+ }
+ return it->second;
+}
+
+void JitCodeCache::ResetHotnessCounter(ArtMethod* method, Thread* self) {
+ MutexLock mu(self, *Locks::jit_lock_);
+ auto it = profiling_infos_.find(method);
+ DCHECK(it != profiling_infos_.end());
+ it->second->ResetCounter();
+}
+
+
void JitCodeCache::DoCollection(Thread* self, bool collect_profiling_info) {
ScopedTrace trace(__FUNCTION__);
{
MutexLock mu(self, *Locks::jit_lock_);
// Update to interpreter the methods that have baseline entrypoints and whose baseline
- // hotness count is zero.
+ // hotness count hasn't changed.
// Note that these methods may be in thread stack or concurrently revived
// between. That's OK, as the thread executing it will mark it.
uint16_t warmup_threshold = Runtime::Current()->GetJITOptions()->GetWarmupThreshold();
for (auto it : profiling_infos_) {
ProfilingInfo* info = it.second;
- if (info->GetBaselineHotnessCount() == 0) {
+ if (!info->CounterHasChanged()) {
const void* entry_point = info->GetMethod()->GetEntryPointFromQuickCompiledCode();
if (ContainsPc(entry_point)) {
OatQuickMethodHeader* method_header =
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 76b7f77..356a4dd 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -399,6 +399,9 @@
return shared_region_.IsInExecSpace(ptr);
}
+ ProfilingInfo* GetProfilingInfo(ArtMethod* method, Thread* self);
+ void ResetHotnessCounter(ArtMethod* method, Thread* self);
+
private:
JitCodeCache();
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index e101f9a..b8e7303 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -26,7 +26,7 @@
namespace art {
ProfilingInfo::ProfilingInfo(ArtMethod* method, const std::vector<uint32_t>& entries)
- : baseline_hotness_count_(0),
+ : baseline_hotness_count_(interpreter::kTieredHotnessMask),
method_(method),
number_of_inline_caches_(entries.size()),
current_inline_uses_(0) {
@@ -112,8 +112,10 @@
self_(self),
// Fetch the profiling info ahead of using it. If it's null when fetching,
// we should not call JitCodeCache::DoneCompilerUse.
- profiling_info_(jit->GetCodeCache()->NotifyCompilerUse(method, self)) {
-}
+ profiling_info_(jit == nullptr
+ ? nullptr
+ : jit->GetCodeCache()->NotifyCompilerUse(method, self))
+ {}
ScopedProfilingInfoUse::~ScopedProfilingInfoUse() {
if (profiling_info_ != nullptr) {
diff --git a/runtime/jit/profiling_info.h b/runtime/jit/profiling_info.h
index b1ea227..e658717 100644
--- a/runtime/jit/profiling_info.h
+++ b/runtime/jit/profiling_info.h
@@ -22,6 +22,7 @@
#include "base/macros.h"
#include "base/value_object.h"
#include "gc_root.h"
+#include "interpreter/mterp/nterp.h"
#include "offsets.h"
namespace art {
@@ -106,8 +107,12 @@
return MemberOffset(OFFSETOF_MEMBER(ProfilingInfo, baseline_hotness_count_));
}
- void SetBaselineHotnessCount(uint16_t count) {
- baseline_hotness_count_ = count;
+ void ResetCounter() {
+ baseline_hotness_count_ = interpreter::kTieredHotnessMask;
+ }
+
+ bool CounterHasChanged() const {
+ return baseline_hotness_count_ != interpreter::kTieredHotnessMask;
}
uint16_t GetBaselineHotnessCount() const {