Baseline JIT: update inline caches in compiled code.
This is a step toward removing profiling from the interpreter,
in order to speed up interpreter performance.
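
Conceptually, the check emitted at each virtual/interface call site
behaves like the following sketch (illustrative pseudo-C++, not the
literal generated assembly; `UpdateInlineCache` stands in for the
kQuickUpdateInlineCache runtime entrypoint and the cache layout is
simplified to its first class slot):

  // The InlineCache pointer is a compile-time constant baked into the
  // generated code for this dex_pc.
  InlineCache* cache = profiling_info->GetInlineCache(dex_pc);
  if (receiver->klass != cache->classes[0]) {
    // Monomorphic fast path missed: ask the runtime to record the
    // receiver's class in the cache.
    UpdateInlineCache();
  }
  // Fall through to the regular IMT/vtable dispatch.
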
Bug: 119800099
Test: test.py --baseline
Change-Id: Ica1fa41a889b31262d9f5691b30a31fbcec01b34
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 10397e8..0162311 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -18,7 +18,7 @@
#include "arch/arm64/asm_support_arm64.h"
#include "arch/arm64/instruction_set_features_arm64.h"
-#include "art_method.h"
+#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "class_table.h"
@@ -4041,6 +4041,26 @@
HandleInvoke(invoke);
}
+void CodeGeneratorARM64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
+ Register klass) {
+ DCHECK_EQ(klass.GetCode(), 0u);
+ if (GetCompilerOptions().IsBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
+ ScopedObjectAccess soa(Thread::Current());
+ ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+ InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
+ uint64_t address = reinterpret_cast64<uint64_t>(cache);
+ vixl::aarch64::Label done;
+ __ Mov(x8, address);
+ __ Ldr(x9, MemOperand(x8, InlineCache::ClassesOffset().Int32Value()));
+ // Fast path for a monomorphic cache.
+ __ Cmp(klass, x9);
+ __ B(eq, &done);
+ InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
+ __ Bind(&done);
+ }
+}
+
void InstructionCodeGeneratorARM64::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
LocationSummary* locations = invoke->GetLocations();
@@ -4049,13 +4069,6 @@
Offset class_offset = mirror::Object::ClassOffset();
Offset entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArm64PointerSize);
- // The register ip1 is required to be used for the hidden argument in
- // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
- MacroAssembler* masm = GetVIXLAssembler();
- UseScratchRegisterScope scratch_scope(masm);
- scratch_scope.Exclude(ip1);
- __ Mov(ip1, invoke->GetDexMethodIndex());
-
// Ensure that between load and MaybeRecordImplicitNullCheck there are no pools emitted.
if (receiver.IsStackSlot()) {
__ Ldr(temp.W(), StackOperandFrom(receiver));
@@ -4080,6 +4093,17 @@
// intact/accessible until the end of the marking phase (the
// concurrent copying collector may not in the future).
GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
+
+ // If we're compiling baseline, update the inline cache.
+ codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
+
+ // The register ip1 is required to be used for the hidden argument in
+ // art_quick_imt_conflict_trampoline, so prevent VIXL from using it.
+ MacroAssembler* masm = GetVIXLAssembler();
+ UseScratchRegisterScope scratch_scope(masm);
+ scratch_scope.Exclude(ip1);
+ __ Mov(ip1, invoke->GetDexMethodIndex());
+
__ Ldr(temp,
MemOperand(temp, mirror::Class::ImtPtrOffset(kArm64PointerSize).Uint32Value()));
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
@@ -4260,6 +4284,10 @@
// intact/accessible until the end of the marking phase (the
// concurrent copying collector may not in the future).
GetAssembler()->MaybeUnpoisonHeapReference(temp.W());
+
+ // If we're compiling baseline, update the inline cache.
+ MaybeGenerateInlineCacheCheck(invoke, temp);
+
// temp = temp->GetMethodAt(method_offset);
__ Ldr(temp, MemOperand(temp, method_offset));
// lr = temp->GetEntryPoint();
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index a669094..253e915 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -786,6 +786,8 @@
CodeGenerator::MaybeRecordImplicitNullCheck(instr);
}
+ void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl::aarch64::Register klass);
+
private:
// Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 1c69dd6..68e2dfa 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -18,7 +18,7 @@
#include "arch/arm/asm_support_arm.h"
#include "arch/arm/instruction_set_features_arm.h"
-#include "art_method.h"
+#include "art_method-inl.h"
#include "base/bit_utils.h"
#include "base/bit_utils_iterator.h"
#include "class_table.h"
@@ -34,6 +34,7 @@
#include "linker/linker_patch.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
+#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/arm/assembler_arm_vixl.h"
#include "utils/arm/managed_register_arm.h"
@@ -3297,6 +3298,28 @@
invoke->GetLocations()->AddTemp(LocationFrom(r12));
}
+void CodeGeneratorARMVIXL::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
+ vixl32::Register klass) {
+ DCHECK_EQ(r0.GetCode(), klass.GetCode());
+ if (GetCompilerOptions().IsBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
+ ScopedObjectAccess soa(Thread::Current());
+ ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+ InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
+ uint32_t address = reinterpret_cast32<uint32_t>(cache);
+ vixl32::Label done;
+ UseScratchRegisterScope temps(GetVIXLAssembler());
+ temps.Exclude(ip);
+ __ Mov(r4, address);
+ __ Ldr(ip, MemOperand(r4, InlineCache::ClassesOffset().Int32Value()));
+ // Fast path for a monomorphic cache.
+ __ Cmp(klass, ip);
+ __ B(eq, &done, /* is_far_target= */ false);
+ InvokeRuntime(kQuickUpdateInlineCache, instruction, instruction->GetDexPc());
+ __ Bind(&done);
+ }
+}
+
void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
LocationSummary* locations = invoke->GetLocations();
@@ -3324,10 +3347,15 @@
// intact/accessible until the end of the marking phase (the
// concurrent copying collector may not in the future).
GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+ // If we're compiling baseline, update the inline cache.
+ codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
+
GetAssembler()->LoadFromOffset(kLoadWord,
temp,
temp,
mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+
uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
invoke->GetImtIndex(), kArmPointerSize));
// temp = temp->GetImtEntryAt(method_offset);
@@ -8906,6 +8934,9 @@
// concurrent copying collector may not in the future).
GetAssembler()->MaybeUnpoisonHeapReference(temp);
+ // If we're compiling baseline, update the inline cache.
+ MaybeGenerateInlineCacheCheck(invoke, temp);
+
// temp = temp->GetMethodAt(method_offset);
uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
kArmPointerSize).Int32Value();
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index fae615d..3d4c231 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -756,6 +756,8 @@
CodeGenerator::MaybeRecordImplicitNullCheck(instr);
}
+ void MaybeGenerateInlineCacheCheck(HInstruction* instruction, vixl32::Register klass);
+
private:
// Encoding of thunk type and data for link-time generated thunks for Baker read barriers.
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 797fe32..ac36ce3 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -16,7 +16,7 @@
#include "code_generator_x86.h"
-#include "art_method.h"
+#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
@@ -27,10 +27,12 @@
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_x86.h"
+#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
+#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
@@ -2260,6 +2262,10 @@
}
HandleInvoke(invoke);
+ if (codegen_->GetCompilerOptions().IsBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ // Add one temporary for inline cache update.
+ invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
+ }
}
void LocationsBuilderX86::HandleInvoke(HInvoke* invoke) {
@@ -2283,6 +2289,34 @@
HandleInvoke(invoke);
// Add the hidden argument.
invoke->GetLocations()->AddTemp(Location::FpuRegisterLocation(XMM7));
+
+ if (codegen_->GetCompilerOptions().IsBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ // Add one temporary for inline cache update.
+ invoke->GetLocations()->AddTemp(Location::RegisterLocation(EBP));
+ }
+}
+
+void CodeGeneratorX86::MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass) {
+ DCHECK_EQ(EAX, klass);
+ if (GetCompilerOptions().IsBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ DCHECK(!instruction->GetEnvironment()->IsFromInlinedInvoke());
+ ScopedObjectAccess soa(Thread::Current());
+ ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+ InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
+ uint32_t address = reinterpret_cast32<uint32_t>(cache);
+ if (kIsDebugBuild) {
+ uint32_t temp_index = instruction->GetLocations()->GetTempCount() - 1u;
+ CHECK_EQ(EBP, instruction->GetLocations()->GetTemp(temp_index).AsRegister<Register>());
+ }
+ Register temp = EBP;
+ NearLabel done;
+ __ movl(temp, Immediate(address));
+ // Fast path for a monomorphic cache.
+ __ cmpl(klass, Address(temp, InlineCache::ClassesOffset().Int32Value()));
+ __ j(kEqual, &done);
+ GenerateInvokeRuntime(GetThreadOffset<kX86PointerSize>(kQuickUpdateInlineCache).Int32Value());
+ __ Bind(&done);
+ }
}
void InstructionCodeGeneratorX86::VisitInvokeInterface(HInvokeInterface* invoke) {
@@ -2316,6 +2350,9 @@
// intact/accessible until the end of the marking phase (the
// concurrent copying collector may not in the future).
__ MaybeUnpoisonHeapReference(temp);
+
+ codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
+
// temp = temp->GetAddressOfIMT()
__ movl(temp,
Address(temp, mirror::Class::ImtPtrOffset(kX86PointerSize).Uint32Value()));
@@ -4939,6 +4976,9 @@
// intact/accessible until the end of the marking phase (the
// concurrent copying collector may not in the future).
__ MaybeUnpoisonHeapReference(temp);
+
+ MaybeGenerateInlineCacheCheck(invoke, temp);
+
// temp = temp->GetMethodAt(method_offset);
__ movl(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 6bf6b0b..e305b50 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -624,6 +624,8 @@
void GenerateImplicitNullCheck(HNullCheck* instruction) override;
void GenerateExplicitNullCheck(HNullCheck* instruction) override;
+ void MaybeGenerateInlineCacheCheck(HInstruction* instruction, Register klass);
+
// When we don't know the proper offset for the value, we use kDummy32BitOffset.
// The correct value will be inserted when processing Assembler fixups.
static constexpr int32_t kDummy32BitOffset = 256;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index cd8c609..100a86b 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -16,7 +16,7 @@
#include "code_generator_x86_64.h"
-#include "art_method.h"
+#include "art_method-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
@@ -26,11 +26,13 @@
#include "heap_poisoning.h"
#include "intrinsics.h"
#include "intrinsics_x86_64.h"
+#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
+#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
@@ -1068,6 +1070,9 @@
// intact/accessible until the end of the marking phase (the
// concurrent copying collector may not in the future).
__ MaybeUnpoisonHeapReference(temp);
+
+ MaybeGenerateInlineCacheCheck(invoke->GetDexPc(), temp);
+
// temp = temp->GetMethodAt(method_offset);
__ movq(temp, Address(temp, method_offset));
// call temp->GetEntryPoint();
@@ -2520,6 +2525,24 @@
invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
}
+void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(uint32_t dex_pc, CpuRegister klass) {
+ DCHECK_EQ(RDI, klass.AsRegister());
+ if (GetCompilerOptions().IsBaseline() && !Runtime::Current()->IsAotCompiler()) {
+ ScopedObjectAccess soa(Thread::Current());
+ ProfilingInfo* info = GetGraph()->GetArtMethod()->GetProfilingInfo(kRuntimePointerSize);
+ InlineCache* cache = info->GetInlineCache(dex_pc);
+ uint64_t address = reinterpret_cast64<uint64_t>(cache);
+ NearLabel done;
+ __ movq(CpuRegister(TMP), Immediate(address));
+ // Fast path for a monomorphic cache.
+ __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
+ __ j(kEqual, &done);
+ GenerateInvokeRuntime(
+ GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
+ __ Bind(&done);
+ }
+}
+
void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
// TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
LocationSummary* locations = invoke->GetLocations();
@@ -2528,11 +2551,6 @@
Location receiver = locations->InAt(0);
size_t class_offset = mirror::Object::ClassOffset().SizeValue();
- // Set the hidden argument. This is safe to do this here, as RAX
- // won't be modified thereafter, before the `call` instruction.
- DCHECK_EQ(RAX, hidden_reg.AsRegister());
- codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
-
if (receiver.IsStackSlot()) {
__ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
// /* HeapReference<Class> */ temp = temp->klass_
@@ -2550,6 +2568,15 @@
// intact/accessible until the end of the marking phase (the
// concurrent copying collector may not in the future).
__ MaybeUnpoisonHeapReference(temp);
+
+ codegen_->MaybeGenerateInlineCacheCheck(invoke->GetDexPc(), temp);
+
+ // Set the hidden argument. This is safe to do here, as RAX
+ // won't be modified thereafter, before the `call` instruction.
+ // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
+ DCHECK_EQ(RAX, hidden_reg.AsRegister());
+ codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
+
// temp = temp->GetAddressOfIMT()
__ movq(temp,
Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index ef8f5ac..20db423 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -600,6 +600,8 @@
void GenerateNop() override;
void GenerateImplicitNullCheck(HNullCheck* instruction) override;
void GenerateExplicitNullCheck(HNullCheck* instruction) override;
+ void MaybeGenerateInlineCacheCheck(uint32_t dex_pc, CpuRegister cls);
+
// When we don't know the proper offset for the value, we use kDummy32BitOffset.
// We will fix this up in the linker later to have the right value.