Small optimization for recursive calls: avoid dex cache.
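
When the resolved target of a static or direct invoke is the method being
compiled, there is no need to load the code pointer through the dex cache
of resolved methods: the callee's frame entry lives at a known position in
the very buffer we are emitting into. The builder now flags such invokes as
recursive, each backend binds a label at the start of GenerateFrameEntry()
and emits a direct branch (bl/Bl/call) to it, and
X86Assembler/X86_64Assembler::call(Label*) gets an off-by-one fix in the
rel32 displacement it emits.
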
Change-Id: I044757a2f06e535cdc1480c4fc8182b89635baf6
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 9c2facb..86567ed 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -613,9 +613,12 @@
// Sharpening to kDirect only works if we compile PIC.
DCHECK((optimized_invoke_type == invoke_type) || (optimized_invoke_type != kDirect)
|| compiler_driver_->GetCompilerOptions().GetCompilePic());
+ bool is_recursive =
+ (target_method.dex_method_index == outer_compilation_unit_->GetDexMethodIndex());
+ DCHECK(!is_recursive || (target_method.dex_file == outer_compilation_unit_->GetDexFile()));
invoke = new (arena_) HInvokeStaticOrDirect(
arena_, number_of_arguments, return_type, dex_pc, target_method.dex_method_index,
- optimized_invoke_type);
+ is_recursive, optimized_invoke_type);
}
size_t start_index = 0;
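
Note on the builder change: recursion is detected purely by dex method
index, and method indices are only comparable within a single dex file;
the new DCHECK guards exactly that assumption. A minimal sketch of the
test, using hypothetical stand-in types for MethodReference and the outer
compilation unit:

  #include <cstdint>

  struct MethodRef {
    const void* dex_file;        // identity of the defining dex file
    uint32_t dex_method_index;   // index into that file's method_ids
  };

  // Mirrors the inlined check in the patch: compare indices, and expect
  // (DCHECKed in the real code) that a match implies the same dex file.
  bool IsRecursiveCall(const MethodRef& target, const MethodRef& outer) {
    return target.dex_method_index == outer.dex_method_index;
  }
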
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 0fe28e8..7731a10 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -527,6 +527,8 @@
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kArm);
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
+ __ Bind(&frame_entry_label_);
+
if (!skip_overflow_check) {
__ AddConstant(IP, SP, -static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
__ LoadFromOffset(kLoadWord, IP, IP, 0);
@@ -1185,18 +1187,22 @@
// temp = method;
codegen_->LoadCurrentMethod(temp);
- // temp = temp->dex_cache_resolved_methods_;
- __ LoadFromOffset(
- kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
- // temp = temp[index_in_cache]
- __ LoadFromOffset(
- kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
- // LR = temp[offset_of_quick_compiled_code]
- __ LoadFromOffset(kLoadWord, LR, temp,
- mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArmWordSize).Int32Value());
- // LR()
- __ blx(LR);
+ if (!invoke->IsRecursive()) {
+ // temp = temp->dex_cache_resolved_methods_;
+ __ LoadFromOffset(
+ kLoadWord, temp, temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value());
+ // temp = temp[index_in_cache]
+ __ LoadFromOffset(
+ kLoadWord, temp, temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex()));
+ // LR = temp[offset_of_quick_compiled_code]
+ __ LoadFromOffset(kLoadWord, LR, temp,
+ mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArmWordSize).Int32Value());
+ // LR()
+ __ blx(LR);
+ } else {
+ __ bl(codegen_->GetFrameEntryLabel());
+ }
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
DCHECK(!codegen_->IsLeafMethod());
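
Two things worth noting in the ARM path: the recursive case collapses three
dependent memory loads plus an indirect blx into a single direct bl, and
LoadCurrentMethod(temp) is still emitted before the branch even though it is
only needed on the non-recursive path (the same holds for x86 and x86-64
below; only the arm64 backend moves that load into the non-recursive arm).
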
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index dd69e4d..4b03dff 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -247,9 +247,12 @@
void ComputeSpillMask() OVERRIDE;
+ Label* GetFrameEntryLabel() { return &frame_entry_label_; }
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
+ Label frame_entry_label_;
LocationsBuilderARM location_builder_;
InstructionCodeGeneratorARM instruction_visitor_;
ParallelMoveResolverARM move_resolver_;
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index cc7bf3c..0909424 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -442,6 +442,8 @@
}
void CodeGeneratorARM64::GenerateFrameEntry() {
+ __ Bind(&frame_entry_label_);
+
bool do_overflow_check = FrameNeedsStackCheck(GetFrameSize(), kArm64) || !IsLeafMethod();
if (do_overflow_check) {
UseScratchRegisterScope temps(GetVIXLAssembler());
@@ -1845,17 +1847,21 @@
//
// Currently we implement the app -> app logic, which looks up in the resolve cache.
- // temp = method;
- LoadCurrentMethod(temp);
- // temp = temp->dex_cache_resolved_methods_;
- __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset()));
- // temp = temp[index_in_cache];
- __ Ldr(temp, HeapOperand(temp, index_in_cache));
- // lr = temp->entry_point_from_quick_compiled_code_;
- __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kArm64WordSize)));
- // lr();
- __ Blr(lr);
+ if (!invoke->IsRecursive()) {
+ // temp = method;
+ LoadCurrentMethod(temp);
+ // temp = temp->dex_cache_resolved_methods_;
+ __ Ldr(temp, HeapOperand(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset()));
+ // temp = temp[index_in_cache];
+ __ Ldr(temp, HeapOperand(temp, index_in_cache));
+ // lr = temp->entry_point_from_quick_compiled_code_;
+ __ Ldr(lr, HeapOperand(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kArm64WordSize)));
+ // lr();
+ __ Blr(lr);
+ } else {
+ __ Bl(&frame_entry_label_);
+ }
RecordPcInfo(invoke, invoke->GetDexPc());
DCHECK(!IsLeafMethod());
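
Because frame_entry_label_ is bound at the very top of GenerateFrameEntry()
in every backend, a recursive bl/Bl/call re-runs the entire prologue,
including the stack overflow check. That is the desired behavior: each
recursive invocation allocates a fresh frame, so the check must be repeated
at every level of the recursion.
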
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index 100dafe..9a99dcc 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -297,6 +297,7 @@
private:
// Labels for each block that will be compiled.
vixl::Label* block_labels_;
+ vixl::Label frame_entry_label_;
LocationsBuilderARM64 location_builder_;
InstructionCodeGeneratorARM64 instruction_visitor_;
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 66f1d5e..a22c91a 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -449,6 +449,7 @@
codegen_(codegen) {}
void CodeGeneratorX86::GenerateFrameEntry() {
+ __ Bind(&frame_entry_label_);
bool skip_overflow_check =
IsLeafMethod() && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86);
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
@@ -1125,13 +1126,17 @@
// temp = method;
codegen_->LoadCurrentMethod(temp);
- // temp = temp->dex_cache_resolved_methods_;
- __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
- // temp = temp[index_in_cache]
- __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
- // (temp + offset_of_quick_compiled_code)()
- __ call(Address(
- temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+ if (!invoke->IsRecursive()) {
+ // temp = temp->dex_cache_resolved_methods_;
+ __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().Int32Value()));
+ // temp = temp[index_in_cache]
+ __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+ // (temp + offset_of_quick_compiled_code)()
+ __ call(Address(
+ temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86WordSize).Int32Value()));
+ } else {
+ __ call(codegen_->GetFrameEntryLabel());
+ }
DCHECK(!codegen_->IsLeafMethod());
codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
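
The direct call above goes through X86Assembler::call(Label*), which emits
a plain E8 rel32 call to a label in the current buffer. The rel32 that path
produced was off by one byte; the fix, with a worked example, is at the
bottom of this patch.
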
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 55d71e3..54cb6cd 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -241,9 +241,12 @@
return type == Primitive::kPrimLong;
}
+ Label* GetFrameEntryLabel() { return &frame_entry_label_; }
+
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
+ Label frame_entry_label_;
LocationsBuilderX86 location_builder_;
InstructionCodeGeneratorX86 instruction_visitor_;
ParallelMoveResolverX86 move_resolver_;
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 6bc28ff..90b7bda 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -361,13 +361,17 @@
// temp = method;
LoadCurrentMethod(temp);
- // temp = temp->dex_cache_resolved_methods_;
- __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
- // temp = temp[index_in_cache]
- __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
- // (temp + offset_of_quick_compiled_code)()
- __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
- kX86_64WordSize).SizeValue()));
+ if (!invoke->IsRecursive()) {
+ // temp = temp->dex_cache_resolved_methods_;
+ __ movl(temp, Address(temp, mirror::ArtMethod::DexCacheResolvedMethodsOffset().SizeValue()));
+ // temp = temp[index_in_cache]
+ __ movl(temp, Address(temp, CodeGenerator::GetCacheOffset(invoke->GetDexMethodIndex())));
+ // (temp + offset_of_quick_compiled_code)()
+ __ call(Address(temp, mirror::ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+ kX86_64WordSize).SizeValue()));
+ } else {
+ __ call(&frame_entry_label_);
+ }
DCHECK(!IsLeafMethod());
RecordPcInfo(invoke, invoke->GetDexPc());
@@ -472,6 +476,7 @@
}
void CodeGeneratorX86_64::GenerateFrameEntry() {
+ __ Bind(&frame_entry_label_);
bool skip_overflow_check = IsLeafMethod()
&& !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index c30f4c2..dbdbf86 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -248,6 +248,7 @@
private:
// Labels for each block that will be compiled.
GrowableArray<Label> block_labels_;
+ Label frame_entry_label_;
LocationsBuilderX86_64 location_builder_;
InstructionCodeGeneratorX86_64 instruction_visitor_;
ParallelMoveResolverX86_64 move_resolver_;
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 2cc021c..1a0ebe5 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -1711,9 +1711,11 @@
Primitive::Type return_type,
uint32_t dex_pc,
uint32_t dex_method_index,
+ bool is_recursive,
InvokeType invoke_type)
: HInvoke(arena, number_of_arguments, return_type, dex_pc, dex_method_index),
- invoke_type_(invoke_type) {}
+ invoke_type_(invoke_type),
+ is_recursive_(is_recursive) {}
bool CanDoImplicitNullCheck() const OVERRIDE {
// We access the method via the dex cache so we can't do an implicit null check.
@@ -1722,11 +1724,13 @@
}
InvokeType GetInvokeType() const { return invoke_type_; }
+ bool IsRecursive() const { return is_recursive_; }
DECLARE_INSTRUCTION(InvokeStaticOrDirect);
private:
const InvokeType invoke_type_;
+ const bool is_recursive_;
DISALLOW_COPY_AND_ASSIGN(HInvokeStaticOrDirect);
};
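
The flag is computed once by the builder and stored as a const member, so
backends can query IsRecursive() without re-deriving it. A construction
sketch matching the builder call site (argument values are illustrative):

  HInvokeStaticOrDirect* invoke = new (arena) HInvokeStaticOrDirect(
      arena, number_of_arguments, return_type, dex_pc, dex_method_index,
      /* is_recursive */ true, kStatic);
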
diff --git a/compiler/utils/x86/assembler_x86.cc b/compiler/utils/x86/assembler_x86.cc
index 1f0dba5..03744e4 100644
--- a/compiler/utils/x86/assembler_x86.cc
+++ b/compiler/utils/x86/assembler_x86.cc
@@ -51,7 +51,8 @@
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xE8);
static const int kSize = 5;
- EmitLabel(label, kSize);
+ // Offset by one because we have already emitted the opcode.
+ EmitLabel(label, kSize - 1);
}
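
A worked example of the off-by-one, assuming the usual EmitLabel semantics
for a bound label (emit label_position - current_buffer_position - size as
the rel32). The hardware resolves an E8 call relative to the end of the
5-byte instruction, and the opcode byte has already been emitted by the
time EmitLabel runs, so the size argument must only account for the
remaining 4 displacement bytes. A self-contained check of the arithmetic:

  #include <cassert>
  #include <cstdint>

  int main() {
    const int32_t label_pos = 0;     // frame entry bound at buffer start
    const int32_t call_start = 100;  // offset of the call's 0xE8 byte
    const int32_t kSize = 5;         // E8 + 4-byte rel32
    const int32_t buffer_after_opcode = call_start + 1;
    // Displacement the CPU needs: target - end_of_instruction.
    const int32_t wanted = label_pos - (call_start + kSize);  // -105
    // What EmitLabel(label, size) emits under the assumed semantics.
    const int32_t emitted_new = (label_pos - buffer_after_opcode) - (kSize - 1);
    const int32_t emitted_old = (label_pos - buffer_after_opcode) - kSize;
    assert(emitted_new == wanted);      // kSize - 1: lands on the label
    assert(emitted_old == wanted - 1);  // kSize: one byte before the label
    return 0;
  }

The same reasoning applies verbatim to the x86-64 assembler fix below.
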
diff --git a/compiler/utils/x86_64/assembler_x86_64.cc b/compiler/utils/x86_64/assembler_x86_64.cc
index 5afa603..7e8e769 100644
--- a/compiler/utils/x86_64/assembler_x86_64.cc
+++ b/compiler/utils/x86_64/assembler_x86_64.cc
@@ -57,7 +57,8 @@
AssemblerBuffer::EnsureCapacity ensured(&buffer_);
EmitUint8(0xE8);
static const int kSize = 5;
- EmitLabel(label, kSize);
+ // Offset by one because we have already emitted the opcode.
+ EmitLabel(label, kSize - 1);
}
void X86_64Assembler::pushq(CpuRegister reg) {