Implement on-stack replacement for arm/arm64/x86/x86_64.
High-level overview:
- osr_method_threshold determines when to compile a method
in osr mode (all loops are then treated as irreducible).
- Branch instructions in the interpreter query whether they can
jump to an osr-compiled method (see the sketch below).
- An osr entry point is found through the stack maps: if a stack
map is duplicated in the CodeInfo, it is an osr entry point.
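
As an illustration only, here is roughly what each branch in the
interpreter does, using the names introduced by this change (see
runtime/interpreter/*_impl.cc and runtime/jit/jit.cc below):

  ArtMethod* method = shadow_frame.GetMethod();
  JValue result;
  // Ask the jit whether an osr entry point exists at dex_pc + offset. If so,
  // the call copies the live vregs from the shadow frame into a compiled
  // frame, jumps to the osr entry point, runs the compiled code to
  // completion and stores the return value in `result`.
  if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) {
    // Leave the interpreter with the compiled code's return value.
    return result;
  }
  // Otherwise keep interpreting; the check repeats at the next branch.
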
Change-Id: Ifb39338cd281e2c7eccce67f4e18d46428be71e4
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 3a9ce1b..97c60de 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -64,7 +64,8 @@
virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED,
jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED,
- ArtMethod* method ATTRIBUTE_UNUSED)
+ ArtMethod* method ATTRIBUTE_UNUSED,
+ bool osr ATTRIBUTE_UNUSED)
SHARED_REQUIRES(Locks::mutator_lock_) {
return false;
}
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 6774758..68f4783 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -60,11 +60,12 @@
delete reinterpret_cast<JitCompiler*>(handle);
}
-extern "C" bool jit_compile_method(void* handle, ArtMethod* method, Thread* self)
+extern "C" bool jit_compile_method(
+ void* handle, ArtMethod* method, Thread* self, bool osr)
SHARED_REQUIRES(Locks::mutator_lock_) {
auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle);
DCHECK(jit_compiler != nullptr);
- return jit_compiler->CompileMethod(self, method);
+ return jit_compiler->CompileMethod(self, method, osr);
}
extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t count)
@@ -201,7 +202,7 @@
}
}
-bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) {
+bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) {
TimingLogger logger("JIT compiler timing logger", true, VLOG_IS_ON(jit));
const uint64_t start_time = NanoTime();
StackHandleScope<2> hs(self);
@@ -223,8 +224,8 @@
// of that proxy method, as the compiler does not expect a proxy method.
ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
- success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
- if (success && perf_file_ != nullptr) {
+ success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile, osr);
+ if (success && (perf_file_ != nullptr)) {
const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode();
std::ostringstream stream;
stream << std::hex
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 037a18a..5294d0e 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -37,7 +37,7 @@
public:
static JitCompiler* Create();
virtual ~JitCompiler();
- bool CompileMethod(Thread* self, ArtMethod* method)
+ bool CompileMethod(Thread* self, ArtMethod* method, bool osr)
SHARED_REQUIRES(Locks::mutator_lock_);
CompilerCallbacks* GetCompilerCallbacks() const;
size_t GetTotalCompileTime() const {
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index c7430e7..8d77daf 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -72,74 +72,6 @@
size_t index_;
};
-class SwitchTable : public ValueObject {
- public:
- SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
- : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
- int32_t table_offset = instruction.VRegB_31t();
- const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
- if (sparse) {
- CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
- } else {
- CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
- }
- num_entries_ = table[1];
- values_ = reinterpret_cast<const int32_t*>(&table[2]);
- }
-
- uint16_t GetNumEntries() const {
- return num_entries_;
- }
-
- void CheckIndex(size_t index) const {
- if (sparse_) {
- // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
- DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
- } else {
- // In a packed table, we have the starting key and num_entries_ values.
- DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
- }
- }
-
- int32_t GetEntryAt(size_t index) const {
- CheckIndex(index);
- return values_[index];
- }
-
- uint32_t GetDexPcForIndex(size_t index) const {
- CheckIndex(index);
- return dex_pc_ +
- (reinterpret_cast<const int16_t*>(values_ + index) -
- reinterpret_cast<const int16_t*>(&instruction_));
- }
-
- // Index of the first value in the table.
- size_t GetFirstValueIndex() const {
- if (sparse_) {
- // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
- return num_entries_;
- } else {
- // In a packed table, we have the starting key and num_entries_ values.
- return 1;
- }
- }
-
- private:
- const Instruction& instruction_;
- const uint32_t dex_pc_;
-
- // Whether this is a sparse-switch table (or a packed-switch one).
- const bool sparse_;
-
- // This can't be const as it needs to be computed off of the given instruction, and complicated
- // expressions in the initializer list seemed very ugly.
- uint16_t num_entries_;
-
- const int32_t* values_;
-
- DISALLOW_COPY_AND_ASSIGN(SwitchTable);
-};
-
void HGraphBuilder::InitializeLocals(uint16_t count) {
graph_->SetNumberOfVRegs(count);
locals_.resize(count);
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 1d604e7..93e17d6 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -30,7 +30,6 @@
namespace art {
class Instruction;
-class SwitchTable;
class HGraphBuilder : public ValueObject {
public:
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index a3bbfdb..ffec382 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -629,8 +629,72 @@
return stack_map_stream_.PrepareForFillIn();
}
-void CodeGenerator::BuildStackMaps(MemoryRegion region) {
+static void CheckCovers(uint32_t dex_pc,
+ const HGraph& graph,
+ const CodeInfo& code_info,
+ const ArenaVector<HSuspendCheck*>& loop_headers,
+ ArenaVector<size_t>* covered) {
+ StackMapEncoding encoding = code_info.ExtractEncoding();
+ for (size_t i = 0; i < loop_headers.size(); ++i) {
+ if (loop_headers[i]->GetDexPc() == dex_pc) {
+ if (graph.IsCompilingOsr()) {
+ DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc, encoding).IsValid());
+ }
+ ++(*covered)[i];
+ }
+ }
+}
+
+// Debug helper to ensure loop entries in compiled code are matched by
+// dex branch instructions.
+static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
+ const CodeInfo& code_info,
+ const DexFile::CodeItem& code_item) {
+ ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc));
+ for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) {
+ if (it.Current()->IsLoopHeader()) {
+ HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck();
+ if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
+ loop_headers.push_back(suspend_check);
+ }
+ }
+ }
+ ArenaVector<size_t> covered(loop_headers.size(), 0, graph.GetArena()->Adapter(kArenaAllocMisc));
+ const uint16_t* code_ptr = code_item.insns_;
+ const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
+
+ size_t dex_pc = 0;
+ while (code_ptr < code_end) {
+ const Instruction& instruction = *Instruction::At(code_ptr);
+ if (instruction.IsBranch()) {
+ uint32_t target = dex_pc + instruction.GetTargetOffset();
+ CheckCovers(target, graph, code_info, loop_headers, &covered);
+ } else if (instruction.IsSwitch()) {
+ SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
+ uint16_t num_entries = table.GetNumEntries();
+ size_t offset = table.GetFirstValueIndex();
+
+ // Use a larger loop counter type to avoid overflow issues.
+ for (size_t i = 0; i < num_entries; ++i) {
+ // The target of the case.
+ uint32_t target = dex_pc + table.GetEntryAt(i + offset);
+ CheckCovers(target, graph, code_info, loop_headers, &covered);
+ }
+ }
+ dex_pc += instruction.SizeInCodeUnits();
+ code_ptr += instruction.SizeInCodeUnits();
+ }
+
+ for (size_t i = 0; i < covered.size(); ++i) {
+ DCHECK_NE(covered[i], 0u) << "Loop in compiled code has no dex branch equivalent";
+ }
+}
+
+void CodeGenerator::BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item) {
stack_map_stream_.FillIn(region);
+ if (kIsDebugBuild) {
+ CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(region), code_item);
+ }
}
void CodeGenerator::RecordPcInfo(HInstruction* instruction,
@@ -705,6 +769,46 @@
EmitEnvironment(instruction->GetEnvironment(), slow_path);
stack_map_stream_.EndStackMapEntry();
+
+ HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
+ if (instruction->IsSuspendCheck() &&
+ (info != nullptr) &&
+ graph_->IsCompilingOsr() &&
+ (inlining_depth == 0)) {
+ DCHECK_EQ(info->GetSuspendCheck(), instruction);
+ // We duplicate the stack map as a marker that this stack map can be an OSR entry.
+ // Duplicating it avoids having the runtime recognize and skip an OSR stack map.
+ DCHECK(info->IsIrreducible());
+ stack_map_stream_.BeginStackMapEntry(
+ dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0);
+ EmitEnvironment(instruction->GetEnvironment(), slow_path);
+ stack_map_stream_.EndStackMapEntry();
+ if (kIsDebugBuild) {
+ HEnvironment* environment = instruction->GetEnvironment();
+ for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
+ HInstruction* in_environment = environment->GetInstructionAt(i);
+ if (in_environment != nullptr) {
+ DCHECK(in_environment->IsPhi() || in_environment->IsConstant());
+ Location location = environment->GetLocationAt(i);
+ DCHECK(location.IsStackSlot() ||
+ location.IsDoubleStackSlot() ||
+ location.IsConstant() ||
+ location.IsInvalid());
+ if (location.IsStackSlot() || location.IsDoubleStackSlot()) {
+ DCHECK_LT(location.GetStackIndex(), static_cast<int32_t>(GetFrameSize()));
+ }
+ }
+ }
+ }
+ } else if (kIsDebugBuild) {
+ // Ensure stack maps are unique, by checking that the native pc of the stack map
+ // last emitted is different from the native pc of the stack map previously emitted.
+ size_t number_of_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
+ if (number_of_stack_maps > 1) {
+ DCHECK_NE(stack_map_stream_.GetStackMap(number_of_stack_maps - 1).native_pc_offset,
+ stack_map_stream_.GetStackMap(number_of_stack_maps - 2).native_pc_offset);
+ }
+ }
}
bool CodeGenerator::HasStackMapAtCurrentPc() {
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 4f8f146..0a688cf 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -288,7 +288,7 @@
slow_paths_.push_back(slow_path);
}
- void BuildStackMaps(MemoryRegion region);
+ void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
size_t ComputeStackMapsSize();
bool IsLeafMethod() const {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 9b91b53..a8841d3 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -758,6 +758,7 @@
compiler_driver_->GetInstructionSet(),
invoke_type,
graph_->IsDebuggable(),
+ /* osr */ false,
graph_->GetCurrentInstructionId());
callee_graph->SetArtMethod(resolved_method);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 3dda850..f269885 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -647,6 +647,10 @@
header_->GetGraph()->SetHasIrreducibleLoops(true);
PopulateIrreducibleRecursive(back_edge);
} else {
+ if (header_->GetGraph()->IsCompilingOsr()) {
+ irreducible_ = true;
+ header_->GetGraph()->SetHasIrreducibleLoops(true);
+ }
PopulateRecursive(back_edge);
}
}
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b808347..116b1c6 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -274,6 +274,7 @@
InstructionSet instruction_set,
InvokeType invoke_type = kInvalidInvokeType,
bool debuggable = false,
+ bool osr = false,
int start_instruction_id = 0)
: arena_(arena),
blocks_(arena->Adapter(kArenaAllocBlockList)),
@@ -302,7 +303,8 @@
cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_current_method_(nullptr),
- inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) {
+ inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
+ osr_(osr) {
blocks_.reserve(kDefaultNumberOfBlocks);
}
@@ -478,6 +480,8 @@
return instruction_set_;
}
+ bool IsCompilingOsr() const { return osr_; }
+
bool HasTryCatch() const { return has_try_catch_; }
void SetHasTryCatch(bool value) { has_try_catch_ = value; }
@@ -606,6 +610,11 @@
// collection pointer to passes which may create NullConstant.
ReferenceTypeInfo inexact_object_rti_;
+ // Whether we are compiling this graph for on-stack replacement: this will
+ // make all loops be treated as irreducible and emit special stack maps that
+ // mark compiled code entry points the interpreter can directly jump to.
+ const bool osr_;
+
friend class SsaBuilder; // For caching constants.
friend class SsaLivenessAnalysis; // For the linear order.
ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -6040,6 +6049,74 @@
FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
#undef INSTRUCTION_TYPE_CHECK
+class SwitchTable : public ValueObject {
+ public:
+ SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
+ : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
+ int32_t table_offset = instruction.VRegB_31t();
+ const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
+ if (sparse) {
+ CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
+ } else {
+ CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
+ }
+ num_entries_ = table[1];
+ values_ = reinterpret_cast<const int32_t*>(&table[2]);
+ }
+
+ uint16_t GetNumEntries() const {
+ return num_entries_;
+ }
+
+ void CheckIndex(size_t index) const {
+ if (sparse_) {
+ // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+ DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
+ } else {
+ // In a packed table, we have the starting key and num_entries_ values.
+ DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
+ }
+ }
+
+ int32_t GetEntryAt(size_t index) const {
+ CheckIndex(index);
+ return values_[index];
+ }
+
+ uint32_t GetDexPcForIndex(size_t index) const {
+ CheckIndex(index);
+ return dex_pc_ +
+ (reinterpret_cast<const int16_t*>(values_ + index) -
+ reinterpret_cast<const int16_t*>(&instruction_));
+ }
+
+ // Index of the first value in the table.
+ size_t GetFirstValueIndex() const {
+ if (sparse_) {
+ // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+ return num_entries_;
+ } else {
+ // In a packed table, we have the starting key and num_entries_ values.
+ return 1;
+ }
+ }
+
+ private:
+ const Instruction& instruction_;
+ const uint32_t dex_pc_;
+
+ // Whether this is a sparse-switch table (or a packed-switch one).
+ const bool sparse_;
+
+ // This can't be const as it needs to be computed off of the given instruction, and complicated
+ // expressions in the initializer list seemed very ugly.
+ uint16_t num_entries_;
+
+ const int32_t* values_;
+
+ DISALLOW_COPY_AND_ASSIGN(SwitchTable);
+};
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_H_
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index bdc664b..736ac32 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -300,7 +300,7 @@
}
}
- bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method)
+ bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, bool osr)
OVERRIDE
SHARED_REQUIRES(Locks::mutator_lock_);
@@ -309,7 +309,8 @@
CompiledMethod* Emit(ArenaAllocator* arena,
CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
- CompilerDriver* driver) const;
+ CompilerDriver* driver,
+ const DexFile::CodeItem* item) const;
// Try compiling a method and return the code generator used for
// compiling it.
@@ -327,7 +328,8 @@
uint32_t method_idx,
jobject class_loader,
const DexFile& dex_file,
- Handle<mirror::DexCache> dex_cache) const;
+ Handle<mirror::DexCache> dex_cache,
+ bool osr) const;
std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
@@ -580,11 +582,12 @@
CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
- CompilerDriver* compiler_driver) const {
+ CompilerDriver* compiler_driver,
+ const DexFile::CodeItem* code_item) const {
ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
stack_map.resize(codegen->ComputeStackMapsSize());
- codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
+ codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), *code_item);
CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
compiler_driver,
@@ -615,7 +618,8 @@
uint32_t method_idx,
jobject class_loader,
const DexFile& dex_file,
- Handle<mirror::DexCache> dex_cache) const {
+ Handle<mirror::DexCache> dex_cache,
+ bool osr) const {
MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
CompilerDriver* compiler_driver = GetCompilerDriver();
InstructionSet instruction_set = compiler_driver->GetInstructionSet();
@@ -663,8 +667,14 @@
dex_compilation_unit.GetDexFile(),
dex_compilation_unit.GetClassDefIndex());
HGraph* graph = new (arena) HGraph(
- arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
- kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable());
+ arena,
+ dex_file,
+ method_idx,
+ requires_barrier,
+ compiler_driver->GetInstructionSet(),
+ kInvalidInvokeType,
+ compiler_driver->GetCompilerOptions().GetDebuggable(),
+ osr);
std::unique_ptr<CodeGenerator> codegen(
CodeGenerator::Create(graph,
@@ -797,10 +807,11 @@
method_idx,
jclass_loader,
dex_file,
- dex_cache));
+ dex_cache,
+ /* osr */ false));
if (codegen.get() != nullptr) {
MaybeRecordStat(MethodCompilationStat::kCompiled);
- method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver);
+ method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item);
}
} else {
if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
@@ -843,7 +854,8 @@
bool OptimizingCompiler::JitCompile(Thread* self,
jit::JitCodeCache* code_cache,
- ArtMethod* method) {
+ ArtMethod* method,
+ bool osr) {
StackHandleScope<2> hs(self);
Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
method->GetDeclaringClass()->GetClassLoader()));
@@ -873,7 +885,8 @@
method_idx,
jclass_loader,
*dex_file,
- dex_cache));
+ dex_cache,
+ osr));
if (codegen.get() == nullptr) {
return false;
}
@@ -885,7 +898,7 @@
return false;
}
MaybeRecordStat(MethodCompilationStat::kCompiled);
- codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
+ codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item);
const void* code = code_cache->CommitCode(
self,
method,
@@ -896,7 +909,8 @@
codegen->GetCoreSpillMask(),
codegen->GetFpuSpillMask(),
code_allocator.GetMemory().data(),
- code_allocator.GetSize());
+ code_allocator.GetSize(),
+ osr);
if (code == nullptr) {
code_cache->ClearData(self, stack_map_data);
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index 631b784..b3a2979 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -429,6 +429,56 @@
END art_quick_invoke_stub_internal
/*
+ * On stack replacement stub.
+ * On entry:
+ * r0 = stack to copy
+ * r1 = size of stack
+ * r2 = pc to call
+ * r3 = JValue* result
+ * [sp] = shorty
+ * [sp + 4] = thread
+ */
+ENTRY art_quick_osr_stub
+ SPILL_ALL_CALLEE_SAVE_GPRS @ Spill regs (9)
+ mov r11, sp @ Save the stack pointer
+ mov r10, r1 @ Save size of stack
+ ldr r9, [r11, #40] @ Move managed thread pointer into r9
+ mov r8, r2 @ Save the pc to call
+ sub r7, sp, #12 @ Reserve space for stack pointer, JValue result, and ArtMethod* slot
+ and r7, #0xFFFFFFF0 @ Align stack pointer
+ mov sp, r7 @ Update stack pointer
+ str r11, [sp, #4] @ Save old stack pointer
+ str r3, [sp, #8] @ Save JValue result
+ mov ip, #0
+ str ip, [sp] @ Store null for ArtMethod* at bottom of frame
+ sub sp, sp, r1 @ Reserve space for callee stack
+ mov r2, r1
+ mov r1, r0
+ mov r0, sp
+ bl memcpy @ memcpy (dest r0, src r1, bytes r2)
+ bl .Losr_entry @ Call the method
+ ldr r11, [sp, #4] @ Restore saved stack pointer
+ ldr r10, [sp, #8] @ Restore JValue result
+ mov sp, r11 @ Restore stack pointer.
+ ldr r4, [sp, #36] @ load shorty
+ ldr r4, [r4, #0] @ load return type
+ cmp r4, #68 @ Test if result type char == 'D'.
+ beq .Losr_fp_result
+ cmp r4, #70 @ Test if result type char == 'F'.
+ beq .Losr_fp_result
+ strd r0, [r10] @ Store r0/r1 into result pointer
+ b .Losr_exit
+.Losr_fp_result:
+ vstr d0, [r10] @ Store s0-s1/d0 into result pointer
+.Losr_exit:
+ pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+.Losr_entry:
+ sub r10, r10, #4
+ str lr, [sp, r10] @ Store link register per the compiler ABI
+ bx r8
+END art_quick_osr_stub
+
+ /*
* On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_
*/
ARM_ENTRY art_quick_do_long_jump
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 9ccabad..e848008 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -915,6 +915,105 @@
+/* extern "C" void art_quick_osr_stub(void** stack, x0
+ * size_t stack_size_in_bytes, x1
+ * const uint8_t* native_pc, x2
+ * JValue *result, x3
+ * char *shorty, x4
+ * Thread *self) x5
+ */
+ENTRY art_quick_osr_stub
+SAVE_SIZE=15*8 // x3, x4, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
+ mov x9, sp // Save stack pointer.
+ .cfi_register sp,x9
+
+ sub x10, sp, # SAVE_SIZE
+ and x10, x10, # ~0xf // Enforce 16 byte stack alignment.
+ mov sp, x10 // Set new SP.
+
+ str x28, [sp, #112]
+ stp x26, x27, [sp, #96]
+ stp x24, x25, [sp, #80]
+ stp x22, x23, [sp, #64]
+ stp x20, x21, [sp, #48]
+ stp x9, x19, [sp, #32] // Save old stack pointer and x19.
+ stp x3, x4, [sp, #16] // Save result and shorty addresses.
+ stp xFP, xLR, [sp] // Store LR & FP.
+ mov xSELF, x5 // Move thread pointer into SELF register.
+
+ sub sp, sp, #16
+ str xzr, [sp] // Store null for ArtMethod* slot
+ // Branch to stub.
+ bl .Losr_entry
+ add sp, sp, #16
+
+ // Restore return value address and shorty address.
+ ldp x3, x4, [sp, #16]
+ ldr x28, [sp, #112]
+ ldp x26, x27, [sp, #96]
+ ldp x24, x25, [sp, #80]
+ ldp x22, x23, [sp, #64]
+ ldp x20, x21, [sp, #48]
+
+ // Store result (w0/x0/s0/d0) appropriately, depending on the result type.
+ ldrb w10, [x4]
+
+ // Check the return type and store the correct register into the jvalue in memory.
+
+ // Don't set anything for a void type.
+ cmp w10, #'V'
+ beq .Losr_exit
+
+ // Is it a double?
+ cmp w10, #'D'
+ bne .Lno_double
+ str d0, [x3]
+ b .Losr_exit
+
+.Lno_double: // Is it a float?
+ cmp w10, #'F'
+ bne .Lno_float
+ str s0, [x3]
+ b .Losr_exit
+
+.Lno_float: // Just store x0. Doesn't matter if it is 64 or 32 bits.
+ str x0, [x3]
+
+.Losr_exit: // Finish up.
+ ldp x2, x19, [sp, #32] // Restore stack pointer and x19.
+ ldp xFP, xLR, [sp] // Restore old frame pointer and link register.
+ mov sp, x2
+ ret
+
+.Losr_entry:
+ // Update stack pointer for the callee
+ sub sp, sp, x1
+
+ // Update link register slot expected by the callee.
+ sub w1, w1, #8
+ str lr, [sp, x1]
+
+ // Copy arguments into stack frame.
+ // Use simple copy routine for now.
+ // 4 bytes per slot.
+ // X0 - source address
+ // W1 - args length
+ // SP - destination address.
+ // W10 - temporary
+.Losr_loop_entry:
+ cmp w1, #0
+ beq .Losr_loop_exit
+ sub w1, w1, #4
+ ldr w10, [x0, x1]
+ str w10, [sp, x1]
+ b .Losr_loop_entry
+
+.Losr_loop_exit:
+ // Branch to the OSR entry point.
+ br x2
+
+END art_quick_osr_stub
+
/*
* On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_
*/
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index da30331..fbee5d7 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -1712,5 +1712,65 @@
ret
END_FUNCTION art_quick_read_barrier_for_root_slow
+ /*
+ * On stack replacement stub.
+ * On entry:
+ * [sp] = return address
+ * [sp + 4] = stack to copy
+ * [sp + 8] = size of stack
+ * [sp + 12] = pc to call
+ * [sp + 16] = JValue* result
+ * [sp + 20] = shorty
+ * [sp + 24] = thread
+ */
+DEFINE_FUNCTION art_quick_osr_stub
+ // Save native callee saves.
+ PUSH ebp
+ PUSH ebx
+ PUSH esi
+ PUSH edi
+ mov 4+16(%esp), %esi // ESI = argument array
+ mov 8+16(%esp), %ecx // ECX = size of args
+ mov 12+16(%esp), %ebx // EBX = pc to call
+ mov %esp, %ebp // Save stack pointer
+ andl LITERAL(0xFFFFFFF0), %esp // Align stack
+ PUSH ebp // Save old stack pointer
+ subl LITERAL(12), %esp // Align stack
+ movl LITERAL(0), (%esp) // Store null for ArtMethod* slot
+ call .Losr_entry
+
+ // Restore stack pointer.
+ addl LITERAL(12), %esp
+ POP ebp
+ mov %ebp, %esp
+
+ // Restore callee saves.
+ POP edi
+ POP esi
+ POP ebx
+ POP ebp
+ mov 16(%esp), %ecx // Get JValue result
+ mov %eax, (%ecx) // Store the result assuming it is a long, int or Object*
+ mov %edx, 4(%ecx) // Store the other half of the result
+ mov 20(%esp), %edx // Get the shorty
+ cmpb LITERAL(68), (%edx) // Test if result type char == 'D'
+ je .Losr_return_double_quick
+ cmpb LITERAL(70), (%edx) // Test if result type char == 'F'
+ je .Losr_return_float_quick
+ ret
+.Losr_return_double_quick:
+ movsd %xmm0, (%ecx) // Store the floating point result
+ ret
+.Losr_return_float_quick:
+ movss %xmm0, (%ecx) // Store the floating point result
+ ret
+.Losr_entry:
+ subl LITERAL(4), %ecx // The given stack size includes the pushed frame pointer; subtract it.
+ subl %ecx, %esp
+ mov %esp, %edi // EDI = beginning of stack
+ rep movsb // while (ecx--) { *edi++ = *esi++ }
+ jmp *%ebx
+END_FUNCTION art_quick_osr_stub
+
// TODO: implement these!
UNIMPLEMENTED art_quick_memcmp16
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 883da96..d6e0f1c 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -1744,3 +1744,62 @@
RESTORE_FP_CALLEE_SAVE_FRAME
ret
END_FUNCTION art_quick_read_barrier_for_root_slow
+
+ /*
+ * On stack replacement stub.
+ * On entry:
+ * [sp] = return address
+ * rdi = stack to copy
+ * rsi = size of stack
+ * rdx = pc to call
+ * rcx = JValue* result
+ * r8 = shorty
+ * r9 = thread
+ */
+DEFINE_FUNCTION art_quick_osr_stub
+ // Save the non-volatiles.
+ PUSH rbp // Save rbp.
+ PUSH rcx // Save rcx/result*.
+ PUSH r8 // Save r8/shorty*.
+
+ // Save callee saves.
+ PUSH rbx
+ PUSH r12
+ PUSH r13
+ PUSH r14
+ PUSH r15
+
+ pushq LITERAL(0) // Push null for ArtMethod*.
+ movl %esi, %ecx // rcx := size of stack
+ movq %rdi, %rsi // rsi := stack to copy
+ call .Losr_entry
+
+ // Restore stack and callee-saves.
+ addq LITERAL(8), %rsp
+ POP r15
+ POP r14
+ POP r13
+ POP r12
+ POP rbx
+ POP r8
+ POP rcx
+ POP rbp
+ cmpb LITERAL(68), (%r8) // Test if result type char == 'D'.
+ je .Losr_return_double_quick
+ cmpb LITERAL(70), (%r8) // Test if result type char == 'F'.
+ je .Losr_return_float_quick
+ movq %rax, (%rcx) // Store the result, assuming it is a long, int or Object*
+ ret
+.Losr_return_double_quick:
+ movsd %xmm0, (%rcx) // Store the double floating point result.
+ ret
+.Losr_return_float_quick:
+ movss %xmm0, (%rcx) // Store the floating point result.
+ ret
+.Losr_entry:
+ subl LITERAL(8), %ecx // The given stack size includes the pushed frame pointer; subtract it.
+ subq %rcx, %rsp
+ movq %rsp, %rdi // rdi := beginning of stack
+ rep movsb // while (rcx--) { *rdi++ = *rsi++ }
+ jmp *%rdx
+END_FUNCTION art_quick_osr_stub
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 6f36016..cd38e16 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -292,22 +292,7 @@
// Unusual case where we were running generated code and an
// exception was thrown to force the activations to be removed from the
// stack. Continue execution in the interpreter.
- self->ClearException();
- ShadowFrame* shadow_frame =
- self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
- mirror::Throwable* pending_exception = nullptr;
- bool from_code = false;
- self->PopDeoptimizationContext(result, &pending_exception, &from_code);
- CHECK(!from_code);
- self->SetTopOfStack(nullptr);
- self->SetTopOfShadowStack(shadow_frame);
-
- // Restore the exception that was pending before deoptimization then interpret the
- // deoptimized frames.
- if (pending_exception != nullptr) {
- self->SetException(pending_exception);
- }
- interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, from_code, result);
+ self->DeoptimizeWithDeoptimizationException(result);
}
if (kLogInvocationStartAndReturn) {
LOG(INFO) << StringPrintf("Returned '%s' quick code=%p", PrettyMethod(this).c_str(),
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index b5a55bf..3dfad76 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -273,15 +273,15 @@
if (outer_method != nullptr) {
const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc);
if (current_code->IsOptimized()) {
- uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
- CodeInfo code_info = current_code->GetOptimizedCodeInfo();
- StackMapEncoding encoding = code_info.ExtractEncoding();
- StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
- DCHECK(stack_map.IsValid());
- if (stack_map.HasInlineInfo(encoding)) {
- InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
- caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
- }
+ uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc);
+ CodeInfo code_info = current_code->GetOptimizedCodeInfo();
+ StackMapEncoding encoding = code_info.ExtractEncoding();
+ StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding);
+ DCHECK(stack_map.IsValid());
+ if (stack_map.HasInlineInfo(encoding)) {
+ InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding);
+ caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1);
+ }
}
}
if (kIsDebugBuild && do_caller_check) {
diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc
index 940d344..ca8598e 100644
--- a/runtime/interpreter/interpreter_goto_table_impl.cc
+++ b/runtime/interpreter/interpreter_goto_table_impl.cc
@@ -21,6 +21,7 @@
#include "base/stl_util.h" // MakeUnique
#include "experimental_flags.h"
#include "interpreter_common.h"
+#include "jit/jit.h"
#include "safe_math.h"
#include <memory> // std::unique_ptr
@@ -63,10 +64,15 @@
currentHandlersTable = handlersTable[ \
Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()]
-#define BRANCH_INSTRUMENTATION(offset) \
- do { \
+#define BRANCH_INSTRUMENTATION(offset) \
+ do { \
+ ArtMethod* method = shadow_frame.GetMethod(); \
instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \
- instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
+ instrumentation->Branch(self, method, dex_pc, offset); \
+ JValue result; \
+ if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) { \
+ return result; \
+ } \
} while (false)
#define UNREACHABLE_CODE_CHECK() \
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index f606978..25dbab2 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -17,6 +17,7 @@
#include "base/stl_util.h" // MakeUnique
#include "experimental_flags.h"
#include "interpreter_common.h"
+#include "jit/jit.h"
#include "safe_math.h"
#include <memory> // std::unique_ptr
@@ -69,9 +70,14 @@
} \
} while (false)
-#define BRANCH_INSTRUMENTATION(offset) \
- do { \
- instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \
+#define BRANCH_INSTRUMENTATION(offset) \
+ do { \
+ ArtMethod* method = shadow_frame.GetMethod(); \
+ instrumentation->Branch(self, method, dex_pc, offset); \
+ JValue result; \
+ if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) { \
+ return result; \
+ } \
} while (false)
static bool IsExperimentalInstructionEnabled(const Instruction *inst) {
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index fa5c41d..3e152e1 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -25,10 +25,12 @@
#include "jit_code_cache.h"
#include "jit_instrumentation.h"
#include "oat_file_manager.h"
+#include "oat_quick_method_header.h"
#include "offline_profiling_info.h"
#include "profile_saver.h"
#include "runtime.h"
#include "runtime_options.h"
+#include "stack_map.h"
#include "utils.h"
namespace art {
@@ -43,6 +45,8 @@
options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheMaxCapacity);
jit_options->compile_threshold_ =
options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold);
+ // TODO(ngeoffray): Make this a proper option.
+ jit_options->osr_threshold_ = jit_options->compile_threshold_ * 2;
jit_options->warmup_threshold_ =
options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold);
jit_options->dump_info_on_shutdown_ =
@@ -121,7 +125,7 @@
*error_msg = "JIT couldn't find jit_unload entry point";
return false;
}
- jit_compile_method_ = reinterpret_cast<bool (*)(void*, ArtMethod*, Thread*)>(
+ jit_compile_method_ = reinterpret_cast<bool (*)(void*, ArtMethod*, Thread*, bool)>(
dlsym(jit_library_handle_, "jit_compile_method"));
if (jit_compile_method_ == nullptr) {
dlclose(jit_library_handle_);
@@ -156,7 +160,7 @@
return true;
}
-bool Jit::CompileMethod(ArtMethod* method, Thread* self) {
+bool Jit::CompileMethod(ArtMethod* method, Thread* self, bool osr) {
DCHECK(!method->IsRuntimeMethod());
// Don't compile the method if it has breakpoints.
if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) {
@@ -171,10 +175,11 @@
return false;
}
- if (!code_cache_->NotifyCompilationOf(method, self)) {
+ if (!code_cache_->NotifyCompilationOf(method, self, osr)) {
+ VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to code cache";
return false;
}
- bool success = jit_compile_method_(jit_compiler_handle_, method, self);
+ bool success = jit_compile_method_(jit_compiler_handle_, method, self, osr);
code_cache_->DoneCompiling(method, self);
return success;
}
@@ -224,9 +229,11 @@
}
}
-void Jit::CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold) {
+void Jit::CreateInstrumentationCache(size_t compile_threshold,
+ size_t warmup_threshold,
+ size_t osr_threshold) {
instrumentation_cache_.reset(
- new jit::JitInstrumentationCache(compile_threshold, warmup_threshold));
+ new jit::JitInstrumentationCache(compile_threshold, warmup_threshold, osr_threshold));
}
void Jit::NewTypeLoadedIfUsingJit(mirror::Class* type) {
@@ -255,5 +262,120 @@
}
}
+extern "C" void art_quick_osr_stub(void** stack,
+ uint32_t stack_size_in_bytes,
+ const uint8_t* native_pc,
+ JValue* result,
+ const char* shorty,
+ Thread* self);
+
+bool Jit::MaybeDoOnStackReplacement(Thread* thread,
+ ArtMethod* method,
+ uint32_t dex_pc,
+ int32_t dex_pc_offset,
+ JValue* result) {
+ Jit* jit = Runtime::Current()->GetJit();
+ if (jit == nullptr) {
+ return false;
+ }
+
+ if (kRuntimeISA == kMips || kRuntimeISA == kMips64) {
+ VLOG(jit) << "OSR not supported on this platform";
+ return false;
+ }
+
+ // Cheap check if the method has been compiled already. That's an indicator that we should
+ // osr into it.
+ if (!jit->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+ return false;
+ }
+
+ const OatQuickMethodHeader* osr_method = jit->GetCodeCache()->LookupOsrMethodHeader(method);
+ if (osr_method == nullptr) {
+ // No osr method yet, just return to the interpreter.
+ return false;
+ }
+
+ const size_t number_of_vregs = method->GetCodeItem()->registers_size_;
+ CodeInfo code_info = osr_method->GetOptimizedCodeInfo();
+ StackMapEncoding encoding = code_info.ExtractEncoding();
+
+ // Find stack map starting at the target dex_pc.
+ StackMap stack_map = code_info.GetOsrStackMapForDexPc(dex_pc + dex_pc_offset, encoding);
+ if (!stack_map.IsValid()) {
+ // There is no OSR stack map for this dex pc offset. Just return to the interpreter in the
+ // hope that the next branch has one.
+ return false;
+ }
+
+ // We found a stack map, now fill the frame with dex register values from the interpreter's
+ // shadow frame.
+ DexRegisterMap vreg_map =
+ code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs);
+
+ ShadowFrame* shadow_frame = thread->PopShadowFrame();
+
+ size_t frame_size = osr_method->GetFrameSizeInBytes();
+ void** memory = reinterpret_cast<void**>(malloc(frame_size));
+ memset(memory, 0, frame_size);
+
+ // Art ABI: ArtMethod is at the bottom of the stack.
+ memory[0] = method;
+
+ if (!vreg_map.IsValid()) {
+ // If we don't have a dex register map, then there are no live dex registers at
+ // this dex pc.
+ } else {
+ for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) {
+ DexRegisterLocation::Kind location =
+ vreg_map.GetLocationKind(vreg, number_of_vregs, code_info, encoding);
+ if (location == DexRegisterLocation::Kind::kNone) {
+ // Dex register is dead or uninitialized.
+ continue;
+ }
+
+ if (location == DexRegisterLocation::Kind::kConstant) {
+ // We skip constants because the compiled code knows how to handle them.
+ continue;
+ }
+
+ DCHECK(location == DexRegisterLocation::Kind::kInStack);
+
+ int32_t vreg_value = shadow_frame->GetVReg(vreg);
+ int32_t slot_offset = vreg_map.GetStackOffsetInBytes(vreg,
+ number_of_vregs,
+ code_info,
+ encoding);
+ DCHECK_LT(slot_offset, static_cast<int32_t>(frame_size));
+ DCHECK_GT(slot_offset, 0);
+ (reinterpret_cast<int32_t*>(memory))[slot_offset / sizeof(int32_t)] = vreg_value;
+ }
+ }
+
+ const uint8_t* native_pc = stack_map.GetNativePcOffset(encoding) + osr_method->GetEntryPoint();
+ VLOG(jit) << "Jumping to "
+ << PrettyMethod(method)
+ << "@"
+ << std::hex << reinterpret_cast<uintptr_t>(native_pc);
+ {
+ ManagedStack fragment;
+ thread->PushManagedStackFragment(&fragment);
+ (*art_quick_osr_stub)(memory,
+ frame_size,
+ native_pc,
+ result,
+ method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(),
+ thread);
+ if (UNLIKELY(thread->GetException() == Thread::GetDeoptimizationException())) {
+ thread->DeoptimizeWithDeoptimizationException(result);
+ }
+ thread->PopManagedStackFragment(fragment);
+ }
+ free(memory);
+ thread->PushShadowFrame(shadow_frame);
+ VLOG(jit) << "Done running OSR code for " << PrettyMethod(method);
+ return true;
+}
+
} // namespace jit
} // namespace art
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index a80f51f..042da92 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -49,9 +49,11 @@
virtual ~Jit();
static Jit* Create(JitOptions* options, std::string* error_msg);
- bool CompileMethod(ArtMethod* method, Thread* self)
+ bool CompileMethod(ArtMethod* method, Thread* self, bool osr)
SHARED_REQUIRES(Locks::mutator_lock_);
- void CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold);
+ void CreateInstrumentationCache(size_t compile_threshold,
+ size_t warmup_threshold,
+ size_t osr_threshold);
void CreateThreadPool();
CompilerCallbacks* GetCompilerCallbacks() {
return compiler_callbacks_;
@@ -88,6 +90,17 @@
bool JitAtFirstUse();
+ // If an OSR compiled version is available for `method`,
+ // and `dex_pc + dex_pc_offset` is an entry point of that compiled
+ // version, this method will jump to the compiled code, let it run,
+ // and return true afterwards. Return false otherwise.
+ static bool MaybeDoOnStackReplacement(Thread* thread,
+ ArtMethod* method,
+ uint32_t dex_pc,
+ int32_t dex_pc_offset,
+ JValue* result)
+ SHARED_REQUIRES(Locks::mutator_lock_);
+
private:
Jit();
bool LoadCompiler(std::string* error_msg);
@@ -97,7 +110,7 @@
void* jit_compiler_handle_;
void* (*jit_load_)(CompilerCallbacks**, bool*);
void (*jit_unload_)(void*);
- bool (*jit_compile_method_)(void*, ArtMethod*, Thread*);
+ bool (*jit_compile_method_)(void*, ArtMethod*, Thread*, bool);
void (*jit_types_loaded_)(void*, mirror::Class**, size_t count);
// Performance monitoring.
@@ -123,6 +136,9 @@
size_t GetWarmupThreshold() const {
return warmup_threshold_;
}
+ size_t GetOsrThreshold() const {
+ return osr_threshold_;
+ }
size_t GetCodeCacheInitialCapacity() const {
return code_cache_initial_capacity_;
}
@@ -155,6 +171,7 @@
size_t code_cache_max_capacity_;
size_t compile_threshold_;
size_t warmup_threshold_;
+ size_t osr_threshold_;
bool dump_info_on_shutdown_;
bool save_profiling_info_;
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index f325949..464c441 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -184,7 +184,8 @@
size_t core_spill_mask,
size_t fp_spill_mask,
const uint8_t* code,
- size_t code_size) {
+ size_t code_size,
+ bool osr) {
uint8_t* result = CommitCodeInternal(self,
method,
mapping_table,
@@ -194,7 +195,8 @@
core_spill_mask,
fp_spill_mask,
code,
- code_size);
+ code_size,
+ osr);
if (result == nullptr) {
// Retry.
GarbageCollectCache(self);
@@ -207,7 +209,8 @@
core_spill_mask,
fp_spill_mask,
code,
- code_size);
+ code_size,
+ osr);
}
return result;
}
@@ -287,7 +290,8 @@
size_t core_spill_mask,
size_t fp_spill_mask,
const uint8_t* code,
- size_t code_size) {
+ size_t code_size,
+ bool osr) {
size_t alignment = GetInstructionSetAlignment(kRuntimeISA);
// Ensure the header ends up at expected instruction alignment.
size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment);
@@ -329,8 +333,12 @@
{
MutexLock mu(self, lock_);
method_code_map_.Put(code_ptr, method);
- Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
- method, method_header->GetEntryPoint());
+ if (osr) {
+ osr_code_map_.Put(method, code_ptr);
+ } else {
+ Runtime::Current()->GetInstrumentation()->UpdateMethodsCode(
+ method, method_header->GetEntryPoint());
+ }
if (collection_in_progress_) {
// We need to update the live bitmap if there is a GC to ensure it sees this new
// code.
@@ -338,7 +346,7 @@
}
last_update_time_ns_.StoreRelease(NanoTime());
VLOG(jit)
- << "JIT added "
+ << "JIT added (osr = " << std::boolalpha << osr << std::noboolalpha << ") "
<< PrettyMethod(method) << "@" << method
<< " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": "
<< " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": "
@@ -569,6 +577,10 @@
info->GetMethod()->SetProfilingInfo(nullptr);
}
}
+
+ // Empty the osr method map, as osr compiled code will be deleted (except code
+ // currently on thread stacks).
+ osr_code_map_.clear();
}
// Run a checkpoint on all threads to mark the JIT compiled code they are running.
@@ -662,6 +674,15 @@
return method_header;
}
+OatQuickMethodHeader* JitCodeCache::LookupOsrMethodHeader(ArtMethod* method) {
+ MutexLock mu(Thread::Current(), lock_);
+ auto it = osr_code_map_.find(method);
+ if (it == osr_code_map_.end()) {
+ return nullptr;
+ }
+ return OatQuickMethodHeader::FromCodePointer(it->second);
+}
+
ProfilingInfo* JitCodeCache::AddProfilingInfo(Thread* self,
ArtMethod* method,
const std::vector<uint32_t>& entries,
@@ -733,12 +754,15 @@
return last_update_time_ns_.LoadAcquire();
}
-bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self) {
- if (ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
+bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr) {
+ if (!osr && ContainsPc(method->GetEntryPointFromQuickCompiledCode())) {
return false;
}
MutexLock mu(self, lock_);
+ if (osr && (osr_code_map_.find(method) != osr_code_map_.end())) {
+ return false;
+ }
ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*));
if (info == nullptr || info->IsMethodBeingCompiled()) {
return false;
diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h
index 69fc553..048f8d0 100644
--- a/runtime/jit/jit_code_cache.h
+++ b/runtime/jit/jit_code_cache.h
@@ -71,7 +71,7 @@
// Number of compilations done throughout the lifetime of the JIT.
size_t NumberOfCompilations() REQUIRES(!lock_);
- bool NotifyCompilationOf(ArtMethod* method, Thread* self)
+ bool NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr)
SHARED_REQUIRES(Locks::mutator_lock_)
REQUIRES(!lock_);
@@ -89,7 +89,8 @@
size_t core_spill_mask,
size_t fp_spill_mask,
const uint8_t* code,
- size_t code_size)
+ size_t code_size,
+ bool osr)
SHARED_REQUIRES(Locks::mutator_lock_)
REQUIRES(!lock_);
@@ -131,6 +132,10 @@
REQUIRES(!lock_)
SHARED_REQUIRES(Locks::mutator_lock_);
+ OatQuickMethodHeader* LookupOsrMethodHeader(ArtMethod* method)
+ REQUIRES(!lock_)
+ SHARED_REQUIRES(Locks::mutator_lock_);
+
// Remove all methods in our cache that were allocated by 'alloc'.
void RemoveMethodsIn(Thread* self, const LinearAlloc& alloc)
REQUIRES(!lock_)
@@ -187,7 +192,8 @@
size_t core_spill_mask,
size_t fp_spill_mask,
const uint8_t* code,
- size_t code_size)
+ size_t code_size,
+ bool osr)
REQUIRES(!lock_)
SHARED_REQUIRES(Locks::mutator_lock_);
@@ -237,8 +243,10 @@
void* data_mspace_ GUARDED_BY(lock_);
// Bitmap for collecting code and data.
std::unique_ptr<CodeCacheBitmap> live_bitmap_;
- // This map holds compiled code associated to the ArtMethod.
+ // Holds compiled code associated with the ArtMethod.
SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_);
+ // Holds osr compiled code associated with the ArtMethod.
+ SafeMap<ArtMethod*, const void*> osr_code_map_ GUARDED_BY(lock_);
// ProfilingInfo objects we have allocated.
std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_);
diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc
index d597b36..a4e40ad 100644
--- a/runtime/jit/jit_instrumentation.cc
+++ b/runtime/jit/jit_instrumentation.cc
@@ -29,7 +29,8 @@
public:
enum TaskKind {
kAllocateProfile,
- kCompile
+ kCompile,
+ kCompileOsr
};
JitCompileTask(ArtMethod* method, TaskKind kind) : method_(method), kind_(kind) {
@@ -48,9 +49,14 @@
ScopedObjectAccess soa(self);
if (kind_ == kCompile) {
VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_);
- if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) {
+ if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ false)) {
VLOG(jit) << "Failed to compile method " << PrettyMethod(method_);
}
+ } else if (kind_ == kCompileOsr) {
+ VLOG(jit) << "JitCompileTask compiling method osr " << PrettyMethod(method_);
+ if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ true)) {
+ VLOG(jit) << "Failed to compile method osr " << PrettyMethod(method_);
+ }
} else {
DCHECK(kind_ == kAllocateProfile);
if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) {
@@ -72,9 +78,11 @@
};
JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold,
- size_t warm_method_threshold)
+ size_t warm_method_threshold,
+ size_t osr_method_threshold)
: hot_method_threshold_(hot_method_threshold),
warm_method_threshold_(warm_method_threshold),
+ osr_method_threshold_(osr_method_threshold),
listener_(this) {
}
@@ -151,6 +159,11 @@
DCHECK(thread_pool_ != nullptr);
thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile));
}
+
+ if (sample_count == osr_method_threshold_) {
+ DCHECK(thread_pool_ != nullptr);
+ thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr));
+ }
}
JitInstrumentationListener::JitInstrumentationListener(JitInstrumentationCache* cache)
diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h
index 06559ad..d1c5c44 100644
--- a/runtime/jit/jit_instrumentation.h
+++ b/runtime/jit/jit_instrumentation.h
@@ -96,7 +96,9 @@
// Keeps track of which methods are hot.
class JitInstrumentationCache {
public:
- JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold);
+ JitInstrumentationCache(size_t hot_method_threshold,
+ size_t warm_method_threshold,
+ size_t osr_method_threshold);
void AddSamples(Thread* self, ArtMethod* method, size_t samples)
SHARED_REQUIRES(Locks::mutator_lock_);
void CreateThreadPool();
@@ -112,6 +114,7 @@
private:
size_t hot_method_threshold_;
size_t warm_method_threshold_;
+ size_t osr_method_threshold_;
JitInstrumentationListener listener_;
std::unique_ptr<ThreadPool> thread_pool_;
diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h
index 5643739..2b7eca2 100644
--- a/runtime/oat_quick_method_header.h
+++ b/runtime/oat_quick_method_header.h
@@ -108,7 +108,7 @@
}
template <bool kCheckFrameSize = true>
- uint32_t GetFrameSizeInBytes() {
+ uint32_t GetFrameSizeInBytes() const {
uint32_t result = frame_info_.FrameSizeInBytes();
if (kCheckFrameSize) {
DCHECK_LE(static_cast<size_t>(kStackAlignment), result);
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index b1b7473..1b59c6f 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -1887,7 +1887,8 @@
if (jit_.get() != nullptr) {
compiler_callbacks_ = jit_->GetCompilerCallbacks();
jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(),
- jit_options_->GetWarmupThreshold());
+ jit_options_->GetWarmupThreshold(),
+ jit_options_->GetOsrThreshold());
jit_->CreateThreadPool();
// Notify native debugger about the classes already loaded before the creation of the jit.
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 5faff93..1e82860 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -40,7 +40,7 @@
namespace art {
-static constexpr bool kDebugStackWalk = false;
+static constexpr bool kDebugStackWalk = true;
mirror::Object* ShadowFrame::GetThisObject() const {
ArtMethod* m = GetMethod();
diff --git a/runtime/stack_map.h b/runtime/stack_map.h
index 84185ce..97eb805 100644
--- a/runtime/stack_map.h
+++ b/runtime/stack_map.h
@@ -1195,6 +1195,35 @@
return StackMap();
}
+ StackMap GetOsrStackMapForDexPc(uint32_t dex_pc, const StackMapEncoding& encoding) const {
+ size_t e = GetNumberOfStackMaps();
+ if (e == 0) {
+ // There cannot be OSR stack map if there is no stack map.
+ return StackMap();
+ }
+ // Walk over all stack maps. If two consecutive stack maps are identical, then we
+ // have found a stack map suitable for OSR.
+ for (size_t i = 0; i < e - 1; ++i) {
+ StackMap stack_map = GetStackMapAt(i, encoding);
+ if (stack_map.GetDexPc(encoding) == dex_pc) {
+ StackMap other = GetStackMapAt(i + 1, encoding);
+ if (other.GetDexPc(encoding) == dex_pc &&
+ other.GetNativePcOffset(encoding) == stack_map.GetNativePcOffset(encoding)) {
+ DCHECK_EQ(other.GetDexRegisterMapOffset(encoding),
+ stack_map.GetDexRegisterMapOffset(encoding));
+ DCHECK(!stack_map.HasInlineInfo(encoding));
+ if (i < e - 2) {
+ // Make sure there are not three identical stack maps following each other.
+ DCHECK_NE(stack_map.GetNativePcOffset(encoding),
+ GetStackMapAt(i + 2, encoding).GetNativePcOffset(encoding));
+ }
+ return stack_map;
+ }
+ }
+ }
+ return StackMap();
+ }
+
StackMap GetStackMapForNativePcOffset(uint32_t native_pc_offset,
const StackMapEncoding& encoding) const {
// TODO: Safepoint stack maps are sorted by native_pc_offset but catch stack
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 2abcd67..c0fb0cd 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -3012,4 +3012,25 @@
return count;
}
+
+void Thread::DeoptimizeWithDeoptimizationException(JValue* result) {
+ DCHECK_EQ(GetException(), Thread::GetDeoptimizationException());
+ ClearException();
+ ShadowFrame* shadow_frame =
+ PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame);
+ mirror::Throwable* pending_exception = nullptr;
+ bool from_code = false;
+ PopDeoptimizationContext(result, &pending_exception, &from_code);
+ CHECK(!from_code) << "Deoptimizing from code should be done with single frame deoptimization";
+ SetTopOfStack(nullptr);
+ SetTopOfShadowStack(shadow_frame);
+
+ // Restore the exception that was pending before deoptimization then interpret the
+ // deoptimized frames.
+ if (pending_exception != nullptr) {
+ SetException(pending_exception);
+ }
+ interpreter::EnterInterpreterFromDeoptimize(this, shadow_frame, from_code, result);
+}
+
} // namespace art
diff --git a/runtime/thread.h b/runtime/thread.h
index d7887ca..0660cd7 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -552,6 +552,9 @@
OFFSETOF_MEMBER(tls_32bit_sized_values, is_gc_marking));
}
+ // Deoptimize the Java stack.
+ void DeoptimizeWithDeoptimizationException(JValue* result) SHARED_REQUIRES(Locks::mutator_lock_);
+
private:
template<size_t pointer_size>
static ThreadOffset<pointer_size> ThreadOffsetFromTlsPtr(size_t tls_ptr_offset) {
diff --git a/test/570-checker-osr/expected.txt b/test/570-checker-osr/expected.txt
new file mode 100644
index 0000000..555c6a9
--- /dev/null
+++ b/test/570-checker-osr/expected.txt
@@ -0,0 +1,5 @@
+JNI_OnLoad called
+100000000
+200000000
+300000000
+400000000
diff --git a/test/570-checker-osr/info.txt b/test/570-checker-osr/info.txt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/570-checker-osr/info.txt
diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc
new file mode 100644
index 0000000..fb84687
--- /dev/null
+++ b/test/570-checker-osr/osr.cc
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "art_method.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "oat_quick_method_header.h"
+#include "scoped_thread_state_change.h"
+#include "stack_map.h"
+
+namespace art {
+
+class OsrVisitor : public StackVisitor {
+ public:
+ explicit OsrVisitor(Thread* thread)
+ SHARED_REQUIRES(Locks::mutator_lock_)
+ : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+ in_osr_method_(false) {}
+
+ bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+ ArtMethod* m = GetMethod();
+ std::string m_name(m->GetName());
+
+ if ((m_name.compare("$noinline$returnInt") == 0) ||
+ (m_name.compare("$noinline$returnFloat") == 0) ||
+ (m_name.compare("$noinline$returnDouble") == 0) ||
+ (m_name.compare("$noinline$returnLong") == 0) ||
+ (m_name.compare("$noinline$deopt") == 0)) {
+ const OatQuickMethodHeader* header =
+ Runtime::Current()->GetJit()->GetCodeCache()->LookupOsrMethodHeader(m);
+ if (header != nullptr && header == GetCurrentOatQuickMethodHeader()) {
+ in_osr_method_ = true;
+ }
+ return false;
+ }
+ return true;
+ }
+
+ bool in_osr_method_;
+};
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_ensureInOsrCode(JNIEnv*, jclass) {
+ jit::Jit* jit = Runtime::Current()->GetJit();
+ if (jit == nullptr) {
+ // Just return true for non-jit configurations to stop the infinite loop.
+ return JNI_TRUE;
+ }
+ ScopedObjectAccess soa(Thread::Current());
+ OsrVisitor visitor(soa.Self());
+ visitor.WalkStack();
+ return visitor.in_osr_method_;
+}
+
+} // namespace art
diff --git a/test/570-checker-osr/smali/Osr.smali b/test/570-checker-osr/smali/Osr.smali
new file mode 100644
index 0000000..869c7c3
--- /dev/null
+++ b/test/570-checker-osr/smali/Osr.smali
@@ -0,0 +1,35 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LOsr;
+
+.super Ljava/lang/Object;
+
+# Check that blocks only having nops are not merged when they are loop headers.
+# This ensures we can do on-stack replacement for branches to those nop blocks.
+
+## CHECK-START: int Osr.simpleLoop(int, int) dead_code_elimination_final (after)
+## CHECK-DAG: SuspendCheck loop:<<OuterLoop:B\d+>> outer_loop:none
+## CHECK-DAG: SuspendCheck loop:{{B\d+}} outer_loop:<<OuterLoop>>
+.method public static simpleLoop(II)I
+ .registers 3
+ const/16 v0, 0
+ :nop_entry
+ nop
+ :loop_entry
+ add-int v0, v0, v0
+ if-eq v0, v1, :loop_entry
+ if-eq v0, v2, :nop_entry
+ return v0
+.end method
diff --git a/test/570-checker-osr/src/DeoptimizationController.java b/test/570-checker-osr/src/DeoptimizationController.java
new file mode 100644
index 0000000..907d133
--- /dev/null
+++ b/test/570-checker-osr/src/DeoptimizationController.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is a copy of 802-deoptimization/src/DeoptimizationController.java
+// because run-test requires each test to be standalone.
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+
+/**
+ * Controls deoptimization using the dalvik.system.VMDebug class.
+ */
+public class DeoptimizationController {
+ private static final String TEMP_FILE_NAME_PREFIX = "test";
+ private static final String TEMP_FILE_NAME_SUFFIX = ".trace";
+
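+  // Try the default temp directory first, then fall back to /data/local/tmp and
+  // /sdcard, since the default location may not be writable on the device.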
+ private static File createTempFile() throws Exception {
+ try {
+ return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+ } catch (IOException e) {
+ System.setProperty("java.io.tmpdir", "/data/local/tmp");
+ try {
+ return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+ } catch (IOException e2) {
+ System.setProperty("java.io.tmpdir", "/sdcard");
+ return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+ }
+ }
+ }
+
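+  // Starting method tracing makes the runtime deoptimize compiled code; the OSR test
+  // uses this to force a deoptimization while OSR code is on the stack.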
+ public static void startDeoptimization() {
+ File tempFile = null;
+ try {
+ tempFile = createTempFile();
+ String tempFileName = tempFile.getPath();
+
+ VMDebug.startMethodTracing(tempFileName, 0, 0, false, 1000);
+ if (VMDebug.getMethodTracingMode() == 0) {
+ throw new IllegalStateException("Not tracing.");
+ }
+ } catch (Exception exc) {
+ exc.printStackTrace(System.err);
+ } finally {
+ if (tempFile != null) {
+ tempFile.delete();
+ }
+ }
+ }
+
+ public static void stopDeoptimization() {
+ try {
+ VMDebug.stopMethodTracing();
+ if (VMDebug.getMethodTracingMode() != 0) {
+ throw new IllegalStateException("Still tracing.");
+ }
+ } catch (Exception exc) {
+ exc.printStackTrace(System.err);
+ }
+ }
+
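+  // Reflection wrapper around dalvik.system.VMDebug, which is not available on the
+  // classpath the test is compiled against.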
+ private static class VMDebug {
+ private static final Method startMethodTracingMethod;
+ private static final Method stopMethodTracingMethod;
+ private static final Method getMethodTracingModeMethod;
+
+ static {
+ try {
+ Class<?> c = Class.forName("dalvik.system.VMDebug");
+ startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
+ Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
+ stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
+ getMethodTracingModeMethod = c.getDeclaredMethod("getMethodTracingMode");
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public static void startMethodTracing(String filename, int bufferSize, int flags,
+ boolean samplingEnabled, int intervalUs) throws Exception {
+ startMethodTracingMethod.invoke(null, filename, bufferSize, flags, samplingEnabled,
+ intervalUs);
+ }
+ public static void stopMethodTracing() throws Exception {
+ stopMethodTracingMethod.invoke(null);
+ }
+ public static int getMethodTracingMode() throws Exception {
+ return (int) getMethodTracingModeMethod.invoke(null);
+ }
+ }
+}
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
new file mode 100644
index 0000000..7485163
--- /dev/null
+++ b/test/570-checker-osr/src/Main.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+ public static void main(String[] args) {
+ System.loadLibrary(args[0]);
+ if ($noinline$returnInt() != 53) {
+ throw new Error("Unexpected return value");
+ }
+ if ($noinline$returnFloat() != 42.2f) {
+ throw new Error("Unexpected return value");
+ }
+ if ($noinline$returnDouble() != Double.longBitsToDouble(0xF000000000001111L)) {
+      throw new Error("Unexpected return value");
+ }
+ if ($noinline$returnLong() != 0xFFFF000000001111L) {
+ throw new Error("Unexpected return value");
+ }
+
+ try {
+ $noinline$deopt();
+ } catch (Exception e) {}
+ }
+
+ public static int $noinline$returnInt() {
+ if (doThrow) throw new Error("");
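+    // Run the loop long enough to cross the JIT's OSR threshold, then spin until the
+    // native check confirms this frame is executing the OSR-compiled code.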
+ int i = 0;
+ for (; i < 100000000; ++i) {
+ }
+ while (!ensureInOsrCode()) {}
+ System.out.println(i);
+ return 53;
+ }
+
+ public static float $noinline$returnFloat() {
+ if (doThrow) throw new Error("");
+ int i = 0;
+ for (; i < 200000000; ++i) {
+ }
+ while (!ensureInOsrCode()) {}
+ System.out.println(i);
+ return 42.2f;
+ }
+
+ public static double $noinline$returnDouble() {
+ if (doThrow) throw new Error("");
+ int i = 0;
+ for (; i < 300000000; ++i) {
+ }
+ while (!ensureInOsrCode()) {}
+ System.out.println(i);
+ return Double.longBitsToDouble(0xF000000000001111L);
+ }
+
+ public static long $noinline$returnLong() {
+ if (doThrow) throw new Error("");
+ int i = 1000000;
+ for (; i < 400000000; ++i) {
+ }
+ while (!ensureInOsrCode()) {}
+ System.out.println(i);
+ return 0xFFFF000000001111L;
+ }
+
+ public static void $noinline$deopt() {
+ if (doThrow) throw new Error("");
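+    // Same pattern as above, but once in OSR code, trigger a deoptimization through
+    // method tracing instead of returning.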
+ int i = 0;
+ for (; i < 100000000; ++i) {
+ }
+ while (!ensureInOsrCode()) {}
+ DeoptimizationController.startDeoptimization();
+ }
+
+ public static int[] array = new int[4];
+
+ public static native boolean ensureInOsrCode();
+
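+  // Never set at runtime; the "if (doThrow) throw" guards above follow the usual ART
+  // test idiom for making sure the $noinline$ methods are not inlined.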
+ public static boolean doThrow = false;
+}
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index faaf1f0..e547c72 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -40,7 +40,8 @@
466-get-live-vreg/get_live_vreg_jni.cc \
497-inlining-and-class-loader/clear_dex_cache.cc \
543-env-long-ref/env_long_ref.cc \
- 566-polymorphic-inlining/polymorphic_inline.cc
+ 566-polymorphic-inlining/polymorphic_inline.cc \
+ 570-checker-osr/osr.cc
ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so