author    2016-02-05 16:51:53 +0000
committer 2016-02-05 17:11:54 +0000
commit    b331febbab8e916680faba722cc84b66b84218a3
tree      35f985b021e476914bfe91492da23fee218014a7
parent    586996afc905518ed926e4680aab67bedabec9b7
Revert "Revert "Implement on-stack replacement for arm/arm64/x86/x86_64.""
This reverts commit bd89a5c556324062b7d841843b039392e84cfaf4.
Change-Id: I08d190431520baa7fcec8fbdb444519f25ac8d44
38 files changed, 1101 insertions, 154 deletions
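In short, the patch threads an `osr` flag through the JIT compile path (`jit_compile_method`, `JitCompiler::CompileMethod`, `OptimizingCompiler::JitCompile`), treats loops as irreducible when compiling for OSR, duplicates the stack map emitted at each loop header's suspend check as an OSR entry marker, adds per-architecture `art_quick_osr_stub` trampolines, and has the interpreter's branch instrumentation call `Jit::MaybeDoOnStackReplacement` on every taken branch. The sketch below is a minimal, self-contained model of that runtime-side entry decision; the types and the stub (`OsrStackMap`, `OsrMethod`, `ShadowFrameModel`, `fake_osr_stub`) are illustrative stand-ins and not ART's real classes — the actual implementation is `Jit::MaybeDoOnStackReplacement` in `runtime/jit/jit.cc` in the diff.

```cpp
// Simplified model of the OSR entry decision made on each backward branch in
// the interpreter. All types here are hypothetical stand-ins for illustration.
#include <cstdint>
#include <cstdlib>
#include <map>
#include <vector>

struct OsrStackMap {
  uint32_t dex_pc;            // Loop-header dex pc this map covers.
  uint32_t native_pc_offset;  // Offset of the OSR entry in compiled code.
  std::map<uint16_t, int32_t> vreg_to_stack_offset;  // Live dex registers.
};

struct OsrMethod {
  const uint8_t* entry_point;        // Start of the OSR-compiled code.
  uint32_t frame_size;               // Frame size expected by that code.
  std::vector<OsrStackMap> osr_maps; // Duplicated (OSR) stack maps.
};

struct ShadowFrameModel {
  std::vector<int32_t> vregs;        // Interpreter register values.
};

// Stand-in for art_quick_osr_stub; the real one is assembly that transfers
// control to `native_pc` with the prepared frame on the native stack.
static void fake_osr_stub(void** /*frame*/, uint32_t /*frame_size*/,
                          const uint8_t* /*native_pc*/) {}

bool MaybeDoOnStackReplacement(void* method, const OsrMethod* osr,
                               const ShadowFrameModel& shadow_frame,
                               uint32_t dex_pc, int32_t dex_pc_offset) {
  if (osr == nullptr) {
    return false;  // No OSR-compiled version yet; keep interpreting.
  }
  // Look for an OSR stack map at the branch target.
  const OsrStackMap* map = nullptr;
  for (const OsrStackMap& m : osr->osr_maps) {
    if (m.dex_pc == dex_pc + dex_pc_offset) { map = &m; break; }
  }
  if (map == nullptr) {
    return false;  // Target is not an OSR entry; retry at the next branch.
  }
  // Build a native frame: ArtMethod* at the bottom, live dex registers copied
  // from the shadow frame into the stack slots recorded in the stack map.
  void** frame = static_cast<void**>(calloc(1, osr->frame_size));
  frame[0] = method;
  for (const auto& entry : map->vreg_to_stack_offset) {
    reinterpret_cast<int32_t*>(frame)[entry.second / sizeof(int32_t)] =
        shadow_frame.vregs[entry.first];
  }
  // Jump into compiled code at the loop header's native pc.
  fake_osr_stub(frame, osr->frame_size,
                osr->entry_point + map->native_pc_offset);
  free(frame);
  return true;
}
```

A usage note grounded in the diff: the interpreter only reaches this path once the JIT instrumentation cache has seen `osr_threshold_` samples (set to twice the compile threshold in `jit.cc`) and the background `kCompileOsr` task has committed code into the new `osr_code_map_` of the code cache.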
diff --git a/compiler/compiler.h b/compiler/compiler.h index 3a9ce1bc0e..97c60de8c0 100644 --- a/compiler/compiler.h +++ b/compiler/compiler.h @@ -64,7 +64,8 @@ class Compiler { virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED, jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED, - ArtMethod* method ATTRIBUTE_UNUSED) + ArtMethod* method ATTRIBUTE_UNUSED, + bool osr ATTRIBUTE_UNUSED) SHARED_REQUIRES(Locks::mutator_lock_) { return false; } diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc index 67747586c4..68f4783741 100644 --- a/compiler/jit/jit_compiler.cc +++ b/compiler/jit/jit_compiler.cc @@ -60,11 +60,12 @@ extern "C" void jit_unload(void* handle) { delete reinterpret_cast<JitCompiler*>(handle); } -extern "C" bool jit_compile_method(void* handle, ArtMethod* method, Thread* self) +extern "C" bool jit_compile_method( + void* handle, ArtMethod* method, Thread* self, bool osr) SHARED_REQUIRES(Locks::mutator_lock_) { auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle); DCHECK(jit_compiler != nullptr); - return jit_compiler->CompileMethod(self, method); + return jit_compiler->CompileMethod(self, method, osr); } extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t count) @@ -201,7 +202,7 @@ JitCompiler::~JitCompiler() { } } -bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { +bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) { TimingLogger logger("JIT compiler timing logger", true, VLOG_IS_ON(jit)); const uint64_t start_time = NanoTime(); StackHandleScope<2> hs(self); @@ -223,8 +224,8 @@ bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) { // of that proxy method, as the compiler does not expect a proxy method. ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*)); JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache(); - success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile); - if (success && perf_file_ != nullptr) { + success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile, osr); + if (success && (perf_file_ != nullptr)) { const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode(); std::ostringstream stream; stream << std::hex diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h index 037a18ac7a..5294d0ee35 100644 --- a/compiler/jit/jit_compiler.h +++ b/compiler/jit/jit_compiler.h @@ -37,7 +37,7 @@ class JitCompiler { public: static JitCompiler* Create(); virtual ~JitCompiler(); - bool CompileMethod(Thread* self, ArtMethod* method) + bool CompileMethod(Thread* self, ArtMethod* method, bool osr) SHARED_REQUIRES(Locks::mutator_lock_); CompilerCallbacks* GetCompilerCallbacks() const; size_t GetTotalCompileTime() const { diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc index c7430e7eb6..8d77daf183 100644 --- a/compiler/optimizing/builder.cc +++ b/compiler/optimizing/builder.cc @@ -72,74 +72,6 @@ class Temporaries : public ValueObject { size_t index_; }; -class SwitchTable : public ValueObject { - public: - SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse) - : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) { - int32_t table_offset = instruction.VRegB_31t(); - const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset; - if (sparse) { - CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature)); - } else { - CHECK_EQ(table[0], 
static_cast<uint16_t>(Instruction::kPackedSwitchSignature)); - } - num_entries_ = table[1]; - values_ = reinterpret_cast<const int32_t*>(&table[2]); - } - - uint16_t GetNumEntries() const { - return num_entries_; - } - - void CheckIndex(size_t index) const { - if (sparse_) { - // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order. - DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_)); - } else { - // In a packed table, we have the starting key and num_entries_ values. - DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_)); - } - } - - int32_t GetEntryAt(size_t index) const { - CheckIndex(index); - return values_[index]; - } - - uint32_t GetDexPcForIndex(size_t index) const { - CheckIndex(index); - return dex_pc_ + - (reinterpret_cast<const int16_t*>(values_ + index) - - reinterpret_cast<const int16_t*>(&instruction_)); - } - - // Index of the first value in the table. - size_t GetFirstValueIndex() const { - if (sparse_) { - // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order. - return num_entries_; - } else { - // In a packed table, we have the starting key and num_entries_ values. - return 1; - } - } - - private: - const Instruction& instruction_; - const uint32_t dex_pc_; - - // Whether this is a sparse-switch table (or a packed-switch one). - const bool sparse_; - - // This can't be const as it needs to be computed off of the given instruction, and complicated - // expressions in the initializer list seemed very ugly. - uint16_t num_entries_; - - const int32_t* values_; - - DISALLOW_COPY_AND_ASSIGN(SwitchTable); -}; - void HGraphBuilder::InitializeLocals(uint16_t count) { graph_->SetNumberOfVRegs(count); locals_.resize(count); diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h index 1d604e7135..93e17d6422 100644 --- a/compiler/optimizing/builder.h +++ b/compiler/optimizing/builder.h @@ -30,7 +30,6 @@ namespace art { class Instruction; -class SwitchTable; class HGraphBuilder : public ValueObject { public: diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc index a3bbfdbd27..e1b83f05d6 100644 --- a/compiler/optimizing/code_generator.cc +++ b/compiler/optimizing/code_generator.cc @@ -629,8 +629,76 @@ size_t CodeGenerator::ComputeStackMapsSize() { return stack_map_stream_.PrepareForFillIn(); } -void CodeGenerator::BuildStackMaps(MemoryRegion region) { +static void CheckCovers(uint32_t dex_pc, + const HGraph& graph, + const CodeInfo& code_info, + const ArenaVector<HSuspendCheck*>& loop_headers, + ArenaVector<size_t>* covered) { + StackMapEncoding encoding = code_info.ExtractEncoding(); + for (size_t i = 0; i < loop_headers.size(); ++i) { + if (loop_headers[i]->GetDexPc() == dex_pc) { + if (graph.IsCompilingOsr()) { + DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc, encoding).IsValid()); + } + ++(*covered)[i]; + } + } +} + +// Debug helper to ensure loop entries in compiled code are matched by +// dex branch instructions. +static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph, + const CodeInfo& code_info, + const DexFile::CodeItem& code_item) { + if (graph.HasTryCatch()) { + // One can write loops through try/catch, which we do not support for OSR anyway. 
+ return; + } + ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc)); + for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) { + if (it.Current()->IsLoopHeader()) { + HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck(); + if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) { + loop_headers.push_back(suspend_check); + } + } + } + ArenaVector<size_t> covered(loop_headers.size(), 0, graph.GetArena()->Adapter(kArenaAllocMisc)); + const uint16_t* code_ptr = code_item.insns_; + const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_; + + size_t dex_pc = 0; + while (code_ptr < code_end) { + const Instruction& instruction = *Instruction::At(code_ptr); + if (instruction.IsBranch()) { + uint32_t target = dex_pc + instruction.GetTargetOffset(); + CheckCovers(target, graph, code_info, loop_headers, &covered); + } else if (instruction.IsSwitch()) { + SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH); + uint16_t num_entries = table.GetNumEntries(); + size_t offset = table.GetFirstValueIndex(); + + // Use a larger loop counter type to avoid overflow issues. + for (size_t i = 0; i < num_entries; ++i) { + // The target of the case. + uint32_t target = dex_pc + table.GetEntryAt(i + offset); + CheckCovers(target, graph, code_info, loop_headers, &covered); + } + } + dex_pc += instruction.SizeInCodeUnits(); + code_ptr += instruction.SizeInCodeUnits(); + } + + for (size_t i = 0; i < covered.size(); ++i) { + DCHECK_NE(covered[i], 0u) << "Loop in compiled code has no dex branch equivalent"; + } +} + +void CodeGenerator::BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item) { stack_map_stream_.FillIn(region); + if (kIsDebugBuild) { + CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(region), code_item); + } } void CodeGenerator::RecordPcInfo(HInstruction* instruction, @@ -705,6 +773,46 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction, EmitEnvironment(instruction->GetEnvironment(), slow_path); stack_map_stream_.EndStackMapEntry(); + + HLoopInformation* info = instruction->GetBlock()->GetLoopInformation(); + if (instruction->IsSuspendCheck() && + (info != nullptr) && + graph_->IsCompilingOsr() && + (inlining_depth == 0)) { + DCHECK_EQ(info->GetSuspendCheck(), instruction); + // We duplicate the stack map as a marker that this stack map can be an OSR entry. + // Duplicating it avoids having the runtime recognize and skip an OSR stack map. 
+ DCHECK(info->IsIrreducible()); + stack_map_stream_.BeginStackMapEntry( + dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0); + EmitEnvironment(instruction->GetEnvironment(), slow_path); + stack_map_stream_.EndStackMapEntry(); + if (kIsDebugBuild) { + HEnvironment* environment = instruction->GetEnvironment(); + for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) { + HInstruction* in_environment = environment->GetInstructionAt(i); + if (in_environment != nullptr) { + DCHECK(in_environment->IsPhi() || in_environment->IsConstant()); + Location location = environment->GetLocationAt(i); + DCHECK(location.IsStackSlot() || + location.IsDoubleStackSlot() || + location.IsConstant() || + location.IsInvalid()); + if (location.IsStackSlot() || location.IsDoubleStackSlot()) { + DCHECK_LT(location.GetStackIndex(), static_cast<int32_t>(GetFrameSize())); + } + } + } + } + } else if (kIsDebugBuild) { + // Ensure stack maps are unique, by checking that the native pc in the stack map + // last emitted is different than the native pc of the stack map just emitted. + size_t number_of_stack_maps = stack_map_stream_.GetNumberOfStackMaps(); + if (number_of_stack_maps > 1) { + DCHECK_NE(stack_map_stream_.GetStackMap(number_of_stack_maps - 1).native_pc_offset, + stack_map_stream_.GetStackMap(number_of_stack_maps - 2).native_pc_offset); + } + } } bool CodeGenerator::HasStackMapAtCurrentPc() { diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h index 4f8f146753..0a688cf649 100644 --- a/compiler/optimizing/code_generator.h +++ b/compiler/optimizing/code_generator.h @@ -288,7 +288,7 @@ class CodeGenerator { slow_paths_.push_back(slow_path); } - void BuildStackMaps(MemoryRegion region); + void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item); size_t ComputeStackMapsSize(); bool IsLeafMethod() const { diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc index 9b91b53813..a8841d31c5 100644 --- a/compiler/optimizing/inliner.cc +++ b/compiler/optimizing/inliner.cc @@ -758,6 +758,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method, compiler_driver_->GetInstructionSet(), invoke_type, graph_->IsDebuggable(), + /* osr */ false, graph_->GetCurrentInstructionId()); callee_graph->SetArtMethod(resolved_method); diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc index 3dda8501d2..f269885907 100644 --- a/compiler/optimizing/nodes.cc +++ b/compiler/optimizing/nodes.cc @@ -647,6 +647,10 @@ void HLoopInformation::Populate() { header_->GetGraph()->SetHasIrreducibleLoops(true); PopulateIrreducibleRecursive(back_edge); } else { + if (header_->GetGraph()->IsCompilingOsr()) { + irreducible_ = true; + header_->GetGraph()->SetHasIrreducibleLoops(true); + } PopulateRecursive(back_edge); } } diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h index b8083477cf..116b1c6b1c 100644 --- a/compiler/optimizing/nodes.h +++ b/compiler/optimizing/nodes.h @@ -274,6 +274,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { InstructionSet instruction_set, InvokeType invoke_type = kInvalidInvokeType, bool debuggable = false, + bool osr = false, int start_instruction_id = 0) : arena_(arena), blocks_(arena->Adapter(kArenaAllocBlockList)), @@ -302,7 +303,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)), cached_double_constants_(std::less<int64_t>(), 
arena->Adapter(kArenaAllocConstantsMap)), cached_current_method_(nullptr), - inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) { + inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()), + osr_(osr) { blocks_.reserve(kDefaultNumberOfBlocks); } @@ -478,6 +480,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { return instruction_set_; } + bool IsCompilingOsr() const { return osr_; } + bool HasTryCatch() const { return has_try_catch_; } void SetHasTryCatch(bool value) { has_try_catch_ = value; } @@ -606,6 +610,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> { // collection pointer to passes which may create NullConstant. ReferenceTypeInfo inexact_object_rti_; + // Whether we are compiling this graph for on stack replacement: this will + // make all loops seen as irreducible and emit special stack maps to mark + // compiled code entries which the interpreter can directly jump to. + const bool osr_; + friend class SsaBuilder; // For caching constants. friend class SsaLivenessAnalysis; // For the linear order. ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1); @@ -6040,6 +6049,74 @@ inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) { FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK) #undef INSTRUCTION_TYPE_CHECK +class SwitchTable : public ValueObject { + public: + SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse) + : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) { + int32_t table_offset = instruction.VRegB_31t(); + const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset; + if (sparse) { + CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature)); + } else { + CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature)); + } + num_entries_ = table[1]; + values_ = reinterpret_cast<const int32_t*>(&table[2]); + } + + uint16_t GetNumEntries() const { + return num_entries_; + } + + void CheckIndex(size_t index) const { + if (sparse_) { + // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order. + DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_)); + } else { + // In a packed table, we have the starting key and num_entries_ values. + DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_)); + } + } + + int32_t GetEntryAt(size_t index) const { + CheckIndex(index); + return values_[index]; + } + + uint32_t GetDexPcForIndex(size_t index) const { + CheckIndex(index); + return dex_pc_ + + (reinterpret_cast<const int16_t*>(values_ + index) - + reinterpret_cast<const int16_t*>(&instruction_)); + } + + // Index of the first value in the table. + size_t GetFirstValueIndex() const { + if (sparse_) { + // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order. + return num_entries_; + } else { + // In a packed table, we have the starting key and num_entries_ values. + return 1; + } + } + + private: + const Instruction& instruction_; + const uint32_t dex_pc_; + + // Whether this is a sparse-switch table (or a packed-switch one). + const bool sparse_; + + // This can't be const as it needs to be computed off of the given instruction, and complicated + // expressions in the initializer list seemed very ugly. 
+ uint16_t num_entries_; + + const int32_t* values_; + + DISALLOW_COPY_AND_ASSIGN(SwitchTable); +}; + } // namespace art #endif // ART_COMPILER_OPTIMIZING_NODES_H_ diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc index bdc664b3eb..736ac32011 100644 --- a/compiler/optimizing/optimizing_compiler.cc +++ b/compiler/optimizing/optimizing_compiler.cc @@ -300,7 +300,7 @@ class OptimizingCompiler FINAL : public Compiler { } } - bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method) + bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, bool osr) OVERRIDE SHARED_REQUIRES(Locks::mutator_lock_); @@ -309,7 +309,8 @@ class OptimizingCompiler FINAL : public Compiler { CompiledMethod* Emit(ArenaAllocator* arena, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - CompilerDriver* driver) const; + CompilerDriver* driver, + const DexFile::CodeItem* item) const; // Try compiling a method and return the code generator used for // compiling it. @@ -327,7 +328,8 @@ class OptimizingCompiler FINAL : public Compiler { uint32_t method_idx, jobject class_loader, const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const; + Handle<mirror::DexCache> dex_cache, + bool osr) const; std::unique_ptr<OptimizingCompilerStats> compilation_stats_; @@ -580,11 +582,12 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen) CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena, CodeVectorAllocator* code_allocator, CodeGenerator* codegen, - CompilerDriver* compiler_driver) const { + CompilerDriver* compiler_driver, + const DexFile::CodeItem* code_item) const { ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen); ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps)); stack_map.resize(codegen->ComputeStackMapsSize()); - codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size())); + codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), *code_item); CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod( compiler_driver, @@ -615,7 +618,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, uint32_t method_idx, jobject class_loader, const DexFile& dex_file, - Handle<mirror::DexCache> dex_cache) const { + Handle<mirror::DexCache> dex_cache, + bool osr) const { MaybeRecordStat(MethodCompilationStat::kAttemptCompilation); CompilerDriver* compiler_driver = GetCompilerDriver(); InstructionSet instruction_set = compiler_driver->GetInstructionSet(); @@ -663,8 +667,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena, dex_compilation_unit.GetDexFile(), dex_compilation_unit.GetClassDefIndex()); HGraph* graph = new (arena) HGraph( - arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(), - kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable()); + arena, + dex_file, + method_idx, + requires_barrier, + compiler_driver->GetInstructionSet(), + kInvalidInvokeType, + compiler_driver->GetCompilerOptions().GetDebuggable(), + osr); std::unique_ptr<CodeGenerator> codegen( CodeGenerator::Create(graph, @@ -797,10 +807,11 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item, method_idx, jclass_loader, dex_file, - dex_cache)); + dex_cache, + /* osr */ false)); if (codegen.get() != nullptr) { MaybeRecordStat(MethodCompilationStat::kCompiled); - method = Emit(&arena, &code_allocator, 
codegen.get(), compiler_driver); + method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item); } } else { if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) { @@ -843,7 +854,8 @@ bool IsCompilingWithCoreImage() { bool OptimizingCompiler::JitCompile(Thread* self, jit::JitCodeCache* code_cache, - ArtMethod* method) { + ArtMethod* method, + bool osr) { StackHandleScope<2> hs(self); Handle<mirror::ClassLoader> class_loader(hs.NewHandle( method->GetDeclaringClass()->GetClassLoader())); @@ -873,7 +885,8 @@ bool OptimizingCompiler::JitCompile(Thread* self, method_idx, jclass_loader, *dex_file, - dex_cache)); + dex_cache, + osr)); if (codegen.get() == nullptr) { return false; } @@ -885,7 +898,7 @@ bool OptimizingCompiler::JitCompile(Thread* self, return false; } MaybeRecordStat(MethodCompilationStat::kCompiled); - codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size)); + codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item); const void* code = code_cache->CommitCode( self, method, @@ -896,7 +909,8 @@ bool OptimizingCompiler::JitCompile(Thread* self, codegen->GetCoreSpillMask(), codegen->GetFpuSpillMask(), code_allocator.GetMemory().data(), - code_allocator.GetSize()); + code_allocator.GetSize(), + osr); if (code == nullptr) { code_cache->ClearData(self, stack_map_data); diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S index 631b784787..b3a2979a26 100644 --- a/runtime/arch/arm/quick_entrypoints_arm.S +++ b/runtime/arch/arm/quick_entrypoints_arm.S @@ -429,6 +429,56 @@ ENTRY art_quick_invoke_stub_internal END art_quick_invoke_stub_internal /* + * On stack replacement stub. + * On entry: + * r0 = stack to copy + * r1 = size of stack + * r2 = pc to call + * r3 = JValue* result + * [sp] = shorty + * [sp + 4] = thread + */ +ENTRY art_quick_osr_stub + SPILL_ALL_CALLEE_SAVE_GPRS @ Spill regs (9) + mov r11, sp @ Save the stack pointer + mov r10, r1 @ Save size of stack + ldr r9, [r11, #40] @ Move managed thread pointer into r9 + mov r8, r2 @ Save the pc to call + sub r7, sp, #12 @ Reserve space for stack pointer, JValue result, and ArtMethod* slot + and r7, #0xFFFFFFF0 @ Align stack pointer + mov sp, r7 @ Update stack pointer + str r11, [sp, #4] @ Save old stack pointer + str r3, [sp, #8] @ Save JValue result + mov ip, #0 + str ip, [sp] @ Store null for ArtMethod* at bottom of frame + sub sp, sp, r1 @ Reserve space for callee stack + mov r2, r1 + mov r1, r0 + mov r0, sp + bl memcpy @ memcpy (dest r0, src r1, bytes r2) + bl .Losr_entry @ Call the method + ldr r11, [sp, #4] @ Restore saved stack pointer + ldr r10, [sp, #8] @ Restire JValue result + mov sp, r11 @ Restore stack pointer. + ldr r4, [sp, #36] @ load shorty + ldr r4, [r4, #0] @ load return type + cmp r4, #68 @ Test if result type char == 'D'. + beq .Losr_fp_result + cmp r4, #70 @ Test if result type char == 'F'. 
+ beq .Losr_fp_result + strd r0, [r10] @ Store r0/r1 into result pointer + b .Losr_exit +.Losr_fp_result: + vstr d0, [r10] @ Store s0-s1/d0 into result pointer +.Losr_exit: + pop {r4, r5, r6, r7, r8, r9, r10, r11, pc} +.Losr_entry: + sub r10, r10, #4 + str lr, [sp, r10] @ Store link register per the compiler ABI + bx r8 +END art_quick_osr_stub + + /* * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_ */ ARM_ENTRY art_quick_do_long_jump diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S index 9ccabad1cc..e8480087a7 100644 --- a/runtime/arch/arm64/quick_entrypoints_arm64.S +++ b/runtime/arch/arm64/quick_entrypoints_arm64.S @@ -915,6 +915,105 @@ END art_quick_invoke_static_stub +/* extern"C" void art_quick_osr_stub(void** stack, x0 + * size_t stack_size_in_bytes, x1 + * const uin8_t* native_pc, x2 + * JValue *result, x3 + * char *shorty, x4 + * Thread *self) x5 + */ +ENTRY art_quick_osr_stub +SAVE_SIZE=15*8 // x3, x4, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved. + mov x9, sp // Save stack pointer. + .cfi_register sp,x9 + + sub x10, sp, # SAVE_SIZE + and x10, x10, # ~0xf // Enforce 16 byte stack alignment. + mov sp, x10 // Set new SP. + + str x28, [sp, #112] + stp x26, x27, [sp, #96] + stp x24, x25, [sp, #80] + stp x22, x23, [sp, #64] + stp x20, x21, [sp, #48] + stp x9, x19, [sp, #32] // Save old stack pointer and x19. + stp x3, x4, [sp, #16] // Save result and shorty addresses. + stp xFP, xLR, [sp] // Store LR & FP. + mov xSELF, x5 // Move thread pointer into SELF register. + + sub sp, sp, #16 + str xzr, [sp] // Store null for ArtMethod* slot + // Branch to stub. + bl .Losr_entry + add sp, sp, #16 + + // Restore return value address and shorty address. + ldp x3,x4, [sp, #16] + ldr x28, [sp, #112] + ldp x26, x27, [sp, #96] + ldp x24, x25, [sp, #80] + ldp x22, x23, [sp, #64] + ldp x20, x21, [sp, #48] + + // Store result (w0/x0/s0/d0) appropriately, depending on resultType. + ldrb w10, [x4] + + // Check the return type and store the correct register into the jvalue in memory. + + // Don't set anything for a void type. + cmp w10, #'V' + beq .Losr_exit + + // Is it a double? + cmp w10, #'D' + bne .Lno_double + str d0, [x3] + b .Losr_exit + +.Lno_double: // Is it a float? + cmp w10, #'F' + bne .Lno_float + str s0, [x3] + b .Losr_exit + +.Lno_float: // Just store x0. Doesn't matter if it is 64 or 32 bits. + str x0, [x3] + +.Losr_exit: // Finish up. + ldp x2, x19, [sp, #32] // Restore stack pointer and x19. + ldp xFP, xLR, [sp] // Restore old frame pointer and link register. + mov sp, x2 + ret + +.Losr_entry: + // Update stack pointer for the callee + sub sp, sp, x1 + + // Update link register slot expected by the callee. + sub w1, w1, #8 + str lr, [sp, x1] + + // Copy arguments into stack frame. + // Use simple copy routine for now. + // 4 bytes per slot. + // X0 - source address + // W1 - args length + // SP - destination address. + // W10 - temporary +.Losr_loop_entry: + cmp w1, #0 + beq .Losr_loop_exit + sub w1, w1, #4 + ldr w10, [x0, x1] + str w10, [sp, x1] + b .Losr_loop_entry + +.Losr_loop_exit: + // Branch to the OSR entry point. 
+ br x2 + +END art_quick_osr_stub + /* * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_ */ diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S index da30331845..fbee5d7724 100644 --- a/runtime/arch/x86/quick_entrypoints_x86.S +++ b/runtime/arch/x86/quick_entrypoints_x86.S @@ -1712,5 +1712,65 @@ DEFINE_FUNCTION art_quick_read_barrier_for_root_slow ret END_FUNCTION art_quick_read_barrier_for_root_slow + /* + * On stack replacement stub. + * On entry: + * [sp] = return address + * [sp + 4] = stack to copy + * [sp + 8] = size of stack + * [sp + 12] = pc to call + * [sp + 16] = JValue* result + * [sp + 20] = shorty + * [sp + 24] = thread + */ +DEFINE_FUNCTION art_quick_osr_stub + // Save native callee saves. + PUSH ebp + PUSH ebx + PUSH esi + PUSH edi + mov 4+16(%esp), %esi // ESI = argument array + mov 8+16(%esp), %ecx // ECX = size of args + mov 12+16(%esp), %ebx // EBX = pc to call + mov %esp, %ebp // Save stack pointer + andl LITERAL(0xFFFFFFF0), %esp // Align stack + PUSH ebp // Save old stack pointer + subl LITERAL(12), %esp // Align stack + movl LITERAL(0), (%esp) // Store null for ArtMethod* slot + call .Losr_entry + + // Restore stack pointer. + addl LITERAL(12), %esp + POP ebp + mov %ebp, %esp + + // Restore callee saves. + POP edi + POP esi + POP ebx + POP ebp + mov 16(%esp), %ecx // Get JValue result + mov %eax, (%ecx) // Store the result assuming it is a long, int or Object* + mov %edx, 4(%ecx) // Store the other half of the result + mov 20(%esp), %edx // Get the shorty + cmpb LITERAL(68), (%edx) // Test if result type char == 'D' + je .Losr_return_double_quick + cmpb LITERAL(70), (%edx) // Test if result type char == 'F' + je .Losr_return_float_quick + ret +.Losr_return_double_quick: + movsd %xmm0, (%ecx) // Store the floating point result + ret +.Losr_return_float_quick: + movss %xmm0, (%ecx) // Store the floating point result + ret +.Losr_entry: + subl LITERAL(4), %ecx // Given stack size contains pushed frame pointer, substract it. + subl %ecx, %esp + mov %esp, %edi // EDI = beginning of stack + rep movsb // while (ecx--) { *edi++ = *esi++ } + jmp *%ebx +END_FUNCTION art_quick_osr_stub + // TODO: implement these! UNIMPLEMENTED art_quick_memcmp16 diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S index 883da96059..d6e0f1c1a3 100644 --- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S +++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S @@ -1744,3 +1744,62 @@ DEFINE_FUNCTION art_quick_read_barrier_for_root_slow RESTORE_FP_CALLEE_SAVE_FRAME ret END_FUNCTION art_quick_read_barrier_for_root_slow + + /* + * On stack replacement stub. + * On entry: + * [sp] = return address + * rdi = stack to copy + * rsi = size of stack + * rdx = pc to call + * rcx = JValue* result + * r8 = shorty + * r9 = thread + */ +DEFINE_FUNCTION art_quick_osr_stub + // Save the non-volatiles. + PUSH rbp // Save rbp. + PUSH rcx // Save rcx/result*. + PUSH r8 // Save r8/shorty*. + + // Save callee saves. + PUSH rbx + PUSH r12 + PUSH r13 + PUSH r14 + PUSH r15 + + pushq LITERAL(0) // Push null for ArtMethod*. + movl %esi, %ecx // rcx := size of stack + movq %rdi, %rsi // rsi := stack to copy + call .Losr_entry + + // Restore stack and callee-saves. + addq LITERAL(8), %rsp + POP r15 + POP r14 + POP r13 + POP r12 + POP rbx + POP r8 + POP rcx + POP rbp + cmpb LITERAL(68), (%r8) // Test if result type char == 'D'. + je .Losr_return_double_quick + cmpb LITERAL(70), (%r8) // Test if result type char == 'F'. 
+ je .Losr_return_float_quick + movq %rax, (%rcx) // Store the result assuming its a long, int or Object* + ret +.Losr_return_double_quick: + movsd %xmm0, (%rcx) // Store the double floating point result. + ret +.Losr_return_float_quick: + movss %xmm0, (%rcx) // Store the floating point result. + ret +.Losr_entry: + subl LITERAL(8), %ecx // Given stack size contains pushed frame pointer, substract it. + subq %rcx, %rsp + movq %rsp, %rdi // rdi := beginning of stack + rep movsb // while (rcx--) { *rdi++ = *rsi++ } + jmp *%rdx +END_FUNCTION art_quick_osr_stub diff --git a/runtime/art_method.cc b/runtime/art_method.cc index 6f36016d25..cd38e16cf7 100644 --- a/runtime/art_method.cc +++ b/runtime/art_method.cc @@ -292,22 +292,7 @@ void ArtMethod::Invoke(Thread* self, uint32_t* args, uint32_t args_size, JValue* // Unusual case where we were running generated code and an // exception was thrown to force the activations to be removed from the // stack. Continue execution in the interpreter. - self->ClearException(); - ShadowFrame* shadow_frame = - self->PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame); - mirror::Throwable* pending_exception = nullptr; - bool from_code = false; - self->PopDeoptimizationContext(result, &pending_exception, &from_code); - CHECK(!from_code); - self->SetTopOfStack(nullptr); - self->SetTopOfShadowStack(shadow_frame); - - // Restore the exception that was pending before deoptimization then interpret the - // deoptimized frames. - if (pending_exception != nullptr) { - self->SetException(pending_exception); - } - interpreter::EnterInterpreterFromDeoptimize(self, shadow_frame, from_code, result); + self->DeoptimizeWithDeoptimizationException(result); } if (kLogInvocationStartAndReturn) { LOG(INFO) << StringPrintf("Returned '%s' quick code=%p", PrettyMethod(this).c_str(), diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc index b5a55bfa44..3dfad767bd 100644 --- a/runtime/entrypoints/entrypoint_utils.cc +++ b/runtime/entrypoints/entrypoint_utils.cc @@ -273,15 +273,15 @@ ArtMethod* GetCalleeSaveMethodCaller(ArtMethod** sp, if (outer_method != nullptr) { const OatQuickMethodHeader* current_code = outer_method->GetOatQuickMethodHeader(caller_pc); if (current_code->IsOptimized()) { - uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc); - CodeInfo code_info = current_code->GetOptimizedCodeInfo(); - StackMapEncoding encoding = code_info.ExtractEncoding(); - StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding); - DCHECK(stack_map.IsValid()); - if (stack_map.HasInlineInfo(encoding)) { - InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); - caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1); - } + uintptr_t native_pc_offset = current_code->NativeQuickPcOffset(caller_pc); + CodeInfo code_info = current_code->GetOptimizedCodeInfo(); + StackMapEncoding encoding = code_info.ExtractEncoding(); + StackMap stack_map = code_info.GetStackMapForNativePcOffset(native_pc_offset, encoding); + DCHECK(stack_map.IsValid()); + if (stack_map.HasInlineInfo(encoding)) { + InlineInfo inline_info = code_info.GetInlineInfoOf(stack_map, encoding); + caller = GetResolvedMethod(outer_method, inline_info, inline_info.GetDepth() - 1); + } } } if (kIsDebugBuild && do_caller_check) { diff --git a/runtime/interpreter/interpreter_goto_table_impl.cc b/runtime/interpreter/interpreter_goto_table_impl.cc index 940d34449a..ca8598e5e6 100644 --- 
a/runtime/interpreter/interpreter_goto_table_impl.cc +++ b/runtime/interpreter/interpreter_goto_table_impl.cc @@ -21,6 +21,7 @@ #include "base/stl_util.h" // MakeUnique #include "experimental_flags.h" #include "interpreter_common.h" +#include "jit/jit.h" #include "safe_math.h" #include <memory> // std::unique_ptr @@ -63,10 +64,15 @@ namespace interpreter { currentHandlersTable = handlersTable[ \ Runtime::Current()->GetInstrumentation()->GetInterpreterHandlerTable()] -#define BRANCH_INSTRUMENTATION(offset) \ - do { \ +#define BRANCH_INSTRUMENTATION(offset) \ + do { \ + ArtMethod* method = shadow_frame.GetMethod(); \ instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation(); \ - instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \ + instrumentation->Branch(self, method, dex_pc, offset); \ + JValue result; \ + if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) { \ + return result; \ + } \ } while (false) #define UNREACHABLE_CODE_CHECK() \ diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc index f6069785a2..25dbab2494 100644 --- a/runtime/interpreter/interpreter_switch_impl.cc +++ b/runtime/interpreter/interpreter_switch_impl.cc @@ -17,6 +17,7 @@ #include "base/stl_util.h" // MakeUnique #include "experimental_flags.h" #include "interpreter_common.h" +#include "jit/jit.h" #include "safe_math.h" #include <memory> // std::unique_ptr @@ -69,9 +70,14 @@ namespace interpreter { } \ } while (false) -#define BRANCH_INSTRUMENTATION(offset) \ - do { \ - instrumentation->Branch(self, shadow_frame.GetMethod(), dex_pc, offset); \ +#define BRANCH_INSTRUMENTATION(offset) \ + do { \ + ArtMethod* method = shadow_frame.GetMethod(); \ + instrumentation->Branch(self, method, dex_pc, offset); \ + JValue result; \ + if (jit::Jit::MaybeDoOnStackReplacement(self, method, dex_pc, offset, &result)) { \ + return result; \ + } \ } while (false) static bool IsExperimentalInstructionEnabled(const Instruction *inst) { diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc index fa5c41d7ae..3e152e1ccd 100644 --- a/runtime/jit/jit.cc +++ b/runtime/jit/jit.cc @@ -25,10 +25,12 @@ #include "jit_code_cache.h" #include "jit_instrumentation.h" #include "oat_file_manager.h" +#include "oat_quick_method_header.h" #include "offline_profiling_info.h" #include "profile_saver.h" #include "runtime.h" #include "runtime_options.h" +#include "stack_map.h" #include "utils.h" namespace art { @@ -43,6 +45,8 @@ JitOptions* JitOptions::CreateFromRuntimeArguments(const RuntimeArgumentMap& opt options.GetOrDefault(RuntimeArgumentMap::JITCodeCacheMaxCapacity); jit_options->compile_threshold_ = options.GetOrDefault(RuntimeArgumentMap::JITCompileThreshold); + // TODO(ngeoffray): Make this a proper option. 
+ jit_options->osr_threshold_ = jit_options->compile_threshold_ * 2; jit_options->warmup_threshold_ = options.GetOrDefault(RuntimeArgumentMap::JITWarmupThreshold); jit_options->dump_info_on_shutdown_ = @@ -121,7 +125,7 @@ bool Jit::LoadCompiler(std::string* error_msg) { *error_msg = "JIT couldn't find jit_unload entry point"; return false; } - jit_compile_method_ = reinterpret_cast<bool (*)(void*, ArtMethod*, Thread*)>( + jit_compile_method_ = reinterpret_cast<bool (*)(void*, ArtMethod*, Thread*, bool)>( dlsym(jit_library_handle_, "jit_compile_method")); if (jit_compile_method_ == nullptr) { dlclose(jit_library_handle_); @@ -156,7 +160,7 @@ bool Jit::LoadCompiler(std::string* error_msg) { return true; } -bool Jit::CompileMethod(ArtMethod* method, Thread* self) { +bool Jit::CompileMethod(ArtMethod* method, Thread* self, bool osr) { DCHECK(!method->IsRuntimeMethod()); // Don't compile the method if it has breakpoints. if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) { @@ -171,10 +175,11 @@ bool Jit::CompileMethod(ArtMethod* method, Thread* self) { return false; } - if (!code_cache_->NotifyCompilationOf(method, self)) { + if (!code_cache_->NotifyCompilationOf(method, self, osr)) { + VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to code cache"; return false; } - bool success = jit_compile_method_(jit_compiler_handle_, method, self); + bool success = jit_compile_method_(jit_compiler_handle_, method, self, osr); code_cache_->DoneCompiling(method, self); return success; } @@ -224,9 +229,11 @@ Jit::~Jit() { } } -void Jit::CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold) { +void Jit::CreateInstrumentationCache(size_t compile_threshold, + size_t warmup_threshold, + size_t osr_threshold) { instrumentation_cache_.reset( - new jit::JitInstrumentationCache(compile_threshold, warmup_threshold)); + new jit::JitInstrumentationCache(compile_threshold, warmup_threshold, osr_threshold)); } void Jit::NewTypeLoadedIfUsingJit(mirror::Class* type) { @@ -255,5 +262,120 @@ void Jit::DumpTypeInfoForLoadedTypes(ClassLinker* linker) { } } +extern "C" void art_quick_osr_stub(void** stack, + uint32_t stack_size_in_bytes, + const uint8_t* native_pc, + JValue* result, + const char* shorty, + Thread* self); + +bool Jit::MaybeDoOnStackReplacement(Thread* thread, + ArtMethod* method, + uint32_t dex_pc, + int32_t dex_pc_offset, + JValue* result) { + Jit* jit = Runtime::Current()->GetJit(); + if (jit == nullptr) { + return false; + } + + if (kRuntimeISA == kMips || kRuntimeISA == kMips64) { + VLOG(jit) << "OSR not supported on this platform"; + return false; + } + + // Cheap check if the method has been compiled already. That's an indicator that we should + // osr into it. + if (!jit->GetCodeCache()->ContainsPc(method->GetEntryPointFromQuickCompiledCode())) { + return false; + } + + const OatQuickMethodHeader* osr_method = jit->GetCodeCache()->LookupOsrMethodHeader(method); + if (osr_method == nullptr) { + // No osr method yet, just return to the interpreter. + return false; + } + + const size_t number_of_vregs = method->GetCodeItem()->registers_size_; + CodeInfo code_info = osr_method->GetOptimizedCodeInfo(); + StackMapEncoding encoding = code_info.ExtractEncoding(); + + // Find stack map starting at the target dex_pc. + StackMap stack_map = code_info.GetOsrStackMapForDexPc(dex_pc + dex_pc_offset, encoding); + if (!stack_map.IsValid()) { + // There is no OSR stack map for this dex pc offset. 
Just return to the interpreter in the + // hope that the next branch has one. + return false; + } + + // We found a stack map, now fill the frame with dex register values from the interpreter's + // shadow frame. + DexRegisterMap vreg_map = + code_info.GetDexRegisterMapOf(stack_map, encoding, number_of_vregs); + + ShadowFrame* shadow_frame = thread->PopShadowFrame(); + + size_t frame_size = osr_method->GetFrameSizeInBytes(); + void** memory = reinterpret_cast<void**>(malloc(frame_size)); + memset(memory, 0, frame_size); + + // Art ABI: ArtMethod is at the bottom of the stack. + memory[0] = method; + + if (!vreg_map.IsValid()) { + // If we don't have a dex register map, then there are no live dex registers at + // this dex pc. + } else { + for (uint16_t vreg = 0; vreg < number_of_vregs; ++vreg) { + DexRegisterLocation::Kind location = + vreg_map.GetLocationKind(vreg, number_of_vregs, code_info, encoding); + if (location == DexRegisterLocation::Kind::kNone) { + // Dex register is dead or unitialized. + continue; + } + + if (location == DexRegisterLocation::Kind::kConstant) { + // We skip constants because the compiled code knows how to handle them. + continue; + } + + DCHECK(location == DexRegisterLocation::Kind::kInStack); + + int32_t vreg_value = shadow_frame->GetVReg(vreg); + int32_t slot_offset = vreg_map.GetStackOffsetInBytes(vreg, + number_of_vregs, + code_info, + encoding); + DCHECK_LT(slot_offset, static_cast<int32_t>(frame_size)); + DCHECK_GT(slot_offset, 0); + (reinterpret_cast<int32_t*>(memory))[slot_offset / sizeof(int32_t)] = vreg_value; + } + } + + const uint8_t* native_pc = stack_map.GetNativePcOffset(encoding) + osr_method->GetEntryPoint(); + VLOG(jit) << "Jumping to " + << PrettyMethod(method) + << "@" + << std::hex << reinterpret_cast<uintptr_t>(native_pc); + { + ManagedStack fragment; + thread->PushManagedStackFragment(&fragment); + (*art_quick_osr_stub)(memory, + frame_size, + native_pc, + result, + method->GetInterfaceMethodIfProxy(sizeof(void*))->GetShorty(), + thread); + if (UNLIKELY(thread->GetException() == Thread::GetDeoptimizationException())) { + thread->DeoptimizeWithDeoptimizationException(result); + } + thread->PopManagedStackFragment(fragment); + } + free(memory); + thread->PushShadowFrame(shadow_frame); + VLOG(jit) << "Done running OSR code for " << PrettyMethod(method); + return true; +} + } // namespace jit } // namespace art diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h index a80f51f397..042da92b3b 100644 --- a/runtime/jit/jit.h +++ b/runtime/jit/jit.h @@ -49,9 +49,11 @@ class Jit { virtual ~Jit(); static Jit* Create(JitOptions* options, std::string* error_msg); - bool CompileMethod(ArtMethod* method, Thread* self) + bool CompileMethod(ArtMethod* method, Thread* self, bool osr) SHARED_REQUIRES(Locks::mutator_lock_); - void CreateInstrumentationCache(size_t compile_threshold, size_t warmup_threshold); + void CreateInstrumentationCache(size_t compile_threshold, + size_t warmup_threshold, + size_t osr_threshold); void CreateThreadPool(); CompilerCallbacks* GetCompilerCallbacks() { return compiler_callbacks_; @@ -88,6 +90,17 @@ class Jit { bool JitAtFirstUse(); + // If an OSR compiled version is available for `method`, + // and `dex_pc + dex_pc_offset` is an entry point of that compiled + // version, this method will jump to the compiled code, let it run, + // and return true afterwards. Return false otherwise. 
+ static bool MaybeDoOnStackReplacement(Thread* thread, + ArtMethod* method, + uint32_t dex_pc, + int32_t dex_pc_offset, + JValue* result) + SHARED_REQUIRES(Locks::mutator_lock_); + private: Jit(); bool LoadCompiler(std::string* error_msg); @@ -97,7 +110,7 @@ class Jit { void* jit_compiler_handle_; void* (*jit_load_)(CompilerCallbacks**, bool*); void (*jit_unload_)(void*); - bool (*jit_compile_method_)(void*, ArtMethod*, Thread*); + bool (*jit_compile_method_)(void*, ArtMethod*, Thread*, bool); void (*jit_types_loaded_)(void*, mirror::Class**, size_t count); // Performance monitoring. @@ -123,6 +136,9 @@ class JitOptions { size_t GetWarmupThreshold() const { return warmup_threshold_; } + size_t GetOsrThreshold() const { + return osr_threshold_; + } size_t GetCodeCacheInitialCapacity() const { return code_cache_initial_capacity_; } @@ -155,6 +171,7 @@ class JitOptions { size_t code_cache_max_capacity_; size_t compile_threshold_; size_t warmup_threshold_; + size_t osr_threshold_; bool dump_info_on_shutdown_; bool save_profiling_info_; diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc index f325949c6f..464c441e8e 100644 --- a/runtime/jit/jit_code_cache.cc +++ b/runtime/jit/jit_code_cache.cc @@ -184,7 +184,8 @@ uint8_t* JitCodeCache::CommitCode(Thread* self, size_t core_spill_mask, size_t fp_spill_mask, const uint8_t* code, - size_t code_size) { + size_t code_size, + bool osr) { uint8_t* result = CommitCodeInternal(self, method, mapping_table, @@ -194,7 +195,8 @@ uint8_t* JitCodeCache::CommitCode(Thread* self, core_spill_mask, fp_spill_mask, code, - code_size); + code_size, + osr); if (result == nullptr) { // Retry. GarbageCollectCache(self); @@ -207,7 +209,8 @@ uint8_t* JitCodeCache::CommitCode(Thread* self, core_spill_mask, fp_spill_mask, code, - code_size); + code_size, + osr); } return result; } @@ -287,7 +290,8 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, size_t core_spill_mask, size_t fp_spill_mask, const uint8_t* code, - size_t code_size) { + size_t code_size, + bool osr) { size_t alignment = GetInstructionSetAlignment(kRuntimeISA); // Ensure the header ends up at expected instruction alignment. size_t header_size = RoundUp(sizeof(OatQuickMethodHeader), alignment); @@ -329,8 +333,12 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, { MutexLock mu(self, lock_); method_code_map_.Put(code_ptr, method); - Runtime::Current()->GetInstrumentation()->UpdateMethodsCode( - method, method_header->GetEntryPoint()); + if (osr) { + osr_code_map_.Put(method, code_ptr); + } else { + Runtime::Current()->GetInstrumentation()->UpdateMethodsCode( + method, method_header->GetEntryPoint()); + } if (collection_in_progress_) { // We need to update the live bitmap if there is a GC to ensure it sees this new // code. @@ -338,7 +346,7 @@ uint8_t* JitCodeCache::CommitCodeInternal(Thread* self, } last_update_time_ns_.StoreRelease(NanoTime()); VLOG(jit) - << "JIT added " + << "JIT added (osr = " << std::boolalpha << osr << std::noboolalpha << ") " << PrettyMethod(method) << "@" << method << " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": " << " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": " @@ -569,6 +577,10 @@ void JitCodeCache::GarbageCollectCache(Thread* self) { info->GetMethod()->SetProfilingInfo(nullptr); } } + + // Empty osr method map, as osr compile code will be deleted (except the ones + // on thread stacks). + osr_code_map_.clear(); } // Run a checkpoint on all threads to mark the JIT compiled code they are running. 
@@ -662,6 +674,15 @@ OatQuickMethodHeader* JitCodeCache::LookupMethodHeader(uintptr_t pc, ArtMethod* return method_header; } +OatQuickMethodHeader* JitCodeCache::LookupOsrMethodHeader(ArtMethod* method) { + MutexLock mu(Thread::Current(), lock_); + auto it = osr_code_map_.find(method); + if (it == osr_code_map_.end()) { + return nullptr; + } + return OatQuickMethodHeader::FromCodePointer(it->second); +} + ProfilingInfo* JitCodeCache::AddProfilingInfo(Thread* self, ArtMethod* method, const std::vector<uint32_t>& entries, @@ -733,12 +754,15 @@ uint64_t JitCodeCache::GetLastUpdateTimeNs() const { return last_update_time_ns_.LoadAcquire(); } -bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self) { - if (ContainsPc(method->GetEntryPointFromQuickCompiledCode())) { +bool JitCodeCache::NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr) { + if (!osr && ContainsPc(method->GetEntryPointFromQuickCompiledCode())) { return false; } MutexLock mu(self, lock_); + if (osr && (osr_code_map_.find(method) != osr_code_map_.end())) { + return false; + } ProfilingInfo* info = method->GetProfilingInfo(sizeof(void*)); if (info == nullptr || info->IsMethodBeingCompiled()) { return false; diff --git a/runtime/jit/jit_code_cache.h b/runtime/jit/jit_code_cache.h index 69fc5532c1..048f8d064e 100644 --- a/runtime/jit/jit_code_cache.h +++ b/runtime/jit/jit_code_cache.h @@ -71,7 +71,7 @@ class JitCodeCache { // Number of compilations done throughout the lifetime of the JIT. size_t NumberOfCompilations() REQUIRES(!lock_); - bool NotifyCompilationOf(ArtMethod* method, Thread* self) + bool NotifyCompilationOf(ArtMethod* method, Thread* self, bool osr) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_); @@ -89,7 +89,8 @@ class JitCodeCache { size_t core_spill_mask, size_t fp_spill_mask, const uint8_t* code, - size_t code_size) + size_t code_size, + bool osr) SHARED_REQUIRES(Locks::mutator_lock_) REQUIRES(!lock_); @@ -131,6 +132,10 @@ class JitCodeCache { REQUIRES(!lock_) SHARED_REQUIRES(Locks::mutator_lock_); + OatQuickMethodHeader* LookupOsrMethodHeader(ArtMethod* method) + REQUIRES(!lock_) + SHARED_REQUIRES(Locks::mutator_lock_); + // Remove all methods in our cache that were allocated by 'alloc'. void RemoveMethodsIn(Thread* self, const LinearAlloc& alloc) REQUIRES(!lock_) @@ -187,7 +192,8 @@ class JitCodeCache { size_t core_spill_mask, size_t fp_spill_mask, const uint8_t* code, - size_t code_size) + size_t code_size, + bool osr) REQUIRES(!lock_) SHARED_REQUIRES(Locks::mutator_lock_); @@ -237,8 +243,10 @@ class JitCodeCache { void* data_mspace_ GUARDED_BY(lock_); // Bitmap for collecting code and data. std::unique_ptr<CodeCacheBitmap> live_bitmap_; - // This map holds compiled code associated to the ArtMethod. + // Holds compiled code associated to the ArtMethod. SafeMap<const void*, ArtMethod*> method_code_map_ GUARDED_BY(lock_); + // Holds osr compiled code associated to the ArtMethod. + SafeMap<ArtMethod*, const void*> osr_code_map_ GUARDED_BY(lock_); // ProfilingInfo objects we have allocated. 
std::vector<ProfilingInfo*> profiling_infos_ GUARDED_BY(lock_); diff --git a/runtime/jit/jit_instrumentation.cc b/runtime/jit/jit_instrumentation.cc index d597b36e95..a4e40ad3fd 100644 --- a/runtime/jit/jit_instrumentation.cc +++ b/runtime/jit/jit_instrumentation.cc @@ -29,7 +29,8 @@ class JitCompileTask FINAL : public Task { public: enum TaskKind { kAllocateProfile, - kCompile + kCompile, + kCompileOsr }; JitCompileTask(ArtMethod* method, TaskKind kind) : method_(method), kind_(kind) { @@ -48,9 +49,14 @@ class JitCompileTask FINAL : public Task { ScopedObjectAccess soa(self); if (kind_ == kCompile) { VLOG(jit) << "JitCompileTask compiling method " << PrettyMethod(method_); - if (!Runtime::Current()->GetJit()->CompileMethod(method_, self)) { + if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ false)) { VLOG(jit) << "Failed to compile method " << PrettyMethod(method_); } + } else if (kind_ == kCompileOsr) { + VLOG(jit) << "JitCompileTask compiling method osr " << PrettyMethod(method_); + if (!Runtime::Current()->GetJit()->CompileMethod(method_, self, /* osr */ true)) { + VLOG(jit) << "Failed to compile method osr " << PrettyMethod(method_); + } } else { DCHECK(kind_ == kAllocateProfile); if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) { @@ -72,9 +78,11 @@ class JitCompileTask FINAL : public Task { }; JitInstrumentationCache::JitInstrumentationCache(size_t hot_method_threshold, - size_t warm_method_threshold) + size_t warm_method_threshold, + size_t osr_method_threshold) : hot_method_threshold_(hot_method_threshold), warm_method_threshold_(warm_method_threshold), + osr_method_threshold_(osr_method_threshold), listener_(this) { } @@ -151,6 +159,11 @@ void JitInstrumentationCache::AddSamples(Thread* self, ArtMethod* method, size_t DCHECK(thread_pool_ != nullptr); thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompile)); } + + if (sample_count == osr_method_threshold_) { + DCHECK(thread_pool_ != nullptr); + thread_pool_->AddTask(self, new JitCompileTask(method, JitCompileTask::kCompileOsr)); + } } JitInstrumentationListener::JitInstrumentationListener(JitInstrumentationCache* cache) diff --git a/runtime/jit/jit_instrumentation.h b/runtime/jit/jit_instrumentation.h index 06559ad2e4..d1c5c44a07 100644 --- a/runtime/jit/jit_instrumentation.h +++ b/runtime/jit/jit_instrumentation.h @@ -96,7 +96,9 @@ class JitInstrumentationListener : public instrumentation::InstrumentationListen // Keeps track of which methods are hot. 
class JitInstrumentationCache { public: - JitInstrumentationCache(size_t hot_method_threshold, size_t warm_method_threshold); + JitInstrumentationCache(size_t hot_method_threshold, + size_t warm_method_threshold, + size_t osr_method_threshold); void AddSamples(Thread* self, ArtMethod* method, size_t samples) SHARED_REQUIRES(Locks::mutator_lock_); void CreateThreadPool(); @@ -112,6 +114,7 @@ class JitInstrumentationCache { private: size_t hot_method_threshold_; size_t warm_method_threshold_; + size_t osr_method_threshold_; JitInstrumentationListener listener_; std::unique_ptr<ThreadPool> thread_pool_; diff --git a/runtime/oat_quick_method_header.h b/runtime/oat_quick_method_header.h index 564373958a..2b7eca2859 100644 --- a/runtime/oat_quick_method_header.h +++ b/runtime/oat_quick_method_header.h @@ -108,7 +108,7 @@ class PACKED(4) OatQuickMethodHeader { } template <bool kCheckFrameSize = true> - uint32_t GetFrameSizeInBytes() { + uint32_t GetFrameSizeInBytes() const { uint32_t result = frame_info_.FrameSizeInBytes(); if (kCheckFrameSize) { DCHECK_LE(static_cast<size_t>(kStackAlignment), result); diff --git a/runtime/runtime.cc b/runtime/runtime.cc index b1b7473acb..1b59c6fde0 100644 --- a/runtime/runtime.cc +++ b/runtime/runtime.cc @@ -1887,7 +1887,8 @@ void Runtime::CreateJit() { if (jit_.get() != nullptr) { compiler_callbacks_ = jit_->GetCompilerCallbacks(); jit_->CreateInstrumentationCache(jit_options_->GetCompileThreshold(), - jit_options_->GetWarmupThreshold()); + jit_options_->GetWarmupThreshold(), + jit_options_->GetOsrThreshold()); jit_->CreateThreadPool(); // Notify native debugger about the classes already loaded before the creation of the jit. diff --git a/runtime/stack.cc b/runtime/stack.cc index 5faff93b97..1e82860b7b 100644 --- a/runtime/stack.cc +++ b/runtime/stack.cc @@ -40,7 +40,7 @@ namespace art { -static constexpr bool kDebugStackWalk = false; +static constexpr bool kDebugStackWalk = true; mirror::Object* ShadowFrame::GetThisObject() const { ArtMethod* m = GetMethod(); diff --git a/runtime/stack_map.h b/runtime/stack_map.h index 84185ce49f..97eb805501 100644 --- a/runtime/stack_map.h +++ b/runtime/stack_map.h @@ -1195,6 +1195,35 @@ class CodeInfo { return StackMap(); } + StackMap GetOsrStackMapForDexPc(uint32_t dex_pc, const StackMapEncoding& encoding) const { + size_t e = GetNumberOfStackMaps(); + if (e == 0) { + // There cannot be OSR stack map if there is no stack map. + return StackMap(); + } + // Walk over all stack maps. If two consecutive stack maps are identical, then we + // have found a stack map suitable for OSR. + for (size_t i = 0; i < e - 1; ++i) { + StackMap stack_map = GetStackMapAt(i, encoding); + if (stack_map.GetDexPc(encoding) == dex_pc) { + StackMap other = GetStackMapAt(i + 1, encoding); + if (other.GetDexPc(encoding) == dex_pc && + other.GetNativePcOffset(encoding) == stack_map.GetNativePcOffset(encoding)) { + DCHECK_EQ(other.GetDexRegisterMapOffset(encoding), + stack_map.GetDexRegisterMapOffset(encoding)); + DCHECK(!stack_map.HasInlineInfo(encoding)); + if (i < e - 2) { + // Make sure there are not three identical stack maps following each other. 
+ DCHECK_NE(stack_map.GetNativePcOffset(encoding), + GetStackMapAt(i + 2, encoding).GetNativePcOffset(encoding)); + } + return stack_map; + } + } + } + return StackMap(); + } + StackMap GetStackMapForNativePcOffset(uint32_t native_pc_offset, const StackMapEncoding& encoding) const { // TODO: Safepoint stack maps are sorted by native_pc_offset but catch stack diff --git a/runtime/thread.cc b/runtime/thread.cc index 2abcd67c2d..c0fb0cd068 100644 --- a/runtime/thread.cc +++ b/runtime/thread.cc @@ -3012,4 +3012,25 @@ size_t Thread::NumberOfHeldMutexes() const { return count; } + +void Thread::DeoptimizeWithDeoptimizationException(JValue* result) { + DCHECK_EQ(GetException(), Thread::GetDeoptimizationException()); + ClearException(); + ShadowFrame* shadow_frame = + PopStackedShadowFrame(StackedShadowFrameType::kDeoptimizationShadowFrame); + mirror::Throwable* pending_exception = nullptr; + bool from_code = false; + PopDeoptimizationContext(result, &pending_exception, &from_code); + CHECK(!from_code) << "Deoptimizing from code should be done with single frame deoptimization"; + SetTopOfStack(nullptr); + SetTopOfShadowStack(shadow_frame); + + // Restore the exception that was pending before deoptimization then interpret the + // deoptimized frames. + if (pending_exception != nullptr) { + SetException(pending_exception); + } + interpreter::EnterInterpreterFromDeoptimize(this, shadow_frame, from_code, result); +} + } // namespace art diff --git a/runtime/thread.h b/runtime/thread.h index d7887ca42f..0660cd7db4 100644 --- a/runtime/thread.h +++ b/runtime/thread.h @@ -552,6 +552,9 @@ class Thread { OFFSETOF_MEMBER(tls_32bit_sized_values, is_gc_marking)); } + // Deoptimize the Java stack. + void DeoptimizeWithDeoptimizationException(JValue* result) SHARED_REQUIRES(Locks::mutator_lock_); + private: template<size_t pointer_size> static ThreadOffset<pointer_size> ThreadOffsetFromTlsPtr(size_t tls_ptr_offset) { diff --git a/test/570-checker-osr/expected.txt b/test/570-checker-osr/expected.txt new file mode 100644 index 0000000000..555c6a91d8 --- /dev/null +++ b/test/570-checker-osr/expected.txt @@ -0,0 +1,5 @@ +JNI_OnLoad called +100000000 +200000000 +300000000 +400000000 diff --git a/test/570-checker-osr/info.txt b/test/570-checker-osr/info.txt new file mode 100644 index 0000000000..e69de29bb2 --- /dev/null +++ b/test/570-checker-osr/info.txt diff --git a/test/570-checker-osr/osr.cc b/test/570-checker-osr/osr.cc new file mode 100644 index 0000000000..fb846872e6 --- /dev/null +++ b/test/570-checker-osr/osr.cc @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2016 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "art_method.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+#include "oat_quick_method_header.h"
+#include "scoped_thread_state_change.h"
+#include "stack_map.h"
+
+namespace art {
+
+class OsrVisitor : public StackVisitor {
+ public:
+  explicit OsrVisitor(Thread* thread)
+      SHARED_REQUIRES(Locks::mutator_lock_)
+      : StackVisitor(thread, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        in_osr_method_(false) {}
+
+  bool VisitFrame() SHARED_REQUIRES(Locks::mutator_lock_) {
+    ArtMethod* m = GetMethod();
+    std::string m_name(m->GetName());
+
+    if ((m_name.compare("$noinline$returnInt") == 0) ||
+        (m_name.compare("$noinline$returnFloat") == 0) ||
+        (m_name.compare("$noinline$returnDouble") == 0) ||
+        (m_name.compare("$noinline$returnLong") == 0) ||
+        (m_name.compare("$noinline$deopt") == 0)) {
+      const OatQuickMethodHeader* header =
+          Runtime::Current()->GetJit()->GetCodeCache()->LookupOsrMethodHeader(m);
+      if (header != nullptr && header == GetCurrentOatQuickMethodHeader()) {
+        in_osr_method_ = true;
+      }
+      return false;
+    }
+    return true;
+  }
+
+  bool in_osr_method_;
+};
+
+extern "C" JNIEXPORT jboolean JNICALL Java_Main_ensureInOsrCode(JNIEnv*, jclass) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit == nullptr) {
+    // Just return true for non-jit configurations to stop the infinite loop.
+    return JNI_TRUE;
+  }
+  ScopedObjectAccess soa(Thread::Current());
+  OsrVisitor visitor(soa.Self());
+  visitor.WalkStack();
+  return visitor.in_osr_method_;
+}
+
+}  // namespace art
diff --git a/test/570-checker-osr/smali/Osr.smali b/test/570-checker-osr/smali/Osr.smali
new file mode 100644
index 0000000000..869c7c31b8
--- /dev/null
+++ b/test/570-checker-osr/smali/Osr.smali
@@ -0,0 +1,35 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LOsr;
+
+.super Ljava/lang/Object;
+
+# Check that blocks only having nops are not merged when they are loop headers.
+# This ensures we can do on-stack replacement for branches to those nop blocks.
+
+## CHECK-START: int Osr.simpleLoop(int, int) dead_code_elimination_final (after)
+## CHECK-DAG: SuspendCheck loop:<<OuterLoop:B\d+>> outer_loop:none
+## CHECK-DAG: SuspendCheck loop:{{B\d+}} outer_loop:<<OuterLoop>>
+.method public static simpleLoop(II)I
+  .registers 3
+  const/16 v0, 0
+  :nop_entry
+  nop
+  :loop_entry
+  add-int v0, v0, v0
+  if-eq v0, v1, :loop_entry
+  if-eq v0, v2, :nop_entry
+  return v0
+.end method
diff --git a/test/570-checker-osr/src/DeoptimizationController.java b/test/570-checker-osr/src/DeoptimizationController.java
new file mode 100644
index 0000000000..907d133d3b
--- /dev/null
+++ b/test/570-checker-osr/src/DeoptimizationController.java
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// This file is a copy of 802-deoptimization/src/DeoptimizationController.java
+// because run-test requires standalone individual test.
+
+import java.io.File;
+import java.io.IOException;
+import java.lang.reflect.Method;
+
+/**
+ * Controls deoptimization using dalvik.system.VMDebug class.
+ */
+public class DeoptimizationController {
+  private static final String TEMP_FILE_NAME_PREFIX = "test";
+  private static final String TEMP_FILE_NAME_SUFFIX = ".trace";
+
+  private static File createTempFile() throws Exception {
+    try {
+      return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+    } catch (IOException e) {
+      System.setProperty("java.io.tmpdir", "/data/local/tmp");
+      try {
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      } catch (IOException e2) {
+        System.setProperty("java.io.tmpdir", "/sdcard");
+        return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+      }
+    }
+  }
+
+  public static void startDeoptimization() {
+    File tempFile = null;
+    try {
+      tempFile = createTempFile();
+      String tempFileName = tempFile.getPath();
+
+      VMDebug.startMethodTracing(tempFileName, 0, 0, false, 1000);
+      if (VMDebug.getMethodTracingMode() == 0) {
+        throw new IllegalStateException("Not tracing.");
+      }
+    } catch (Exception exc) {
+      exc.printStackTrace(System.err);
+    } finally {
+      if (tempFile != null) {
+        tempFile.delete();
+      }
+    }
+  }
+
+  public static void stopDeoptimization() {
+    try {
+      VMDebug.stopMethodTracing();
+      if (VMDebug.getMethodTracingMode() != 0) {
+        throw new IllegalStateException("Still tracing.");
+      }
+    } catch (Exception exc) {
+      exc.printStackTrace(System.err);
+    }
+  }
+
+  private static class VMDebug {
+    private static final Method startMethodTracingMethod;
+    private static final Method stopMethodTracingMethod;
+    private static final Method getMethodTracingModeMethod;
+
+    static {
+      try {
+        Class<?> c = Class.forName("dalvik.system.VMDebug");
+        startMethodTracingMethod = c.getDeclaredMethod("startMethodTracing", String.class,
+            Integer.TYPE, Integer.TYPE, Boolean.TYPE, Integer.TYPE);
+        stopMethodTracingMethod = c.getDeclaredMethod("stopMethodTracing");
+        getMethodTracingModeMethod = c.getDeclaredMethod("getMethodTracingMode");
+      } catch (Exception e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public static void startMethodTracing(String filename, int bufferSize, int flags,
+        boolean samplingEnabled, int intervalUs) throws Exception {
+      startMethodTracingMethod.invoke(null, filename, bufferSize, flags, samplingEnabled,
+          intervalUs);
+    }
+    public static void stopMethodTracing() throws Exception {
+      stopMethodTracingMethod.invoke(null);
+    }
+    public static int getMethodTracingMode() throws Exception {
+      return (int) getMethodTracingModeMethod.invoke(null);
+    }
+  }
+}
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
new file mode 100644
index 0000000000..7485163314
--- /dev/null
+++ b/test/570-checker-osr/src/Main.java
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  public static void main(String[] args) {
+    System.loadLibrary(args[0]);
+    if ($noinline$returnInt() != 53) {
+      throw new Error("Unexpected return value");
+    }
+    if ($noinline$returnFloat() != 42.2f) {
+      throw new Error("Unexpected return value");
+    }
+    if ($noinline$returnDouble() != Double.longBitsToDouble(0xF000000000001111L)) {
+      throw new Error("Unexpected return value ");
+    }
+    if ($noinline$returnLong() != 0xFFFF000000001111L) {
+      throw new Error("Unexpected return value");
+    }
+
+    try {
+      $noinline$deopt();
+    } catch (Exception e) {}
+  }
+
+  public static int $noinline$returnInt() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 100000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    System.out.println(i);
+    return 53;
+  }
+
+  public static float $noinline$returnFloat() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 200000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    System.out.println(i);
+    return 42.2f;
+  }
+
+  public static double $noinline$returnDouble() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 300000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    System.out.println(i);
+    return Double.longBitsToDouble(0xF000000000001111L);
+  }
+
+  public static long $noinline$returnLong() {
+    if (doThrow) throw new Error("");
+    int i = 1000000;
+    for (; i < 400000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    System.out.println(i);
+    return 0xFFFF000000001111L;
+  }
+
+  public static void $noinline$deopt() {
+    if (doThrow) throw new Error("");
+    int i = 0;
+    for (; i < 100000000; ++i) {
+    }
+    while (!ensureInOsrCode()) {}
+    DeoptimizationController.startDeoptimization();
+  }
+
+  public static int[] array = new int[4];
+
+  public static native boolean ensureInOsrCode();
+
+  public static boolean doThrow = false;
+}
diff --git a/test/Android.libarttest.mk b/test/Android.libarttest.mk
index faaf1f0d78..e547c72c0e 100644
--- a/test/Android.libarttest.mk
+++ b/test/Android.libarttest.mk
@@ -40,7 +40,8 @@ LIBARTTEST_COMMON_SRC_FILES := \
   466-get-live-vreg/get_live_vreg_jni.cc \
   497-inlining-and-class-loader/clear_dex_cache.cc \
   543-env-long-ref/env_long_ref.cc \
-  566-polymorphic-inlining/polymorphic_inline.cc
+  566-polymorphic-inlining/polymorphic_inline.cc \
+  570-checker-osr/osr.cc
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttest.so
 ART_TARGET_LIBARTTEST_$(ART_PHONY_TEST_TARGET_SUFFIX) += $(ART_TARGET_TEST_OUT)/$(TARGET_ARCH)/libarttestd.so