author     2016-02-05 16:51:53 +0000
committer  2016-02-05 17:11:54 +0000
commit     b331febbab8e916680faba722cc84b66b84218a3 (patch)
tree       35f985b021e476914bfe91492da23fee218014a7 /compiler/optimizing
parent     586996afc905518ed926e4680aab67bedabec9b7 (diff)
Revert "Revert "Implement on-stack replacement for arm/arm64/x86/x86_64.""
This reverts commit bd89a5c556324062b7d841843b039392e84cfaf4.
Change-Id: I08d190431520baa7fcec8fbdb444519f25ac8d44
Diffstat (limited to 'compiler/optimizing')
-rw-r--r--  compiler/optimizing/builder.cc              68
-rw-r--r--  compiler/optimizing/builder.h                1
-rw-r--r--  compiler/optimizing/code_generator.cc      110
-rw-r--r--  compiler/optimizing/code_generator.h         2
-rw-r--r--  compiler/optimizing/inliner.cc               1
-rw-r--r--  compiler/optimizing/nodes.cc                 4
-rw-r--r--  compiler/optimizing/nodes.h                 79
-rw-r--r--  compiler/optimizing/optimizing_compiler.cc  42
8 files changed, 221 insertions, 86 deletions
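Review note: on-stack replacement (OSR) lets the runtime move a method from the interpreter into JIT-compiled code while it is still spinning in a hot loop, instead of waiting for the next invocation. The toy program below models only that hand-off; InterpretWithOsr, CompiledRemainder, and kOsrThreshold are invented for illustration and are not ART APIs.

#include <cstdint>
#include <cstdio>

// Stand-in for the JIT-compiled method body, entered at the loop header
// with the interpreter's live values (i, acc) handed over.
static int64_t CompiledRemainder(int64_t i, int64_t n, int64_t acc) {
  for (; i < n; ++i) {
    acc += i;
  }
  return acc;
}

static int64_t InterpretWithOsr(int64_t n) {
  constexpr int kOsrThreshold = 10000;  // invented hotness trigger
  int back_edges = 0;
  int64_t acc = 0;
  for (int64_t i = 0; i < n; ++i) {
    if (++back_edges == kOsrThreshold) {
      // On-stack replacement: jump into compiled code at the loop entry
      // instead of finishing the loop in the interpreter.
      return CompiledRemainder(i, n, acc);
    }
    acc += i;  // interpreted loop body
  }
  return acc;
}

int main() {
  std::printf("%lld\n", static_cast<long long>(InterpretWithOsr(1000000)));
  return 0;
}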
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index c7430e7eb6..8d77daf183 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -72,74 +72,6 @@ class Temporaries : public ValueObject {
   size_t index_;
 };
 
-class SwitchTable : public ValueObject {
- public:
-  SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
-      : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
-    int32_t table_offset = instruction.VRegB_31t();
-    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
-    if (sparse) {
-      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
-    } else {
-      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
-    }
-    num_entries_ = table[1];
-    values_ = reinterpret_cast<const int32_t*>(&table[2]);
-  }
-
-  uint16_t GetNumEntries() const {
-    return num_entries_;
-  }
-
-  void CheckIndex(size_t index) const {
-    if (sparse_) {
-      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
-      DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
-    } else {
-      // In a packed table, we have the starting key and num_entries_ values.
-      DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
-    }
-  }
-
-  int32_t GetEntryAt(size_t index) const {
-    CheckIndex(index);
-    return values_[index];
-  }
-
-  uint32_t GetDexPcForIndex(size_t index) const {
-    CheckIndex(index);
-    return dex_pc_ +
-        (reinterpret_cast<const int16_t*>(values_ + index) -
-         reinterpret_cast<const int16_t*>(&instruction_));
-  }
-
-  // Index of the first value in the table.
-  size_t GetFirstValueIndex() const {
-    if (sparse_) {
-      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
-      return num_entries_;
-    } else {
-      // In a packed table, we have the starting key and num_entries_ values.
-      return 1;
-    }
-  }
-
- private:
-  const Instruction& instruction_;
-  const uint32_t dex_pc_;
-
-  // Whether this is a sparse-switch table (or a packed-switch one).
-  const bool sparse_;
-
-  // This can't be const as it needs to be computed off of the given instruction, and complicated
-  // expressions in the initializer list seemed very ugly.
-  uint16_t num_entries_;
-
-  const int32_t* values_;
-
-  DISALLOW_COPY_AND_ASSIGN(SwitchTable);
-};
-
 void HGraphBuilder::InitializeLocals(uint16_t count) {
   graph_->SetNumberOfVRegs(count);
   locals_.resize(count);
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 1d604e7135..93e17d6422 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -30,7 +30,6 @@ namespace art {
 
 class Instruction;
-class SwitchTable;
 
 class HGraphBuilder : public ValueObject {
  public:
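Review note: SwitchTable is not gone; it is re-added verbatim to nodes.h later in this patch, because the new OSR debug check in code_generator.cc (the hunk below) must also decode dex switch payloads to find branch targets. The standalone sketch below parses a hand-built packed-switch payload the same way SwitchTable's constructor and accessors do; the payload buffer is fabricated for illustration, and 0x0100 is the dex packed-switch-payload ident.

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Build a fake packed-switch payload: ident, entry count, first key,
  // then one 32-bit branch target per entry (dex stores them in 16-bit
  // code units, hence the uint16_t buffer).
  alignas(int32_t) uint16_t payload[8] = {};
  payload[0] = 0x0100;  // packed-switch-payload ident
  payload[1] = 2;       // num_entries
  const int32_t first_key = 10;
  const int32_t targets[2] = {0x20, 0x30};
  std::memcpy(&payload[2], &first_key, sizeof(first_key));
  std::memcpy(&payload[4], targets, sizeof(targets));

  // Mirror SwitchTable's steps: check the signature, read the count, and
  // view everything after the two header units as int32 values. For a
  // packed table GetFirstValueIndex() is 1, because values[0] is the key.
  assert(payload[0] == 0x0100);
  const uint16_t num_entries = payload[1];
  const int32_t* values = reinterpret_cast<const int32_t*>(&payload[2]);
  const size_t first_value_index = 1;
  for (size_t i = 0; i < num_entries; ++i) {
    std::printf("case %d -> branch offset 0x%x\n",
                first_key + static_cast<int32_t>(i),  // packed keys are consecutive
                values[first_value_index + i]);
  }
  return 0;
}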
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index a3bbfdbd27..e1b83f05d6 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -629,8 +629,76 @@ size_t CodeGenerator::ComputeStackMapsSize() {
   return stack_map_stream_.PrepareForFillIn();
 }
 
-void CodeGenerator::BuildStackMaps(MemoryRegion region) {
+static void CheckCovers(uint32_t dex_pc,
+                        const HGraph& graph,
+                        const CodeInfo& code_info,
+                        const ArenaVector<HSuspendCheck*>& loop_headers,
+                        ArenaVector<size_t>* covered) {
+  StackMapEncoding encoding = code_info.ExtractEncoding();
+  for (size_t i = 0; i < loop_headers.size(); ++i) {
+    if (loop_headers[i]->GetDexPc() == dex_pc) {
+      if (graph.IsCompilingOsr()) {
+        DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc, encoding).IsValid());
+      }
+      ++(*covered)[i];
+    }
+  }
+}
+
+// Debug helper to ensure loop entries in compiled code are matched by
+// dex branch instructions.
+static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
+                                            const CodeInfo& code_info,
+                                            const DexFile::CodeItem& code_item) {
+  if (graph.HasTryCatch()) {
+    // One can write loops through try/catch, which we do not support for OSR anyway.
+    return;
+  }
+  ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc));
+  for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) {
+    if (it.Current()->IsLoopHeader()) {
+      HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck();
+      if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
+        loop_headers.push_back(suspend_check);
+      }
+    }
+  }
+  ArenaVector<size_t> covered(loop_headers.size(), 0, graph.GetArena()->Adapter(kArenaAllocMisc));
+  const uint16_t* code_ptr = code_item.insns_;
+  const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
+
+  size_t dex_pc = 0;
+  while (code_ptr < code_end) {
+    const Instruction& instruction = *Instruction::At(code_ptr);
+    if (instruction.IsBranch()) {
+      uint32_t target = dex_pc + instruction.GetTargetOffset();
+      CheckCovers(target, graph, code_info, loop_headers, &covered);
+    } else if (instruction.IsSwitch()) {
+      SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
+      uint16_t num_entries = table.GetNumEntries();
+      size_t offset = table.GetFirstValueIndex();
+
+      // Use a larger loop counter type to avoid overflow issues.
+      for (size_t i = 0; i < num_entries; ++i) {
+        // The target of the case.
+        uint32_t target = dex_pc + table.GetEntryAt(i + offset);
+        CheckCovers(target, graph, code_info, loop_headers, &covered);
+      }
+    }
+    dex_pc += instruction.SizeInCodeUnits();
+    code_ptr += instruction.SizeInCodeUnits();
+  }
+
+  for (size_t i = 0; i < covered.size(); ++i) {
+    DCHECK_NE(covered[i], 0u) << "Loop in compiled code has no dex branch equivalent";
+  }
+}
+
+void CodeGenerator::BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item) {
   stack_map_stream_.FillIn(region);
+  if (kIsDebugBuild) {
+    CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(region), code_item);
+  }
 }
 
 void CodeGenerator::RecordPcInfo(HInstruction* instruction,
@@ -705,6 +773,46 @@ void CodeGenerator::RecordPcInfo(HInstruction* instruction,
 
   EmitEnvironment(instruction->GetEnvironment(), slow_path);
   stack_map_stream_.EndStackMapEntry();
+
+  HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
+  if (instruction->IsSuspendCheck() &&
+      (info != nullptr) &&
+      graph_->IsCompilingOsr() &&
+      (inlining_depth == 0)) {
+    DCHECK_EQ(info->GetSuspendCheck(), instruction);
+    // We duplicate the stack map as a marker that this stack map can be an OSR entry.
+    // Duplicating it avoids having the runtime recognize and skip an OSR stack map.
+    DCHECK(info->IsIrreducible());
+    stack_map_stream_.BeginStackMapEntry(
+        dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0);
+    EmitEnvironment(instruction->GetEnvironment(), slow_path);
+    stack_map_stream_.EndStackMapEntry();
+    if (kIsDebugBuild) {
+      HEnvironment* environment = instruction->GetEnvironment();
+      for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
+        HInstruction* in_environment = environment->GetInstructionAt(i);
+        if (in_environment != nullptr) {
+          DCHECK(in_environment->IsPhi() || in_environment->IsConstant());
+          Location location = environment->GetLocationAt(i);
+          DCHECK(location.IsStackSlot() ||
+                 location.IsDoubleStackSlot() ||
+                 location.IsConstant() ||
+                 location.IsInvalid());
+          if (location.IsStackSlot() || location.IsDoubleStackSlot()) {
+            DCHECK_LT(location.GetStackIndex(), static_cast<int32_t>(GetFrameSize()));
+          }
+        }
+      }
+    }
+  } else if (kIsDebugBuild) {
+    // Ensure stack maps are unique, by checking that the native pc in the stack map
+    // last emitted is different than the native pc of the stack map just emitted.
+    size_t number_of_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
+    if (number_of_stack_maps > 1) {
+      DCHECK_NE(stack_map_stream_.GetStackMap(number_of_stack_maps - 1).native_pc_offset,
+                stack_map_stream_.GetStackMap(number_of_stack_maps - 2).native_pc_offset);
+    }
+  }
 }
 
 bool CodeGenerator::HasStackMapAtCurrentPc() {
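Review note: the patch marks an OSR entry by emitting the same (dex pc, native pc) stack map twice in a row, which is also why the non-OSR path above now DCHECKs that consecutive stack maps have distinct native pcs: an accidental duplicate would masquerade as an OSR entry. A toy decoder under that duplication convention, where StackMap is a made-up struct rather than ART's class:

#include <cstdint>
#include <cstdio>
#include <vector>

struct StackMap {
  uint32_t dex_pc;
  uint32_t native_pc_offset;
};

// Returns the indices of stack maps that mark OSR entries, assuming the
// duplication convention: a map that repeats its predecessor's pcs.
static std::vector<size_t> FindOsrEntries(const std::vector<StackMap>& maps) {
  std::vector<size_t> osr;
  for (size_t i = 1; i < maps.size(); ++i) {
    if (maps[i].native_pc_offset == maps[i - 1].native_pc_offset &&
        maps[i].dex_pc == maps[i - 1].dex_pc) {
      osr.push_back(i);
    }
  }
  return osr;
}

int main() {
  const std::vector<StackMap> maps = {
      {0, 0x10}, {4, 0x24}, {4, 0x24}, {9, 0x40}};  // dex pc 4 is a loop header
  for (size_t i : FindOsrEntries(maps)) {
    std::printf("OSR entry: dex pc %u, native offset 0x%x\n",
                maps[i].dex_pc, maps[i].native_pc_offset);
  }
  return 0;
}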
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 4f8f146753..0a688cf649 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -288,7 +288,7 @@ class CodeGenerator {
     slow_paths_.push_back(slow_path);
   }
 
-  void BuildStackMaps(MemoryRegion region);
+  void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
   size_t ComputeStackMapsSize();
 
   bool IsLeafMethod() const {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 9b91b53813..a8841d31c5 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -758,6 +758,7 @@ bool HInliner::TryBuildAndInline(ArtMethod* resolved_method,
       compiler_driver_->GetInstructionSet(),
       invoke_type,
       graph_->IsDebuggable(),
+      /* osr */ false,
       graph_->GetCurrentInstructionId());
   callee_graph->SetArtMethod(resolved_method);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 3dda8501d2..f269885907 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -647,6 +647,10 @@ void HLoopInformation::Populate() {
     header_->GetGraph()->SetHasIrreducibleLoops(true);
     PopulateIrreducibleRecursive(back_edge);
   } else {
+    if (header_->GetGraph()->IsCompilingOsr()) {
+      irreducible_ = true;
+      header_->GetGraph()->SetHasIrreducibleLoops(true);
+    }
     PopulateRecursive(back_edge);
   }
 }
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b8083477cf..116b1c6b1c 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -274,6 +274,7 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
          InstructionSet instruction_set,
          InvokeType invoke_type = kInvalidInvokeType,
          bool debuggable = false,
+         bool osr = false,
          int start_instruction_id = 0)
       : arena_(arena),
         blocks_(arena->Adapter(kArenaAllocBlockList)),
@@ -302,7 +303,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
         cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
         cached_current_method_(nullptr),
-        inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) {
+        inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
+        osr_(osr) {
     blocks_.reserve(kDefaultNumberOfBlocks);
   }
 
@@ -478,6 +480,8 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
     return instruction_set_;
   }
 
+  bool IsCompilingOsr() const { return osr_; }
+
   bool HasTryCatch() const { return has_try_catch_; }
   void SetHasTryCatch(bool value) { has_try_catch_ = value; }
 
@@ -606,6 +610,11 @@ class HGraph : public ArenaObject<kArenaAllocGraph> {
   // collection pointer to passes which may create NullConstant.
   ReferenceTypeInfo inexact_object_rti_;
 
+  // Whether we are compiling this graph for on stack replacement: this will
+  // make all loops seen as irreducible and emit special stack maps to mark
+  // compiled code entries which the interpreter can directly jump to.
+  const bool osr_;
+
   friend class SsaBuilder;           // For caching constants.
   friend class SsaLivenessAnalysis;  // For the linear order.
   ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -6040,6 +6049,74 @@ inline bool IsSameDexFile(const DexFile& lhs, const DexFile& rhs) {
   FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
 #undef INSTRUCTION_TYPE_CHECK
 
+class SwitchTable : public ValueObject {
+ public:
+  SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
+      : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
+    int32_t table_offset = instruction.VRegB_31t();
+    const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
+    if (sparse) {
+      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
+    } else {
+      CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
+    }
+    num_entries_ = table[1];
+    values_ = reinterpret_cast<const int32_t*>(&table[2]);
+  }
+
+  uint16_t GetNumEntries() const {
+    return num_entries_;
+  }
+
+  void CheckIndex(size_t index) const {
+    if (sparse_) {
+      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+      DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
+    } else {
+      // In a packed table, we have the starting key and num_entries_ values.
+      DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
+    }
+  }
+
+  int32_t GetEntryAt(size_t index) const {
+    CheckIndex(index);
+    return values_[index];
+  }
+
+  uint32_t GetDexPcForIndex(size_t index) const {
+    CheckIndex(index);
+    return dex_pc_ +
+        (reinterpret_cast<const int16_t*>(values_ + index) -
+         reinterpret_cast<const int16_t*>(&instruction_));
+  }
+
+  // Index of the first value in the table.
+  size_t GetFirstValueIndex() const {
+    if (sparse_) {
+      // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+      return num_entries_;
+    } else {
+      // In a packed table, we have the starting key and num_entries_ values.
+      return 1;
+    }
+  }
+
+ private:
+  const Instruction& instruction_;
+  const uint32_t dex_pc_;
+
+  // Whether this is a sparse-switch table (or a packed-switch one).
+  const bool sparse_;
+
+  // This can't be const as it needs to be computed off of the given instruction, and complicated
+  // expressions in the initializer list seemed very ugly.
+  uint16_t num_entries_;
+
+  const int32_t* values_;
+
+  DISALLOW_COPY_AND_ASSIGN(SwitchTable);
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_NODES_H_
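Review note: marking every loop irreducible when compiling for OSR (the nodes.cc hunk above) disables optimizations that assume a loop is always entered through its single header. The small example below shows why that assumption matters; it is illustrative C++, not ART code:

#include <cstdio>

// With a single loop entry, 'inv' is computed once in the preheader and
// the loop may keep it in a register. An OSR transition jumps straight to
// the loop header from the interpreter, skipping the preheader, so such
// values must either be recomputed or rebuilt from the interpreter frame
// into stack slots, which is what the RecordPcInfo debug check verifies.
int SumScaled(int n, int k) {
  int inv = k * 2;  // loop-invariant, hoisted to the preheader
  int acc = 0;
  for (int i = 0; i < n; ++i) {
    acc += inv;  // safe only because every entry path ran the preheader
  }
  return acc;
}

int main() {
  std::printf("%d\n", SumScaled(4, 3));  // prints 24
  return 0;
}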
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index bdc664b3eb..736ac32011 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -300,7 +300,7 @@ class OptimizingCompiler FINAL : public Compiler {
     }
   }
 
-  bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method)
+  bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, bool osr)
       OVERRIDE
       SHARED_REQUIRES(Locks::mutator_lock_);
 
@@ -309,7 +309,8 @@
   CompiledMethod* Emit(ArenaAllocator* arena,
                        CodeVectorAllocator* code_allocator,
                        CodeGenerator* codegen,
-                       CompilerDriver* driver) const;
+                       CompilerDriver* driver,
+                       const DexFile::CodeItem* item) const;
 
   // Try compiling a method and return the code generator used for
   // compiling it.
@@ -327,7 +328,8 @@
                          uint32_t method_idx,
                          jobject class_loader,
                          const DexFile& dex_file,
-                         Handle<mirror::DexCache> dex_cache) const;
+                         Handle<mirror::DexCache> dex_cache,
+                         bool osr) const;
 
   std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
 
@@ -580,11 +582,12 @@ static ArenaVector<LinkerPatch> EmitAndSortLinkerPatches(CodeGenerator* codegen)
 CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
                                          CodeVectorAllocator* code_allocator,
                                          CodeGenerator* codegen,
-                                         CompilerDriver* compiler_driver) const {
+                                         CompilerDriver* compiler_driver,
+                                         const DexFile::CodeItem* code_item) const {
   ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
   ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
   stack_map.resize(codegen->ComputeStackMapsSize());
-  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
+  codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), *code_item);
 
   CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
       compiler_driver,
@@ -615,7 +618,8 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
                                               uint32_t method_idx,
                                               jobject class_loader,
                                               const DexFile& dex_file,
-                                              Handle<mirror::DexCache> dex_cache) const {
+                                              Handle<mirror::DexCache> dex_cache,
+                                              bool osr) const {
   MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
   CompilerDriver* compiler_driver = GetCompilerDriver();
   InstructionSet instruction_set = compiler_driver->GetInstructionSet();
@@ -663,8 +667,14 @@ CodeGenerator* OptimizingCompiler::TryCompile(ArenaAllocator* arena,
       dex_compilation_unit.GetDexFile(),
       dex_compilation_unit.GetClassDefIndex());
   HGraph* graph = new (arena) HGraph(
-      arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
-      kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable());
+      arena,
+      dex_file,
+      method_idx,
+      requires_barrier,
+      compiler_driver->GetInstructionSet(),
+      kInvalidInvokeType,
+      compiler_driver->GetCompilerOptions().GetDebuggable(),
+      osr);
 
   std::unique_ptr<CodeGenerator> codegen(
       CodeGenerator::Create(graph,
@@ -797,10 +807,11 @@ CompiledMethod* OptimizingCompiler::Compile(const DexFile::CodeItem* code_item,
                           method_idx,
                           jclass_loader,
                           dex_file,
-                          dex_cache));
+                          dex_cache,
+                          /* osr */ false));
     if (codegen.get() != nullptr) {
       MaybeRecordStat(MethodCompilationStat::kCompiled);
-      method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver);
+      method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item);
     }
   } else {
     if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
@@ -843,7 +854,8 @@ bool IsCompilingWithCoreImage() {
 
 bool OptimizingCompiler::JitCompile(Thread* self,
                                     jit::JitCodeCache* code_cache,
-                                    ArtMethod* method) {
+                                    ArtMethod* method,
+                                    bool osr) {
   StackHandleScope<2> hs(self);
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
       method->GetDeclaringClass()->GetClassLoader()));
@@ -873,7 +885,8 @@ bool OptimizingCompiler::JitCompile(Thread* self,
                       method_idx,
                       jclass_loader,
                       *dex_file,
-                      dex_cache));
+                      dex_cache,
+                      osr));
     if (codegen.get() == nullptr) {
       return false;
     }
@@ -885,7 +898,7 @@ bool OptimizingCompiler::JitCompile(Thread* self,
     return false;
   }
   MaybeRecordStat(MethodCompilationStat::kCompiled);
-  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
+  codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item);
   const void* code = code_cache->CommitCode(
       self,
       method,
@@ -896,7 +909,8 @@ bool OptimizingCompiler::JitCompile(Thread* self,
       codegen->GetCoreSpillMask(),
       codegen->GetFpuSpillMask(),
       code_allocator.GetMemory().data(),
-      code_allocator.GetSize());
+      code_allocator.GetSize(),
+      osr);
 
   if (code == nullptr) {
     code_cache->ClearData(self, stack_map_data);
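Review note: the net effect in this file is that one osr flag threads from JitCompile through TryCompile into HGraph and down to JitCodeCache::CommitCode, so the same pipeline produces either a regular or an OSR version of a method. A minimal sketch of the call-site decision this enables; Method, OnHotnessEvent, and this JitCompile signature are invented stand-ins, not ART's:

#include <cstdio>

struct Method {
  const char* name;
};

// Stand-in for OptimizingCompiler::JitCompile(self, code_cache, method, osr).
static bool JitCompile(Method* m, bool osr) {
  std::printf("compiling %s (%s)\n", m->name, osr ? "osr entry" : "method entry");
  return true;
}

// Invented dispatcher: back-edge hotness means the method is stuck in a
// loop right now, so compile an OSR version the running frame can enter;
// call hotness just prepares compiled code for future invocations.
static void OnHotnessEvent(Method* m, bool hot_at_back_edge) {
  JitCompile(m, /* osr */ hot_at_back_edge);
}

int main() {
  Method m{"LFoo;->bar()V"};
  OnHotnessEvent(&m, /* hot_at_back_edge */ true);   // long-running loop
  OnHotnessEvent(&m, /* hot_at_back_edge */ false);  // many short calls
  return 0;
}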