Revert "Revert "Implement on-stack replacement for arm/arm64/x86/x86_64.""
This reverts commit bd89a5c556324062b7d841843b039392e84cfaf4.
Change-Id: I08d190431520baa7fcec8fbdb444519f25ac8d44
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 3a9ce1b..97c60de 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -64,7 +64,8 @@
virtual bool JitCompile(Thread* self ATTRIBUTE_UNUSED,
jit::JitCodeCache* code_cache ATTRIBUTE_UNUSED,
- ArtMethod* method ATTRIBUTE_UNUSED)
+ ArtMethod* method ATTRIBUTE_UNUSED,
+ bool osr ATTRIBUTE_UNUSED)
SHARED_REQUIRES(Locks::mutator_lock_) {
return false;
}
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 6774758..68f4783 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -60,11 +60,12 @@
delete reinterpret_cast<JitCompiler*>(handle);
}
-extern "C" bool jit_compile_method(void* handle, ArtMethod* method, Thread* self)
+extern "C" bool jit_compile_method(
+ void* handle, ArtMethod* method, Thread* self, bool osr)
SHARED_REQUIRES(Locks::mutator_lock_) {
auto* jit_compiler = reinterpret_cast<JitCompiler*>(handle);
DCHECK(jit_compiler != nullptr);
- return jit_compiler->CompileMethod(self, method);
+ return jit_compiler->CompileMethod(self, method, osr);
}
extern "C" void jit_types_loaded(void* handle, mirror::Class** types, size_t count)
@@ -201,7 +202,7 @@
}
}
-bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method) {
+bool JitCompiler::CompileMethod(Thread* self, ArtMethod* method, bool osr) {
TimingLogger logger("JIT compiler timing logger", true, VLOG_IS_ON(jit));
const uint64_t start_time = NanoTime();
StackHandleScope<2> hs(self);
@@ -223,8 +224,8 @@
// of that proxy method, as the compiler does not expect a proxy method.
ArtMethod* method_to_compile = method->GetInterfaceMethodIfProxy(sizeof(void*));
JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
- success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile);
- if (success && perf_file_ != nullptr) {
+ success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method_to_compile, osr);
+ if (success && (perf_file_ != nullptr)) {
const void* ptr = method_to_compile->GetEntryPointFromQuickCompiledCode();
std::ostringstream stream;
stream << std::hex
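Note (annotation, not part of the patch): the runtime resolves this extern "C" entry point from the compiler library via dlsym, which is why the osr flag is threaded through a plain C signature. The sketch below is illustrative only; the wrapper name and handle variables are assumptions, not ART APIs.

#include <dlfcn.h>

class ArtMethod;
class Thread;

// Signature of the exported entry point after this change.
using JitCompileMethodFn = bool (*)(void* handle, ArtMethod* method, Thread* self, bool osr);

// Illustrative only: resolve jit_compile_method from an already dlopen'ed
// compiler library and request an OSR compilation of `method`.
bool RequestOsrCompilation(void* jit_library, void* compiler_handle,
                           ArtMethod* method, Thread* self) {
  auto compile = reinterpret_cast<JitCompileMethodFn>(
      dlsym(jit_library, "jit_compile_method"));
  if (compile == nullptr) {
    return false;  // Symbol not found: compiler library missing or too old.
  }
  // osr = true requests code whose OSR stack maps the interpreter can jump to.
  return compile(compiler_handle, method, self, /* osr */ true);
}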
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index 037a18a..5294d0e 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -37,7 +37,7 @@
public:
static JitCompiler* Create();
virtual ~JitCompiler();
- bool CompileMethod(Thread* self, ArtMethod* method)
+ bool CompileMethod(Thread* self, ArtMethod* method, bool osr)
SHARED_REQUIRES(Locks::mutator_lock_);
CompilerCallbacks* GetCompilerCallbacks() const;
size_t GetTotalCompileTime() const {
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index c7430e7..8d77daf 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -72,74 +72,6 @@
size_t index_;
};
-class SwitchTable : public ValueObject {
- public:
- SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
- : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
- int32_t table_offset = instruction.VRegB_31t();
- const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
- if (sparse) {
- CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
- } else {
- CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
- }
- num_entries_ = table[1];
- values_ = reinterpret_cast<const int32_t*>(&table[2]);
- }
-
- uint16_t GetNumEntries() const {
- return num_entries_;
- }
-
- void CheckIndex(size_t index) const {
- if (sparse_) {
- // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
- DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
- } else {
- // In a packed table, we have the starting key and num_entries_ values.
- DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
- }
- }
-
- int32_t GetEntryAt(size_t index) const {
- CheckIndex(index);
- return values_[index];
- }
-
- uint32_t GetDexPcForIndex(size_t index) const {
- CheckIndex(index);
- return dex_pc_ +
- (reinterpret_cast<const int16_t*>(values_ + index) -
- reinterpret_cast<const int16_t*>(&instruction_));
- }
-
- // Index of the first value in the table.
- size_t GetFirstValueIndex() const {
- if (sparse_) {
- // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
- return num_entries_;
- } else {
- // In a packed table, we have the starting key and num_entries_ values.
- return 1;
- }
- }
-
- private:
- const Instruction& instruction_;
- const uint32_t dex_pc_;
-
- // Whether this is a sparse-switch table (or a packed-switch one).
- const bool sparse_;
-
- // This can't be const as it needs to be computed off of the given instruction, and complicated
- // expressions in the initializer list seemed very ugly.
- uint16_t num_entries_;
-
- const int32_t* values_;
-
- DISALLOW_COPY_AND_ASSIGN(SwitchTable);
-};
-
void HGraphBuilder::InitializeLocals(uint16_t count) {
graph_->SetNumberOfVRegs(count);
locals_.resize(count);
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 1d604e7..93e17d6 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -30,7 +30,6 @@
namespace art {
class Instruction;
-class SwitchTable;
class HGraphBuilder : public ValueObject {
public:
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index a3bbfdb..e1b83f0 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -629,8 +629,76 @@
return stack_map_stream_.PrepareForFillIn();
}
-void CodeGenerator::BuildStackMaps(MemoryRegion region) {
+static void CheckCovers(uint32_t dex_pc,
+ const HGraph& graph,
+ const CodeInfo& code_info,
+ const ArenaVector<HSuspendCheck*>& loop_headers,
+ ArenaVector<size_t>* covered) {
+ StackMapEncoding encoding = code_info.ExtractEncoding();
+ for (size_t i = 0; i < loop_headers.size(); ++i) {
+ if (loop_headers[i]->GetDexPc() == dex_pc) {
+ if (graph.IsCompilingOsr()) {
+ DCHECK(code_info.GetOsrStackMapForDexPc(dex_pc, encoding).IsValid());
+ }
+ ++(*covered)[i];
+ }
+ }
+}
+
+// Debug helper to ensure loop entries in compiled code are matched by
+// dex branch instructions.
+static void CheckLoopEntriesCanBeUsedForOsr(const HGraph& graph,
+ const CodeInfo& code_info,
+ const DexFile::CodeItem& code_item) {
+ if (graph.HasTryCatch()) {
+ // One can write loops through try/catch, which we do not support for OSR anyway.
+ return;
+ }
+ ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc));
+ for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) {
+ if (it.Current()->IsLoopHeader()) {
+ HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck();
+ if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
+ loop_headers.push_back(suspend_check);
+ }
+ }
+ }
+ ArenaVector<size_t> covered(loop_headers.size(), 0, graph.GetArena()->Adapter(kArenaAllocMisc));
+ const uint16_t* code_ptr = code_item.insns_;
+ const uint16_t* code_end = code_item.insns_ + code_item.insns_size_in_code_units_;
+
+ size_t dex_pc = 0;
+ while (code_ptr < code_end) {
+ const Instruction& instruction = *Instruction::At(code_ptr);
+ if (instruction.IsBranch()) {
+ uint32_t target = dex_pc + instruction.GetTargetOffset();
+ CheckCovers(target, graph, code_info, loop_headers, &covered);
+ } else if (instruction.IsSwitch()) {
+ SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
+ uint16_t num_entries = table.GetNumEntries();
+ size_t offset = table.GetFirstValueIndex();
+
+ // Use a larger loop counter type to avoid overflow issues.
+ for (size_t i = 0; i < num_entries; ++i) {
+ // The target of the case.
+ uint32_t target = dex_pc + table.GetEntryAt(i + offset);
+ CheckCovers(target, graph, code_info, loop_headers, &covered);
+ }
+ }
+ dex_pc += instruction.SizeInCodeUnits();
+ code_ptr += instruction.SizeInCodeUnits();
+ }
+
+ for (size_t i = 0; i < covered.size(); ++i) {
+ DCHECK_NE(covered[i], 0u) << "Loop in compiled code has no dex branch equivalent";
+ }
+}
+
+void CodeGenerator::BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item) {
stack_map_stream_.FillIn(region);
+ if (kIsDebugBuild) {
+ CheckLoopEntriesCanBeUsedForOsr(*graph_, CodeInfo(region), code_item);
+ }
}
void CodeGenerator::RecordPcInfo(HInstruction* instruction,
@@ -705,6 +773,46 @@
EmitEnvironment(instruction->GetEnvironment(), slow_path);
stack_map_stream_.EndStackMapEntry();
+
+ HLoopInformation* info = instruction->GetBlock()->GetLoopInformation();
+ if (instruction->IsSuspendCheck() &&
+ (info != nullptr) &&
+ graph_->IsCompilingOsr() &&
+ (inlining_depth == 0)) {
+ DCHECK_EQ(info->GetSuspendCheck(), instruction);
+ // We duplicate the stack map as a marker that this stack map can be an OSR entry.
+ // The duplicate spares the runtime from having to recognize and skip OSR stack maps.
+ DCHECK(info->IsIrreducible());
+ stack_map_stream_.BeginStackMapEntry(
+ dex_pc, native_pc, register_mask, locations->GetStackMask(), outer_environment_size, 0);
+ EmitEnvironment(instruction->GetEnvironment(), slow_path);
+ stack_map_stream_.EndStackMapEntry();
+ if (kIsDebugBuild) {
+ HEnvironment* environment = instruction->GetEnvironment();
+ for (size_t i = 0, environment_size = environment->Size(); i < environment_size; ++i) {
+ HInstruction* in_environment = environment->GetInstructionAt(i);
+ if (in_environment != nullptr) {
+ DCHECK(in_environment->IsPhi() || in_environment->IsConstant());
+ Location location = environment->GetLocationAt(i);
+ DCHECK(location.IsStackSlot() ||
+ location.IsDoubleStackSlot() ||
+ location.IsConstant() ||
+ location.IsInvalid());
+ if (location.IsStackSlot() || location.IsDoubleStackSlot()) {
+ DCHECK_LT(location.GetStackIndex(), static_cast<int32_t>(GetFrameSize()));
+ }
+ }
+ }
+ }
+ } else if (kIsDebugBuild) {
+ // Ensure stack maps are unique, by checking that the native pc of the stack map
+ // just emitted differs from the native pc of the previously emitted stack map.
+ size_t number_of_stack_maps = stack_map_stream_.GetNumberOfStackMaps();
+ if (number_of_stack_maps > 1) {
+ DCHECK_NE(stack_map_stream_.GetStackMap(number_of_stack_maps - 1).native_pc_offset,
+ stack_map_stream_.GetStackMap(number_of_stack_maps - 2).native_pc_offset);
+ }
+ }
}
bool CodeGenerator::HasStackMapAtCurrentPc() {
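Note (annotation, not part of the patch): the debug check added above enforces a simple invariant — every loop header that the OSR-compiled code exposes as an entry must be the target of some dex branch or switch arm, because the interpreter requests OSR transitions at branches. A stripped-down, self-contained sketch of that invariant; the function and parameter names are illustrative, not the patch's API.

#include <cstdint>
#include <unordered_set>
#include <vector>

// Simplified model of CheckLoopEntriesCanBeUsedForOsr: each loop-header dex pc
// must appear among the dex pcs targeted by branch/switch instructions.
bool LoopHeadersCoveredByBranchTargets(const std::vector<uint32_t>& loop_header_dex_pcs,
                                       const std::vector<uint32_t>& branch_target_dex_pcs) {
  std::unordered_set<uint32_t> targets(branch_target_dex_pcs.begin(),
                                       branch_target_dex_pcs.end());
  for (uint32_t header_pc : loop_header_dex_pcs) {
    if (targets.count(header_pc) == 0) {
      return false;  // A compiled loop entry has no dex branch equivalent.
    }
  }
  return true;
}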
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 4f8f146..0a688cf 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -288,7 +288,7 @@
slow_paths_.push_back(slow_path);
}
- void BuildStackMaps(MemoryRegion region);
+ void BuildStackMaps(MemoryRegion region, const DexFile::CodeItem& code_item);
size_t ComputeStackMapsSize();
bool IsLeafMethod() const {
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 9b91b53..a8841d3 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -758,6 +758,7 @@
compiler_driver_->GetInstructionSet(),
invoke_type,
graph_->IsDebuggable(),
+ /* osr */ false,
graph_->GetCurrentInstructionId());
callee_graph->SetArtMethod(resolved_method);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 3dda850..f269885 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -647,6 +647,10 @@
header_->GetGraph()->SetHasIrreducibleLoops(true);
PopulateIrreducibleRecursive(back_edge);
} else {
+ if (header_->GetGraph()->IsCompilingOsr()) {
+ irreducible_ = true;
+ header_->GetGraph()->SetHasIrreducibleLoops(true);
+ }
PopulateRecursive(back_edge);
}
}
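Note (annotation, not part of the patch): marking every loop of an OSR graph as irreducible is deliberately conservative — the interpreter may transfer control straight to a loop header's OSR entry, so loop optimizations must not assume the loop is only entered through its pre-header. A minimal sketch of the kind of guard this enables, written against the types in nodes.h; the function itself is illustrative.

// A transformation that must bail out whenever a loop might be entered
// somewhere other than through its pre-header. Under this change, every loop
// of a graph built with osr = true reports IsIrreducible(), so OSR graphs are
// automatically excluded.
bool CanAssumeSingleLoopEntry(HGraph* graph, HLoopInformation* loop) {
  if (graph->IsCompilingOsr() || loop->IsIrreducible()) {
    // The interpreter may jump straight to the loop header's OSR stack map.
    return false;
  }
  return true;
}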
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index b808347..116b1c6 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -274,6 +274,7 @@
InstructionSet instruction_set,
InvokeType invoke_type = kInvalidInvokeType,
bool debuggable = false,
+ bool osr = false,
int start_instruction_id = 0)
: arena_(arena),
blocks_(arena->Adapter(kArenaAllocBlockList)),
@@ -302,7 +303,8 @@
cached_long_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_double_constants_(std::less<int64_t>(), arena->Adapter(kArenaAllocConstantsMap)),
cached_current_method_(nullptr),
- inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()) {
+ inexact_object_rti_(ReferenceTypeInfo::CreateInvalid()),
+ osr_(osr) {
blocks_.reserve(kDefaultNumberOfBlocks);
}
@@ -478,6 +480,8 @@
return instruction_set_;
}
+ bool IsCompilingOsr() const { return osr_; }
+
bool HasTryCatch() const { return has_try_catch_; }
void SetHasTryCatch(bool value) { has_try_catch_ = value; }
@@ -606,6 +610,11 @@
// collection pointer to passes which may create NullConstant.
ReferenceTypeInfo inexact_object_rti_;
+ // Whether we are compiling this graph for on-stack replacement: this causes
+ // all loops to be treated as irreducible and emits special stack maps marking
+ // the compiled-code entry points that the interpreter can jump to directly.
+ const bool osr_;
+
friend class SsaBuilder; // For caching constants.
friend class SsaLivenessAnalysis; // For the linear order.
ART_FRIEND_TEST(GraphTest, IfSuccessorSimpleJoinBlock1);
@@ -6040,6 +6049,74 @@
FOR_EACH_CONCRETE_INSTRUCTION(INSTRUCTION_TYPE_CHECK)
#undef INSTRUCTION_TYPE_CHECK
+class SwitchTable : public ValueObject {
+ public:
+ SwitchTable(const Instruction& instruction, uint32_t dex_pc, bool sparse)
+ : instruction_(instruction), dex_pc_(dex_pc), sparse_(sparse) {
+ int32_t table_offset = instruction.VRegB_31t();
+ const uint16_t* table = reinterpret_cast<const uint16_t*>(&instruction) + table_offset;
+ if (sparse) {
+ CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kSparseSwitchSignature));
+ } else {
+ CHECK_EQ(table[0], static_cast<uint16_t>(Instruction::kPackedSwitchSignature));
+ }
+ num_entries_ = table[1];
+ values_ = reinterpret_cast<const int32_t*>(&table[2]);
+ }
+
+ uint16_t GetNumEntries() const {
+ return num_entries_;
+ }
+
+ void CheckIndex(size_t index) const {
+ if (sparse_) {
+ // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+ DCHECK_LT(index, 2 * static_cast<size_t>(num_entries_));
+ } else {
+ // In a packed table, we have the starting key and num_entries_ values.
+ DCHECK_LT(index, 1 + static_cast<size_t>(num_entries_));
+ }
+ }
+
+ int32_t GetEntryAt(size_t index) const {
+ CheckIndex(index);
+ return values_[index];
+ }
+
+ uint32_t GetDexPcForIndex(size_t index) const {
+ CheckIndex(index);
+ return dex_pc_ +
+ (reinterpret_cast<const int16_t*>(values_ + index) -
+ reinterpret_cast<const int16_t*>(&instruction_));
+ }
+
+ // Index of the first value in the table.
+ size_t GetFirstValueIndex() const {
+ if (sparse_) {
+ // In a sparse table, we have num_entries_ keys and num_entries_ values, in that order.
+ return num_entries_;
+ } else {
+ // In a packed table, we have the starting key and num_entries_ values.
+ return 1;
+ }
+ }
+
+ private:
+ const Instruction& instruction_;
+ const uint32_t dex_pc_;
+
+ // Whether this is a sparse-switch table (or a packed-switch one).
+ const bool sparse_;
+
+ // This can't be const as it needs to be computed off of the given instruction, and complicated
+ // expressions in the initializer list seemed very ugly.
+ uint16_t num_entries_;
+
+ const int32_t* values_;
+
+ DISALLOW_COPY_AND_ASSIGN(SwitchTable);
+};
+
} // namespace art
#endif // ART_COMPILER_OPTIMIZING_NODES_H_
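Note (annotation, not part of the patch): the SwitchTable helper is moved here from builder.cc so that code_generator.cc can reuse it when scanning switch instructions for OSR targets. A short sketch of how it is consumed, mirroring the loop in CheckLoopEntriesCanBeUsedForOsr above; the wrapper function name is illustrative.

#include <cstdint>
#include <vector>

// Collect the dex pcs targeted by a packed/sparse switch at `dex_pc`, using
// the relocated SwitchTable helper from nodes.h.
std::vector<uint32_t> CollectSwitchTargets(const Instruction& instruction, uint32_t dex_pc) {
  SwitchTable table(instruction, dex_pc, instruction.Opcode() == Instruction::SPARSE_SWITCH);
  std::vector<uint32_t> targets;
  size_t first_value = table.GetFirstValueIndex();
  for (size_t i = 0; i < table.GetNumEntries(); ++i) {
    // Entries hold offsets relative to the switch instruction's dex pc.
    targets.push_back(dex_pc + table.GetEntryAt(i + first_value));
  }
  return targets;
}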
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index bdc664b..736ac32 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -300,7 +300,7 @@
}
}
- bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method)
+ bool JitCompile(Thread* self, jit::JitCodeCache* code_cache, ArtMethod* method, bool osr)
OVERRIDE
SHARED_REQUIRES(Locks::mutator_lock_);
@@ -309,7 +309,8 @@
CompiledMethod* Emit(ArenaAllocator* arena,
CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
- CompilerDriver* driver) const;
+ CompilerDriver* driver,
+ const DexFile::CodeItem* item) const;
// Try compiling a method and return the code generator used for
// compiling it.
@@ -327,7 +328,8 @@
uint32_t method_idx,
jobject class_loader,
const DexFile& dex_file,
- Handle<mirror::DexCache> dex_cache) const;
+ Handle<mirror::DexCache> dex_cache,
+ bool osr) const;
std::unique_ptr<OptimizingCompilerStats> compilation_stats_;
@@ -580,11 +582,12 @@
CompiledMethod* OptimizingCompiler::Emit(ArenaAllocator* arena,
CodeVectorAllocator* code_allocator,
CodeGenerator* codegen,
- CompilerDriver* compiler_driver) const {
+ CompilerDriver* compiler_driver,
+ const DexFile::CodeItem* code_item) const {
ArenaVector<LinkerPatch> linker_patches = EmitAndSortLinkerPatches(codegen);
ArenaVector<uint8_t> stack_map(arena->Adapter(kArenaAllocStackMaps));
stack_map.resize(codegen->ComputeStackMapsSize());
- codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()));
+ codegen->BuildStackMaps(MemoryRegion(stack_map.data(), stack_map.size()), *code_item);
CompiledMethod* compiled_method = CompiledMethod::SwapAllocCompiledMethod(
compiler_driver,
@@ -615,7 +618,8 @@
uint32_t method_idx,
jobject class_loader,
const DexFile& dex_file,
- Handle<mirror::DexCache> dex_cache) const {
+ Handle<mirror::DexCache> dex_cache,
+ bool osr) const {
MaybeRecordStat(MethodCompilationStat::kAttemptCompilation);
CompilerDriver* compiler_driver = GetCompilerDriver();
InstructionSet instruction_set = compiler_driver->GetInstructionSet();
@@ -663,8 +667,14 @@
dex_compilation_unit.GetDexFile(),
dex_compilation_unit.GetClassDefIndex());
HGraph* graph = new (arena) HGraph(
- arena, dex_file, method_idx, requires_barrier, compiler_driver->GetInstructionSet(),
- kInvalidInvokeType, compiler_driver->GetCompilerOptions().GetDebuggable());
+ arena,
+ dex_file,
+ method_idx,
+ requires_barrier,
+ compiler_driver->GetInstructionSet(),
+ kInvalidInvokeType,
+ compiler_driver->GetCompilerOptions().GetDebuggable(),
+ osr);
std::unique_ptr<CodeGenerator> codegen(
CodeGenerator::Create(graph,
@@ -797,10 +807,11 @@
method_idx,
jclass_loader,
dex_file,
- dex_cache));
+ dex_cache,
+ /* osr */ false));
if (codegen.get() != nullptr) {
MaybeRecordStat(MethodCompilationStat::kCompiled);
- method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver);
+ method = Emit(&arena, &code_allocator, codegen.get(), compiler_driver, code_item);
}
} else {
if (compiler_driver->GetCompilerOptions().VerifyAtRuntime()) {
@@ -843,7 +854,8 @@
bool OptimizingCompiler::JitCompile(Thread* self,
jit::JitCodeCache* code_cache,
- ArtMethod* method) {
+ ArtMethod* method,
+ bool osr) {
StackHandleScope<2> hs(self);
Handle<mirror::ClassLoader> class_loader(hs.NewHandle(
method->GetDeclaringClass()->GetClassLoader()));
@@ -873,7 +885,8 @@
method_idx,
jclass_loader,
*dex_file,
- dex_cache));
+ dex_cache,
+ osr));
if (codegen.get() == nullptr) {
return false;
}
@@ -885,7 +898,7 @@
return false;
}
MaybeRecordStat(MethodCompilationStat::kCompiled);
- codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size));
+ codegen->BuildStackMaps(MemoryRegion(stack_map_data, stack_map_size), *code_item);
const void* code = code_cache->CommitCode(
self,
method,
@@ -896,7 +909,8 @@
codegen->GetCoreSpillMask(),
codegen->GetFpuSpillMask(),
code_allocator.GetMemory().data(),
- code_allocator.GetSize());
+ code_allocator.GetSize(),
+ osr);
if (code == nullptr) {
code_cache->ClearData(self, stack_map_data);