Enable multi-threaded Quick compilation
Reuse thread-local copies of llvm context data for Quick compiler
(while continuing to regenerate fresh ones per method for Portable).
This is a transitional CL - the upcoming compiler driver change
is expected to pass a thread context structure to each compiler
worker thread rather than use the pthread_key mechanism.
Change-Id: I277920a5c2705748c3a9f37ceace53c903747ec2
diff --git a/src/compiler.cc b/src/compiler.cc
index 7bec994..b0d373b 100644
--- a/src/compiler.cc
+++ b/src/compiler.cc
@@ -322,6 +322,8 @@
}
VLOG(compiler) << "dlopen(\"" << compiler_so_name << "\", RTLD_LAZY) returned " << compiler_library_;
+ CHECK_PTHREAD_CALL(pthread_key_create, (&tls_key_, NULL), "compiler tls key");
+
#if defined(ART_USE_LLVM_COMPILER) || defined(ART_USE_GREENLAND_COMPILER)
// Initialize compiler_context_
typedef void (*InitCompilerContextFn)(Compiler&);
@@ -431,6 +433,16 @@
}
}
+CompilerTls* Compiler::GetTls() {
+ // Lazily create thread-local storage
+ CompilerTls* res = static_cast<CompilerTls*>(pthread_getspecific(tls_key_));
+ if (res == NULL) {
+ res = new CompilerTls();
+ CHECK_PTHREAD_CALL(pthread_setspecific, (tls_key_, res), "compiler tls");
+ }
+ return res;
+}
+
ByteArray* Compiler::CreateResolutionStub(InstructionSet instruction_set,
Runtime::TrampolineType type) {
switch (instruction_set) {
diff --git a/src/compiler.h b/src/compiler.h
index c5f19f7..5e9dbd7 100644
--- a/src/compiler.h
+++ b/src/compiler.h
@@ -40,6 +40,22 @@
class OatCompilationUnit;
class TimingLogger;
+// Thread-local storage for compiler worker threads
+class CompilerTls {
+#if defined(ART_USE_QUICK_COMPILER)
+ public:
+ CompilerTls() : llvm_info_(NULL) {}
+ ~CompilerTls() {}
+
+ void* GetLLVMInfo() { return llvm_info_; }
+
+ void SetLLVMInfo(void* llvm_info) { llvm_info_ = llvm_info; }
+
+ private:
+ void* llvm_info_;
+#endif
+};
+
class Compiler {
public:
// Create a compiler targeting the requested "instruction_set".
@@ -72,6 +88,8 @@
return image_;
}
+ CompilerTls* GetTls();
+
// Stub to throw AbstractMethodError
static ByteArray* CreateAbstractMethodErrorStub(InstructionSet instruction_set)
SHARED_LOCKS_REQUIRED(Locks::mutator_lock_);
@@ -347,6 +365,8 @@
const char* shorty, uint32_t shorty_len);
CreateInvokeStubFn create_invoke_stub_;
+ pthread_key_t tls_key_;
+
#if defined(ART_USE_LLVM_COMPILER)
typedef CompiledInvokeStub* (*CreateProxyStubFn)
(Compiler& compiler, const char* shorty, uint32_t shorty_len);
diff --git a/src/compiler/Compiler.h b/src/compiler/Compiler.h
index 8bda3fe..7eb32c2 100644
--- a/src/compiler/Compiler.h
+++ b/src/compiler/Compiler.h
@@ -178,10 +178,10 @@
};
#if defined(ART_USE_QUICK_COMPILER)
-class QuickCompiler {
+class LLVMInfo {
public:
- QuickCompiler();
- ~QuickCompiler();
+ LLVMInfo();
+ ~LLVMInfo();
llvm::LLVMContext* GetLLVMContext() {
return llvm_context_.get();
diff --git a/src/compiler/CompilerIR.h b/src/compiler/CompilerIR.h
index 593fce5..5a10831 100644
--- a/src/compiler/CompilerIR.h
+++ b/src/compiler/CompilerIR.h
@@ -549,7 +549,7 @@
Checkstats* checkstats;
#if defined(ART_USE_QUICK_COMPILER)
bool genBitcode;
- QuickCompiler* quick_compiler;
+ LLVMInfo* llvm_info;
llvm::LLVMContext* context;
llvm::Module* module;
llvm::Function* func;
diff --git a/src/compiler/Frontend.cc b/src/compiler/Frontend.cc
index d1259b7..72eb8a1 100644
--- a/src/compiler/Frontend.cc
+++ b/src/compiler/Frontend.cc
@@ -21,10 +21,24 @@
#include "object.h"
#include "runtime.h"
+#if defined(ART_USE_QUICK_COMPILER)
+#include <llvm/Support/Threading.h>
+
+namespace {
+ pthread_once_t llvm_multi_init = PTHREAD_ONCE_INIT;
+ void InitializeLLVMForQuick() {
+ llvm::llvm_start_multithreaded();
+ }
+}
+#endif
+
namespace art {
#if defined(ART_USE_QUICK_COMPILER)
-QuickCompiler::QuickCompiler() {
+LLVMInfo::LLVMInfo() {
+#if !defined(ART_USE_LLVM_COMPILER)
+ pthread_once(&llvm_multi_init, InitializeLLVMForQuick);
+#endif
// Create context, module, intrinsic helper & ir builder
llvm_context_.reset(new llvm::LLVMContext());
llvm_module_ = new llvm::Module("art", *llvm_context_);
@@ -33,17 +47,17 @@
ir_builder_.reset(new greenland::IRBuilder(*llvm_context_, *llvm_module_, *intrinsic_helper_));
}
-QuickCompiler::~QuickCompiler() {
+LLVMInfo::~LLVMInfo() {
}
extern "C" void ArtInitQuickCompilerContext(art::Compiler& compiler) {
CHECK(compiler.GetCompilerContext() == NULL);
- QuickCompiler* quickCompiler = new QuickCompiler();
- compiler.SetCompilerContext(quickCompiler);
+ LLVMInfo* llvmInfo = new LLVMInfo();
+ compiler.SetCompilerContext(llvmInfo);
}
extern "C" void ArtUnInitQuickCompilerContext(art::Compiler& compiler) {
- delete reinterpret_cast<QuickCompiler*>(compiler.GetCompilerContext());
+ delete reinterpret_cast<LLVMInfo*>(compiler.GetCompilerContext());
compiler.SetCompilerContext(NULL);
}
#endif
@@ -777,7 +791,7 @@
uint32_t method_idx, jobject class_loader,
const DexFile& dex_file
#if defined(ART_USE_QUICK_COMPILER)
- , QuickCompiler* quick_compiler,
+ , LLVMInfo* llvm_info,
bool gbcOnly
#endif
)
@@ -812,14 +826,7 @@
DCHECK((cUnit->instructionSet == kThumb2) ||
(cUnit->instructionSet == kX86) ||
(cUnit->instructionSet == kMips));
- if (gbcOnly) {
- cUnit->quick_compiler = quick_compiler;
- } else {
- // TODO: We need one LLVMContext per thread.
- cUnit->quick_compiler =
- reinterpret_cast<QuickCompiler*>(compiler.GetCompilerContext());
- }
- DCHECK(cUnit->quick_compiler != NULL);
+ cUnit->llvm_info = llvm_info;
if (cUnit->instructionSet == kThumb2) {
// TODO: remove this once x86 is tested
cUnit->genBitcode = true;
@@ -1257,10 +1264,10 @@
uint32_t access_flags, InvokeType invoke_type,
uint32_t method_idx, jobject class_loader,
const DexFile& dex_file,
- QuickCompiler* quick_compiler)
+ LLVMInfo* llvm_info)
{
compileMethod(compiler, code_item, access_flags, invoke_type, method_idx, class_loader,
- dex_file, quick_compiler, true);
+ dex_file, llvm_info, true);
}
#else
CompiledMethod* oatCompileMethod(Compiler& compiler,
diff --git a/src/compiler/codegen/MethodBitcode.cc b/src/compiler/codegen/MethodBitcode.cc
index 58678a0..cf07ea4 100644
--- a/src/compiler/codegen/MethodBitcode.cc
+++ b/src/compiler/codegen/MethodBitcode.cc
@@ -170,11 +170,20 @@
}
void initIR(CompilationUnit* cUnit)
{
- QuickCompiler* quick = cUnit->quick_compiler;
- cUnit->context = quick->GetLLVMContext();
- cUnit->module = quick->GetLLVMModule();
- cUnit->intrinsic_helper = quick->GetIntrinsicHelper();
- cUnit->irb = quick->GetIRBuilder();
+ LLVMInfo* llvmInfo = cUnit->llvm_info;
+ if (llvmInfo == NULL) {
+ CompilerTls* tls = cUnit->compiler->GetTls();
+ CHECK(tls != NULL);
+ llvmInfo = static_cast<LLVMInfo*>(tls->GetLLVMInfo());
+ if (llvmInfo == NULL) {
+ llvmInfo = new LLVMInfo();
+ tls->SetLLVMInfo(llvmInfo);
+ }
+ }
+ cUnit->context = llvmInfo->GetLLVMContext();
+ cUnit->module = llvmInfo->GetLLVMModule();
+ cUnit->intrinsic_helper = llvmInfo->GetIntrinsicHelper();
+ cUnit->irb = llvmInfo->GetIRBuilder();
}
const char* llvmSSAName(CompilationUnit* cUnit, int ssaReg) {
diff --git a/src/compiler_llvm/compilation_unit.cc b/src/compiler_llvm/compilation_unit.cc
index 95073ea..ba71aee 100644
--- a/src/compiler_llvm/compilation_unit.cc
+++ b/src/compiler_llvm/compilation_unit.cc
@@ -172,9 +172,9 @@
#else
compiler_ = NULL;
oat_compilation_unit_ = NULL;
- quick_ctx_.reset(new QuickCompiler());
- context_.reset(quick_ctx_->GetLLVMContext());
- module_ = quick_ctx_->GetLLVMModule();
+ llvm_info_.reset(new LLVMInfo());
+ context_.reset(llvm_info_->GetLLVMContext());
+ module_ = llvm_info_->GetLLVMModule();
#endif
// Include the runtime function declaration
@@ -211,7 +211,7 @@
#if defined(ART_USE_DEXLANG_FRONTEND)
delete dex_lang_ctx_;
#elif defined(ART_USE_QUICK_COMPILER)
- llvm::LLVMContext* llvm_context = context_.release(); // Managed by quick_ctx_
+ llvm::LLVMContext* llvm_context = context_.release(); // Managed by llvm_info_
CHECK(llvm_context != NULL);
#endif
}
@@ -331,7 +331,7 @@
#if defined(ART_USE_DEXLANG_FRONTEND)
fpm.add(CreateGBCExpanderPass(dex_lang_ctx_->GetIntrinsicHelper(), *irb_.get()));
#elif defined(ART_USE_QUICK_COMPILER)
- fpm.add(CreateGBCExpanderPass(*quick_ctx_->GetIntrinsicHelper(), *irb_.get(),
+ fpm.add(CreateGBCExpanderPass(*llvm_info_->GetIntrinsicHelper(), *irb_.get(),
compiler_, oat_compilation_unit_));
#endif
fpm.add(new ::AddSuspendCheckToLoopLatchPass(irb_.get()));
@@ -341,7 +341,7 @@
#if defined(ART_USE_DEXLANG_FRONTEND)
fpm2.add(CreateGBCExpanderPass(dex_lang_ctx_->GetIntrinsicHelper(), *irb_.get()));
#elif defined(ART_USE_QUICK_COMPILER)
- fpm2.add(CreateGBCExpanderPass(*quick_ctx_->GetIntrinsicHelper(), *irb_.get(),
+ fpm2.add(CreateGBCExpanderPass(*llvm_info_->GetIntrinsicHelper(), *irb_.get(),
compiler_, oat_compilation_unit_));
#endif
fpm2.add(new ::AddSuspendCheckToLoopLatchPass(irb_.get()));
diff --git a/src/compiler_llvm/compilation_unit.h b/src/compiler_llvm/compilation_unit.h
index c4fbae4..6ad7ee1 100644
--- a/src/compiler_llvm/compilation_unit.h
+++ b/src/compiler_llvm/compilation_unit.h
@@ -91,8 +91,8 @@
}
#if defined(ART_USE_QUICK_COMPILER)
- QuickCompiler* GetQuickContext() const {
- return quick_ctx_.get();
+ LLVMInfo* GetQuickContext() const {
+ return llvm_info_.get();
}
void SetCompiler(Compiler* compiler) {
compiler_ = compiler;
@@ -125,7 +125,7 @@
greenland::DexLang::Context* dex_lang_ctx_;
#endif
#if defined(ART_USE_QUICK_COMPILER)
- UniquePtr<QuickCompiler> quick_ctx_;
+ UniquePtr<LLVMInfo> llvm_info_;
Compiler* compiler_;
OatCompilationUnit* oat_compilation_unit_;
#endif
diff --git a/src/compiler_llvm/compiler_llvm.cc b/src/compiler_llvm/compiler_llvm.cc
index 85ae794..a964b40 100644
--- a/src/compiler_llvm/compiler_llvm.cc
+++ b/src/compiler_llvm/compiler_llvm.cc
@@ -45,7 +45,7 @@
uint32_t access_flags, InvokeType invoke_type,
uint32_t method_idx, jobject class_loader,
const DexFile& dex_file,
- QuickCompiler* quick_compiler);
+ LLVMInfo* llvm_info);
}
#endif
diff --git a/src/dex2oat.cc b/src/dex2oat.cc
index c96620e..d20d7ab 100644
--- a/src/dex2oat.cc
+++ b/src/dex2oat.cc
@@ -493,11 +493,7 @@
uintptr_t image_base = 0;
UniquePtr<std::string> host_prefix;
std::vector<const char*> runtime_args;
-#if defined(ART_USE_QUICK_COMPILER) || defined(__APPLE__)
- int thread_count = 1;
-#else
int thread_count = sysconf(_SC_NPROCESSORS_CONF);
-#endif
bool support_debugging = false;
#if defined(__arm__)
InstructionSet instruction_set = kThumb2;