Parallel mark stack processing
Enabled parallel mark stack processing by using a thread pool.
Optimized object scanning by removing dependent loads for IsClass.
Performance:
Prime: ~10% speedup of partial GC.
Nakasi: ~50% speedup of partial GC.
Change-Id: I43256a068efc47cb52d93108458ea18d4e02fccc
diff --git a/src/compiler.cc b/src/compiler.cc
index b096912..4c9860c 100644
--- a/src/compiler.cc
+++ b/src/compiler.cc
@@ -993,7 +993,7 @@
self->AssertNoPendingException();
CHECK_GT(work_units, 0U);
- std::vector<Closure*> closures(work_units);
+ std::vector<ForAllClosure*> closures(work_units);
for (size_t i = 0; i < work_units; ++i) {
closures[i] = new ForAllClosure(this, begin + i, end, callback, work_units);
thread_pool_->AddTask(self, closures[i]);
@@ -1006,13 +1006,11 @@
// Wait for all the worker threads to finish.
thread_pool_->Wait(self);
-
- STLDeleteElements(&closures);
}
private:
- class ForAllClosure : public Closure {
+ class ForAllClosure : public Task {
public:
ForAllClosure(CompilationContext* context, size_t begin, size_t end, Callback* callback,
size_t stripe)
@@ -1031,6 +1029,10 @@
self->AssertNoPendingException();
}
}
+
+ virtual void Finalize() {
+ delete this;
+ }
private:
CompilationContext* const context_;
const size_t begin_;