Add thread pool class

Added a thread pool class loosely based on google3 code.

Modified the compiler to have a single thread pool instead of creating new threads in ForAll.

Moved the barrier to the top-level directory, as it is not GC-specific code.

Performance Timings:

Reference:
boot.oat: 14.306596s
time mm oat-target:
real    2m33.748s
user    10m23.190s
sys     5m54.140s

Thread pool:
boot.oat: 13.111049s
time mm oat-target:
real    2m29.372s
user    10m3.130s
sys     5m46.290s

The speed increase is probably just noise.

Change-Id: If3c1280cbaa4c7e4361127d064ac744ea12cdf49
diff --git a/src/thread.h b/src/thread.h
index abfd719..8dbfb55 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -25,6 +25,7 @@
 #include <string>
 #include <vector>
 
+#include "closure.h"
 #include "globals.h"
 #include "macros.h"
 #include "oat/runtime/oat_support_entrypoints.h"
@@ -106,12 +107,6 @@
 
 class PACKED Thread {
  public:
-  class CheckpointFunction {
-   public:
-    virtual ~CheckpointFunction() { }
-    virtual void Run(Thread* self) = 0;
-  };
-
   // Space to throw a StackOverflowError in.
 #if !defined(ART_USE_LLVM_COMPILER)
   static const size_t kStackOverflowReservedBytes = 4 * KB;
@@ -183,7 +178,7 @@
   void ModifySuspendCount(Thread* self, int delta, bool for_debugger)
       EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_suspend_count_lock_);
 
-  bool RequestCheckpoint(CheckpointFunction* function);
+  bool RequestCheckpoint(Closure* function);
 
   // Called when thread detected that the thread_suspend_count_ was non-zero. Gives up share of
   // mutator_lock_ and waits until it is resumed and thread_suspend_count_ is zero.
@@ -776,7 +771,7 @@
   const char* last_no_thread_suspension_cause_;
 
   // Pending checkpoint functions.
-  CheckpointFunction* checkpoint_function_;
+  Closure* checkpoint_function_;
 
  public:
   // Runtime support function pointers