Add thread unsafe allocation methods to spaces.

Used by the SS/GSS collectors since these run with mutators suspended
and only allocate from a single thread. Added AllocThreadUnsafe to
BumpPointerSpace and RosAllocSpace. In RosAlloc, the thread-unsafe path
uses the shared current runs as thread-local runs, skipping the per-run
locking. Added code to revoke current runs which share an index with
the thread-local runs (see the sketch below).
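
A minimal sketch of the idea, with simplified stand-in types. Only
AllocFromCurrentRunUnlocked, RevokeThreadUnsafeCurrentRuns,
current_runs_, and kNumThreadLocalSizeBrackets correspond to names in
the diff below; everything else is hypothetical:

  #include <cstddef>
  #include <mutex>

  // A run hands out fixed-size slots; nullptr means the run is full.
  struct Run {
    size_t slot_size = 0;
    size_t next_free = 0;
    size_t num_slots = 0;
    char* slots = nullptr;
    void* AllocSlot() {
      if (next_free == num_slots) return nullptr;
      return slots + (next_free++) * slot_size;
    }
  };

  class RosAllocSketch {
   public:
    // The real Alloc() takes a byte size and computes the bracket
    // index; this sketch takes the index directly.
    template<bool kThreadSafe = true>
    void* Alloc(size_t idx, size_t* bytes_allocated) {
      if (kThreadSafe) {
        std::lock_guard<std::mutex> lk(locks_[idx]);  // locked path
        return AllocFromCurrentRunUnlocked(idx, bytes_allocated);
      }
      // Thread-unsafe path: the caller (SS/GSS, mutators suspended)
      // guarantees a single allocating thread, so no lock is taken and
      // the shared current run behaves like a thread-local run.
      return AllocFromCurrentRunUnlocked(idx, bytes_allocated);
    }

    // After the GC phase, the current runs that doubled as thread-local
    // runs must be revoked; the real code re-files each run through
    // RevokeRun() instead of just dropping the pointer.
    void RevokeThreadUnsafeCurrentRuns() {
      for (size_t idx = 0; idx < kNumThreadLocalSizeBrackets; ++idx) {
        current_runs_[idx] = nullptr;
      }
    }

   private:
    void* AllocFromCurrentRunUnlocked(size_t idx,
                                      size_t* bytes_allocated) {
      Run* run = current_runs_[idx];
      void* slot = (run != nullptr) ? run->AllocSlot() : nullptr;
      if (slot == nullptr) {
        return nullptr;  // real code refills current_runs_[idx] here
      }
      *bytes_allocated = run->slot_size;
      return slot;
    }

    static const size_t kNumThreadLocalSizeBrackets = 11;
    Run* current_runs_[kNumThreadLocalSizeBrackets] = {};
    std::mutex locks_[kNumThreadLocalSizeBrackets];
  };

Since kThreadSafe is a template parameter, Alloc<false>() compiles the
locking away entirely, which is where the pause-time wins below come
from.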

Changed:
The number of thread-local runs in each thread is now
kNumThreadLocalSizeBrackets (the number of thread-local size brackets
in RosAlloc) instead of the total number of size brackets.
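
Illustratively (a hedged sketch; Run and the field name are stand-ins,
while kNumThreadLocalSizeBrackets is the constant the diff below adds):

  struct Run;

  static const size_t kNumThreadLocalSizeBrackets = 11;

  struct PerThreadSketch {
    // Previously sized by the total bracket count in RosAlloc; now
    // only the brackets that can actually be thread-local get an entry.
    Run* thread_local_runs_[kNumThreadLocalSizeBrackets];
  };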

Total GC time / time on EvaluateAndApplyChanges.
TLAB SS:
Before: 36.7s / 7254
After: 16.1s / 4837

TLAB GSS:
Before: 6.9s / 3973
After: 5.7s / 3778

Bug: 8981901

Change-Id: Id1d264ade3799f431bf7ebbdcca6146aefbeb632
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index f7fa2da..21044f3 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -405,11 +405,6 @@
   // at a page-granularity.
   static const size_t kLargeSizeThreshold = 2048;
 
-  // We use use thread-local runs for the size Brackets whose indexes
-  // are less than or equal to this index. We use shared (current)
-  // runs for the rest.
-  static const size_t kMaxThreadLocalSizeBracketIdx = 10;
-
   // If true, check that the returned memory is actually zero.
   static constexpr bool kCheckZeroMemory = kIsDebugBuild;
 
@@ -442,6 +437,10 @@
   // The default value for page_release_size_threshold_.
   static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;
 
+  // We use thread-local runs for the size brackets whose indexes
+  // are less than this index. We use shared (current) runs for the rest.
+  static const size_t kNumThreadLocalSizeBrackets = 11;
+
  private:
   // The base address of the memory region that's managed by this allocator.
   byte* base_;
@@ -526,6 +525,12 @@
   // Allocate/free a run slot.
   void* AllocFromRun(Thread* self, size_t size, size_t* bytes_allocated)
       LOCKS_EXCLUDED(lock_);
+  // Allocate a run slot without acquiring locks.
+  // TODO: EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
+  void* AllocFromRunThreadUnsafe(Thread* self, size_t size, size_t* bytes_allocated)
+      LOCKS_EXCLUDED(lock_);
+  void* AllocFromCurrentRunUnlocked(Thread* self, size_t idx);
+
   // Returns the bracket size.
   size_t FreeFromRun(Thread* self, void* ptr, Run* run)
       LOCKS_EXCLUDED(lock_);
@@ -543,11 +548,20 @@
   // Allocates large objects.
   void* AllocLargeObject(Thread* self, size_t size, size_t* bytes_allocated) LOCKS_EXCLUDED(lock_);
 
+  // Revoke a run by adding it to non_full_runs_ or freeing the pages.
+  void RevokeRun(Thread* self, size_t idx, Run* run);
+
+  // Revoke the current runs which share an index with the thread local runs.
+  void RevokeThreadUnsafeCurrentRuns();
+
  public:
   RosAlloc(void* base, size_t capacity, size_t max_capacity,
            PageReleaseMode page_release_mode,
            size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
   ~RosAlloc();
+  // If kThreadSafe is false then the allocator may avoid acquiring some locks as an optimization.
+  // If used, this may cause race conditions if multiple threads are allocating at the same time.
+  template<bool kThreadSafe = true>
   void* Alloc(Thread* self, size_t size, size_t* bytes_allocated)
       LOCKS_EXCLUDED(lock_);
   size_t Free(Thread* self, void* ptr)