Use 8-byte increment bracket sizes for rosalloc thread local runs.

Very small space savings (< 1%) after device boot and up to 10%
allocation speedup.

Some minor cleanup.

Bug: 9986565

Change-Id: I51d791c4674d6944fe9a7ee78537ac3490c1a02c
diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h
index 2510514..d1c81e3 100644
--- a/runtime/gc/allocator/rosalloc-inl.h
+++ b/runtime/gc/allocator/rosalloc-inl.h
@@ -62,11 +62,6 @@
   }
   size_t bracket_size;
   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
-  DCHECK_EQ(idx, SizeToIndex(size));
-  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-  DCHECK_EQ(bracket_size, bracketSizes[idx]);
-  DCHECK_LE(size, bracket_size);
-  DCHECK(size > 512 || bracket_size - size < 16);
   DCHECK_LT(idx, kNumThreadLocalSizeBrackets);
   Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
   if (kIsDebugBuild) {
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index 7d00094..8b125dd 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -638,11 +638,6 @@
   DCHECK_LE(size, kLargeSizeThreshold);
   size_t bracket_size;
   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
-  DCHECK_EQ(idx, SizeToIndex(size));
-  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-  DCHECK_EQ(bracket_size, bracketSizes[idx]);
-  DCHECK_LE(size, bracket_size);
-  DCHECK(size > 512 || bracket_size - size < 16);
   Locks::mutator_lock_->AssertExclusiveHeld(self);
   void* slot_addr = AllocFromCurrentRunUnlocked(self, idx);
   if (LIKELY(slot_addr != nullptr)) {
@@ -662,14 +657,7 @@
   DCHECK_LE(size, kLargeSizeThreshold);
   size_t bracket_size;
   size_t idx = SizeToIndexAndBracketSize(size, &bracket_size);
-  DCHECK_EQ(idx, SizeToIndex(size));
-  DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-  DCHECK_EQ(bracket_size, bracketSizes[idx]);
-  DCHECK_LE(size, bracket_size);
-  DCHECK(size > 512 || bracket_size - size < 16);
-
   void* slot_addr;
-
   if (LIKELY(idx < kNumThreadLocalSizeBrackets)) {
     // Use a thread-local run.
     Run* thread_local_run = reinterpret_cast<Run*>(self->GetRosAllocRun(idx));
@@ -881,17 +869,6 @@
   return stream.str();
 }
 
-inline size_t RosAlloc::Run::SlotIndex(Slot* slot) {
-  const uint8_t idx = size_bracket_idx_;
-  const size_t bracket_size = bracketSizes[idx];
-  const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
-      - reinterpret_cast<uint8_t*>(FirstSlot());
-  DCHECK_EQ(offset_from_slot_base % bracket_size, static_cast<size_t>(0));
-  size_t slot_idx = offset_from_slot_base / bracket_size;
-  DCHECK_LT(slot_idx, numOfSlots[idx]);
-  return slot_idx;
-}
-
 void RosAlloc::Run::FreeSlot(void* ptr) {
   DCHECK(!IsThreadLocal());
   const uint8_t idx = size_bracket_idx_;
@@ -1647,9 +1624,14 @@
 
 void RosAlloc::Initialize() {
   // bracketSizes.
+  static_assert(kNumRegularSizeBrackets == kNumOfSizeBrackets - 2,
+                "There should be two non-regular brackets");
   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
-    if (i < kNumOfSizeBrackets - 2) {
-      bracketSizes[i] = 16 * (i + 1);
+    if (i < kNumThreadLocalSizeBrackets) {
+      bracketSizes[i] = kThreadLocalBracketQuantumSize * (i + 1);
+    } else if (i < kNumRegularSizeBrackets) {
+      bracketSizes[i] = kBracketQuantumSize * (i - kNumThreadLocalSizeBrackets + 1) +
+          (kThreadLocalBracketQuantumSize *  kNumThreadLocalSizeBrackets);
     } else if (i == kNumOfSizeBrackets - 2) {
       bracketSizes[i] = 1 * KB;
     } else {
@@ -1662,16 +1644,13 @@
   }
   // numOfPages.
   for (size_t i = 0; i < kNumOfSizeBrackets; i++) {
-    if (i < 4) {
+    if (i < kNumThreadLocalSizeBrackets) {
       numOfPages[i] = 1;
-    } else if (i < 8) {
-      numOfPages[i] = 1;
-    } else if (i < 16) {
+    } else if (i < (kNumThreadLocalSizeBrackets + kNumRegularSizeBrackets) / 2) {
       numOfPages[i] = 4;
-    } else if (i < 32) {
+    } else if (i < kNumRegularSizeBrackets) {
       numOfPages[i] = 8;
-    } else if (i == 32) {
-      DCHECK_EQ(i, kNumOfSizeBrackets - 2);
+    } else if (i == kNumOfSizeBrackets - 2) {
       numOfPages[i] = 16;
     } else {
       DCHECK_EQ(i, kNumOfSizeBrackets - 1);
@@ -1701,8 +1680,8 @@
       size_t tmp_header_size = (tmp_unaligned_header_size % bracket_size == 0) ?
           tmp_unaligned_header_size :
           tmp_unaligned_header_size + (bracket_size - tmp_unaligned_header_size % bracket_size);
-      DCHECK_EQ(tmp_header_size % bracket_size, static_cast<size_t>(0));
-      DCHECK_EQ(tmp_header_size % 8, static_cast<size_t>(0));
+      DCHECK_EQ(tmp_header_size % bracket_size, 0U);
+      DCHECK_EQ(tmp_header_size % sizeof(uint64_t), 0U);
       if (tmp_slots_size + tmp_header_size <= run_size) {
         // Found the right number of slots, that is, there was enough
         // space for the header (including the bit maps.)
@@ -1711,8 +1690,8 @@
         break;
       }
     }
-    DCHECK_GT(num_of_slots, 0U);
-    DCHECK_GT(header_size, 0U);
+    DCHECK_GT(num_of_slots, 0U) << i;
+    DCHECK_GT(header_size, 0U) << i;
     // Add the padding for the alignment remainder.
     header_size += run_size % bracket_size;
     DCHECK_EQ(header_size + num_of_slots * bracket_size, run_size);
@@ -1723,7 +1702,7 @@
                 << ", headerSizes[" << i << "]=" << headerSizes[i];
     }
   }
-  // Fill the alloc bitmap so nobody can successfully allocate from it.
+  // Set up the dedicated full run so that nobody can successfully allocate from it.
   if (kIsDebugBuild) {
     dedicated_full_run_->magic_num_ = kMagicNum;
   }
@@ -1735,6 +1714,9 @@
 
   // The smallest bracket size must be at least as large as the sizeof(Slot).
   DCHECK_LE(sizeof(Slot), bracketSizes[0]) << "sizeof(Slot) <= the smallest bracket size";
+  // Check the invariants between the max bracket sizes and the number of brackets.
+  DCHECK_EQ(kMaxThreadLocalBracketSize, bracketSizes[kNumThreadLocalSizeBrackets - 1]);
+  DCHECK_EQ(kMaxRegularBracketSize, bracketSizes[kNumRegularSizeBrackets - 1]);
 }
 
 void RosAlloc::BytesAllocatedCallback(void* start ATTRIBUTE_UNUSED, void* end ATTRIBUTE_UNUSED,
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 3ce3d63..a472a8b 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -366,7 +366,7 @@
     static size_t fixed_header_size() {
       return sizeof(Run);
     }
-    Slot* FirstSlot() {
+    Slot* FirstSlot() const {
       const uint8_t idx = size_bracket_idx_;
       return reinterpret_cast<Slot*>(reinterpret_cast<uintptr_t>(this) + headerSizes[idx]);
     }
@@ -473,7 +473,16 @@
       DCHECK_LT(slot_idx, numOfSlots[idx]);
       return reinterpret_cast<Slot*>(ptr);
     }
-    size_t SlotIndex(Slot* slot);
+    size_t SlotIndex(Slot* slot) const {
+      const uint8_t idx = size_bracket_idx_;
+      const size_t bracket_size = bracketSizes[idx];
+      const size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(slot)
+          - reinterpret_cast<uint8_t*>(FirstSlot());
+      DCHECK_EQ(offset_from_slot_base % bracket_size, 0U);
+      size_t slot_idx = offset_from_slot_base / bracket_size;
+      DCHECK_LT(slot_idx, numOfSlots[idx]);
+      return slot_idx;
+    }
 
     // TODO: DISALLOW_COPY_AND_ASSIGN(Run);
   };
@@ -482,10 +491,8 @@
   static constexpr uint8_t kMagicNum = 42;
   // The magic number for free pages.
   static constexpr uint8_t kMagicNumFree = 43;
-  // The number of size brackets. Sync this with the length of Thread::rosalloc_runs_.
-  static constexpr size_t kNumOfSizeBrackets = kNumRosAllocThreadLocalSizeBrackets;
-  // The number of smaller size brackets that are the quantum size apart.
-  static constexpr size_t kNumOfQuantumSizeBrackets = 32;
+  // The number of size brackets.
+  static constexpr size_t kNumOfSizeBrackets = 42;
   // The sizes (the slot sizes, in bytes) of the size brackets.
   static size_t bracketSizes[kNumOfSizeBrackets];
   // The numbers of pages that are used for runs for each size bracket.
@@ -506,16 +513,23 @@
   }
   // Returns the index of the size bracket from the bracket size.
   static size_t BracketSizeToIndex(size_t size) {
-    DCHECK(16 <= size && ((size < 1 * KB && size % 16 == 0) || size == 1 * KB || size == 2 * KB));
+    DCHECK(8 <= size &&
+           ((size <= kMaxThreadLocalBracketSize && size % kThreadLocalBracketQuantumSize == 0) ||
+            (size <= kMaxRegularBracketSize && size % kBracketQuantumSize == 0) ||
+            size == 1 * KB || size == 2 * KB));
     size_t idx;
     if (UNLIKELY(size == 1 * KB)) {
       idx = kNumOfSizeBrackets - 2;
     } else if (UNLIKELY(size == 2 * KB)) {
       idx = kNumOfSizeBrackets - 1;
+    } else if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
+      DCHECK_EQ(size % kThreadLocalBracketQuantumSize, 0U);
+      idx = size / kThreadLocalBracketQuantumSize - 1;
     } else {
-      DCHECK(size < 1 * KB);
-      DCHECK_EQ(size % 16, static_cast<size_t>(0));
-      idx = size / 16 - 1;
+      DCHECK(size <= kMaxRegularBracketSize);
+      DCHECK_EQ((size - kMaxThreadLocalBracketSize) % kBracketQuantumSize, 0U);
+      idx = ((size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
+          + kNumThreadLocalSizeBrackets;
     }
     DCHECK(bracketSizes[idx] == size);
     return idx;
@@ -530,51 +544,64 @@
   // Rounds up the size up the nearest bracket size.
   static size_t RoundToBracketSize(size_t size) {
     DCHECK(size <= kLargeSizeThreshold);
-    if (LIKELY(size <= 512)) {
-      return RoundUp(size, 16);
-    } else if (512 < size && size <= 1 * KB) {
+    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
+      return RoundUp(size, kThreadLocalBracketQuantumSize);
+    } else if (size <= kMaxRegularBracketSize) {
+      return RoundUp(size, kBracketQuantumSize);
+    } else if (UNLIKELY(size <= 1 * KB)) {
       return 1 * KB;
     } else {
-      DCHECK(1 * KB < size && size <= 2 * KB);
+      DCHECK_LE(size, 2 * KB);
       return 2 * KB;
     }
   }
   // Returns the size bracket index from the byte size with rounding.
   static size_t SizeToIndex(size_t size) {
     DCHECK(size <= kLargeSizeThreshold);
-    if (LIKELY(size <= 512)) {
-      return RoundUp(size, 16) / 16 - 1;
-    } else if (512 < size && size <= 1 * KB) {
+    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
+      return RoundUp(size, kThreadLocalBracketQuantumSize) / kThreadLocalBracketQuantumSize - 1;
+    } else if (size <= kMaxRegularBracketSize) {
+      return (RoundUp(size, kBracketQuantumSize) - kMaxThreadLocalBracketSize) / kBracketQuantumSize
+          - 1 + kNumThreadLocalSizeBrackets;
+    } else if (size <= 1 * KB) {
       return kNumOfSizeBrackets - 2;
     } else {
-      DCHECK(1 * KB < size && size <= 2 * KB);
+      DCHECK_LE(size, 2 * KB);
       return kNumOfSizeBrackets - 1;
     }
   }
   // A combination of SizeToIndex() and RoundToBracketSize().
   static size_t SizeToIndexAndBracketSize(size_t size, size_t* bracket_size_out) {
     DCHECK(size <= kLargeSizeThreshold);
-    if (LIKELY(size <= 512)) {
-      size_t bracket_size = RoundUp(size, 16);
-      *bracket_size_out = bracket_size;
-      size_t idx = bracket_size / 16 - 1;
-      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-      return idx;
-    } else if (512 < size && size <= 1 * KB) {
-      size_t bracket_size = 1024;
-      *bracket_size_out = bracket_size;
-      size_t idx = kNumOfSizeBrackets - 2;
-      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-      return idx;
+    size_t idx;
+    size_t bracket_size;
+    if (LIKELY(size <= kMaxThreadLocalBracketSize)) {
+      bracket_size = RoundUp(size, kThreadLocalBracketQuantumSize);
+      idx = bracket_size / kThreadLocalBracketQuantumSize - 1;
+    } else if (size <= kMaxRegularBracketSize) {
+      bracket_size = RoundUp(size, kBracketQuantumSize);
+      idx = ((bracket_size - kMaxThreadLocalBracketSize) / kBracketQuantumSize - 1)
+          + kNumThreadLocalSizeBrackets;
+    } else if (size <= 1 * KB) {
+      bracket_size = 1 * KB;
+      idx = kNumOfSizeBrackets - 2;
     } else {
-      DCHECK(1 * KB < size && size <= 2 * KB);
-      size_t bracket_size = 2048;
-      *bracket_size_out = bracket_size;
-      size_t idx = kNumOfSizeBrackets - 1;
-      DCHECK_EQ(bracket_size, IndexToBracketSize(idx));
-      return idx;
+      DCHECK(size <= 2 * KB);
+      bracket_size = 2 * KB;
+      idx = kNumOfSizeBrackets - 1;
     }
+    DCHECK_EQ(idx, SizeToIndex(size)) << idx;
+    DCHECK_EQ(bracket_size, IndexToBracketSize(idx)) << idx;
+    DCHECK_EQ(bracket_size, bracketSizes[idx]) << idx;
+    DCHECK_LE(size, bracket_size) << idx;
+    DCHECK(size > kMaxRegularBracketSize ||
+           (size <= kMaxThreadLocalBracketSize &&
+            bracket_size - size < kThreadLocalBracketQuantumSize) ||
+           (size <= kMaxRegularBracketSize && bracket_size - size < kBracketQuantumSize)) << idx;
+    *bracket_size_out = bracket_size;
+    return idx;
   }
+
   // Returns the page map index from an address. Requires that the
   // address is page size aligned.
   size_t ToPageMapIndex(const void* addr) const {
@@ -630,18 +657,37 @@
   // The default value for page_release_size_threshold_.
   static constexpr size_t kDefaultPageReleaseSizeThreshold = 4 * MB;
 
-  // We use thread-local runs for the size Brackets whose indexes
+  // We use thread-local runs for the size brackets whose indexes
   // are less than this index. We use shared (current) runs for the rest.
-  static const size_t kNumThreadLocalSizeBrackets = 8;
+  // Sync this with the length of Thread::rosalloc_runs_.
+  static const size_t kNumThreadLocalSizeBrackets = 16;
+  static_assert(kNumThreadLocalSizeBrackets == kNumRosAllocThreadLocalSizeBracketsInThread,
+                "Mismatch between kNumThreadLocalSizeBrackets and "
+                "kNumRosAllocThreadLocalSizeBracketsInThread");
 
   // The size of the largest bracket we use thread-local runs for.
   // This should be equal to bracketSizes[kNumThreadLocalSizeBrackets - 1].
   static const size_t kMaxThreadLocalBracketSize = 128;
 
-  // The bracket size increment for the brackets of size <= 512 bytes.
+  // We use regular (8 or 16-bytes increment) runs for the size brackets whose indexes are less than
+  // this index.
+  static const size_t kNumRegularSizeBrackets = 40;
+
+  // The size of the largest regular (8 or 16-byte increment) bracket. Non-regular brackets are the
+  // 1 KB and the 2 KB brackets. This should be equal to bracketSizes[kNumRegularSizeBrackets - 1].
+  static const size_t kMaxRegularBracketSize = 512;
+
+  // The bracket size increment for the thread-local brackets (<= kMaxThreadLocalBracketSize bytes).
+  static constexpr size_t kThreadLocalBracketQuantumSize = 8;
+
+  // Equal to Log2(kThreadLocalBracketQuantumSize).
+  static constexpr size_t kThreadLocalBracketQuantumSizeShift = 3;
+
+  // The bracket size increment for the non-thread-local, regular brackets (of size <=
+  // kMaxRegularBracketSize bytes and > kMaxThreadLocalBracketSize bytes).
   static constexpr size_t kBracketQuantumSize = 16;
 
-  // Equal to Log2(kQuantumBracketSizeIncrement).
+  // Equal to Log2(kBracketQuantumSize).
   static constexpr size_t kBracketQuantumSizeShift = 4;
 
  private: