ART: Fix valgrind

Allow ValgrindMallocSpace wrapper for RosAlloc. Requires refactoring,
as ValgrindMallocSpace was bound to the signature of DlMallocSpace.

Also turn off native stack dumping when running under Valgrind to
work around b/18119146.

Ritzperf before and after
Mean               3190.725   3082.475
Standard Error     11.68407   10.37911
Mode               3069       2980
Median             3182.5     3051.5
Variance           16382.117  12927.125
Standard Deviation 127.99264  113.69751
Kurtosis           1.1065632  0.3657799
Skewness           0.9013805  0.9117792
Range              644        528
Minimum            2991       2928
Maximum            3635       3456
Count              120        120

Bug: 18119146
Change-Id: I25558ea7cb578406011dede9d3d0bdbfee4ff4d5
diff --git a/runtime/gc/allocator/rosalloc-inl.h b/runtime/gc/allocator/rosalloc-inl.h
index dd419a4..f6c9d3c 100644
--- a/runtime/gc/allocator/rosalloc-inl.h
+++ b/runtime/gc/allocator/rosalloc-inl.h
@@ -23,6 +23,10 @@
 namespace gc {
 namespace allocator {
 
+inline ALWAYS_INLINE bool RosAlloc::ShouldCheckZeroMemory() {
+  return kCheckZeroMemory && !running_on_valgrind_;
+}
+
 template<bool kThreadSafe>
 inline ALWAYS_INLINE void* RosAlloc::Alloc(Thread* self, size_t size, size_t* bytes_allocated) {
   if (UNLIKELY(size > kLargeSizeThreshold)) {
@@ -35,7 +39,7 @@
     m = AllocFromRunThreadUnsafe(self, size, bytes_allocated);
   }
   // Check if the returned memory is really all zero.
-  if (kCheckZeroMemory && m != nullptr) {
+  if (ShouldCheckZeroMemory() && m != nullptr) {
     uint8_t* bytes = reinterpret_cast<uint8_t*>(m);
     for (size_t i = 0; i < size; ++i) {
       DCHECK_EQ(bytes[i], 0);
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index f9d6a51..991b956 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -14,13 +14,15 @@
  * limitations under the License.
  */
 
+#include "rosalloc.h"
+
 #include "base/mutex-inl.h"
+#include "gc/space/valgrind_settings.h"
 #include "mirror/class-inl.h"
 #include "mirror/object.h"
 #include "mirror/object-inl.h"
 #include "thread-inl.h"
 #include "thread_list.h"
-#include "rosalloc.h"
 
 #include <map>
 #include <list>
@@ -47,13 +49,15 @@
     reinterpret_cast<RosAlloc::Run*>(dedicated_full_run_storage_);
 
 RosAlloc::RosAlloc(void* base, size_t capacity, size_t max_capacity,
-                   PageReleaseMode page_release_mode, size_t page_release_size_threshold)
+                   PageReleaseMode page_release_mode, bool running_on_valgrind,
+                   size_t page_release_size_threshold)
     : base_(reinterpret_cast<uint8_t*>(base)), footprint_(capacity),
       capacity_(capacity), max_capacity_(max_capacity),
       lock_("rosalloc global lock", kRosAllocGlobalLock),
       bulk_free_lock_("rosalloc bulk free lock", kRosAllocBulkFreeLock),
       page_release_mode_(page_release_mode),
-      page_release_size_threshold_(page_release_size_threshold) {
+      page_release_size_threshold_(page_release_size_threshold),
+      running_on_valgrind_(running_on_valgrind) {
   DCHECK_EQ(RoundUp(capacity, kPageSize), capacity);
   DCHECK_EQ(RoundUp(max_capacity, kPageSize), max_capacity);
   CHECK_LE(capacity, max_capacity);
@@ -317,7 +321,7 @@
   }
   const size_t byte_size = num_pages * kPageSize;
   if (already_zero) {
-    if (kCheckZeroMemory) {
+    if (ShouldCheckZeroMemory()) {
       const uintptr_t* word_ptr = reinterpret_cast<uintptr_t*>(ptr);
       for (size_t i = 0; i < byte_size / sizeof(uintptr_t); ++i) {
         CHECK_EQ(word_ptr[i], 0U) << "words don't match at index " << i;
@@ -471,7 +475,7 @@
               << "(" << std::dec << (num_pages * kPageSize) << ")";
   }
   // Check if the returned memory is really all zero.
-  if (kCheckZeroMemory) {
+  if (ShouldCheckZeroMemory()) {
     CHECK_EQ(total_bytes % sizeof(uintptr_t), 0U);
     const uintptr_t* words = reinterpret_cast<uintptr_t*>(r);
     for (size_t i = 0; i < total_bytes / sizeof(uintptr_t); ++i) {
@@ -1433,7 +1437,7 @@
   return stream.str();
 }
 
-size_t RosAlloc::UsableSize(void* ptr) {
+size_t RosAlloc::UsableSize(const void* ptr) {
   DCHECK_LE(base_, ptr);
   DCHECK_LT(ptr, base_ + footprint_);
   size_t pm_idx = RoundDownToPageMapIndex(ptr);
@@ -1470,7 +1474,7 @@
       Run* run = reinterpret_cast<Run*>(base_ + pm_idx * kPageSize);
       DCHECK_EQ(run->magic_num_, kMagicNum);
       size_t idx = run->size_bracket_idx_;
-      size_t offset_from_slot_base = reinterpret_cast<uint8_t*>(ptr)
+      size_t offset_from_slot_base = reinterpret_cast<const uint8_t*>(ptr)
           - (reinterpret_cast<uint8_t*>(run) + headerSizes[idx]);
       DCHECK_EQ(offset_from_slot_base % bracketSizes[idx], static_cast<size_t>(0));
       return IndexToBracketSize(idx);
@@ -1915,10 +1919,15 @@
             num_pages++;
             idx++;
           }
-          void* start = base_ + i * kPageSize;
+          uint8_t* start = base_ + i * kPageSize;
+          if (running_on_valgrind_) {
+            start += ::art::gc::space::kDefaultValgrindRedZoneBytes;
+          }
           mirror::Object* obj = reinterpret_cast<mirror::Object*>(start);
           size_t obj_size = obj->SizeOf();
-          CHECK_GT(obj_size, kLargeSizeThreshold)
+          CHECK_GT(obj_size +
+                   (running_on_valgrind_ ? 2 * ::art::gc::space::kDefaultValgrindRedZoneBytes : 0),
+                   kLargeSizeThreshold)
               << "A rosalloc large object size must be > " << kLargeSizeThreshold;
           CHECK_EQ(num_pages, RoundUp(obj_size, kPageSize) / kPageSize)
               << "A rosalloc large object size " << obj_size
@@ -1986,11 +1995,11 @@
   }
   // Call Verify() here for the lock order.
   for (auto& run : runs) {
-    run->Verify(self, this);
+    run->Verify(self, this, running_on_valgrind_);
   }
 }
 
-void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc) {
+void RosAlloc::Run::Verify(Thread* self, RosAlloc* rosalloc, bool running_on_valgrind) {
   DCHECK_EQ(magic_num_, kMagicNum) << "Bad magic number : " << Dump();
   const size_t idx = size_bracket_idx_;
   CHECK_LT(idx, kNumOfSizeBrackets) << "Out of range size bracket index : " << Dump();
@@ -2073,6 +2082,9 @@
   }
   // Check each slot.
   size_t slots = 0;
+  size_t valgrind_modifier = running_on_valgrind ?
+      2 * ::art::gc::space::kDefaultValgrindRedZoneBytes :
+      0U;
   for (size_t v = 0; v < num_vec; v++, slots += 32) {
     DCHECK_GE(num_slots, slots) << "Out of bounds";
     uint32_t vec = alloc_bit_map_[v];
@@ -2085,14 +2097,17 @@
       bool is_thread_local_freed = IsThreadLocal() && ((thread_local_free_vec >> i) & 0x1) != 0;
       if (is_allocated && !is_thread_local_freed) {
         uint8_t* slot_addr = slot_base + (slots + i) * bracket_size;
+        if (running_on_valgrind) {
+          slot_addr += ::art::gc::space::kDefaultValgrindRedZoneBytes;
+        }
         mirror::Object* obj = reinterpret_cast<mirror::Object*>(slot_addr);
         size_t obj_size = obj->SizeOf();
-        CHECK_LE(obj_size, kLargeSizeThreshold)
+        CHECK_LE(obj_size + valgrind_modifier, kLargeSizeThreshold)
             << "A run slot contains a large object " << Dump();
-        CHECK_EQ(SizeToIndex(obj_size), idx)
+        CHECK_EQ(SizeToIndex(obj_size + valgrind_modifier), idx)
             << PrettyTypeOf(obj) << " "
-            << "obj_size=" << obj_size << ", idx=" << idx << " "
-            << "A run slot contains an object with wrong size " << Dump();
+            << "obj_size=" << obj_size << "(" << obj_size + valgrind_modifier << "), idx=" << idx
+            << " A run slot contains an object with wrong size " << Dump();
       }
     }
   }
@@ -2162,6 +2177,11 @@
     // In the debug build, the first page of a free page run
     // contains a magic number for debugging. Exclude it.
     start += kPageSize;
+
+    // Single pages won't be released.
+    if (start == end) {
+      return 0;
+    }
   }
   if (!kMadviseZeroes) {
     // TODO: Do this when we resurrect the page instead.
diff --git a/runtime/gc/allocator/rosalloc.h b/runtime/gc/allocator/rosalloc.h
index 2a0bf10..431686a 100644
--- a/runtime/gc/allocator/rosalloc.h
+++ b/runtime/gc/allocator/rosalloc.h
@@ -249,7 +249,7 @@
     // Dump the run metadata for debugging.
     std::string Dump();
     // Verify for debugging.
-    void Verify(Thread* self, RosAlloc* rosalloc)
+    void Verify(Thread* self, RosAlloc* rosalloc, bool running_on_valgrind)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::mutator_lock_)
         EXCLUSIVE_LOCKS_REQUIRED(Locks::thread_list_lock_);
 
@@ -360,13 +360,14 @@
   // Returns the page map index from an address. Requires that the
   // address is page size aligned.
   size_t ToPageMapIndex(const void* addr) const {
-    DCHECK(base_ <= addr && addr < base_ + capacity_);
+    DCHECK_LE(base_, addr);
+    DCHECK_LT(addr, base_ + capacity_);
     size_t byte_offset = reinterpret_cast<const uint8_t*>(addr) - base_;
     DCHECK_EQ(byte_offset % static_cast<size_t>(kPageSize), static_cast<size_t>(0));
     return byte_offset / kPageSize;
   }
   // Returns the page map index from an address with rounding.
-  size_t RoundDownToPageMapIndex(void* addr) const {
+  size_t RoundDownToPageMapIndex(const void* addr) const {
     DCHECK(base_ <= addr && addr < reinterpret_cast<uint8_t*>(base_) + capacity_);
     return (reinterpret_cast<uintptr_t>(addr) - reinterpret_cast<uintptr_t>(base_)) / kPageSize;
   }
@@ -377,6 +378,10 @@
 
   // If true, check that the returned memory is actually zero.
   static constexpr bool kCheckZeroMemory = kIsDebugBuild;
+  // Valgrind protects memory, so do not check memory when running under valgrind. In a normal
+  // build with kCheckZeroMemory the whole test should be optimized away.
+  // TODO: Unprotect before checks.
+  ALWAYS_INLINE bool ShouldCheckZeroMemory();
 
   // If true, log verbose details of operations.
   static constexpr bool kTraceRosAlloc = false;
@@ -485,6 +490,9 @@
   // greater than or equal to this value, release pages.
   const size_t page_release_size_threshold_;
 
+  // Whether this allocator is running under Valgrind.
+  bool running_on_valgrind_;
+
   // The base address of the memory region that's managed by this allocator.
   uint8_t* Begin() { return base_; }
   // The end address of the memory region that's managed by this allocator.
@@ -537,6 +545,7 @@
  public:
   RosAlloc(void* base, size_t capacity, size_t max_capacity,
            PageReleaseMode page_release_mode,
+           bool running_on_valgrind,
            size_t page_release_size_threshold = kDefaultPageReleaseSizeThreshold);
   ~RosAlloc();
 
@@ -551,7 +560,7 @@
       LOCKS_EXCLUDED(bulk_free_lock_);
 
   // Returns the size of the allocated slot for a given allocated memory chunk.
-  size_t UsableSize(void* ptr);
+  size_t UsableSize(const void* ptr);
   // Returns the size of the allocated slot for a given size.
   size_t UsableSize(size_t bytes) {
     if (UNLIKELY(bytes > kLargeSizeThreshold)) {