Separate madvise call from critical section

madvise on a large object space can take a long time,
making the critical section hold the lock longer than needed.

Move it outside the critical section to speed things up.

Bug: 158620760
Bug: 158861776
Test: art/test.py --host
Change-Id: I6adfb0e1f962c2fbf6ee559c56755285761b4220
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 9616ec1..d1b4d7c 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -425,7 +425,6 @@
 }
 
 size_t FreeListSpace::Free(Thread* self, mirror::Object* obj) {
-  MutexLock mu(self, lock_);
   DCHECK(Contains(obj)) << reinterpret_cast<void*>(Begin()) << " " << obj << " "
                         << reinterpret_cast<void*>(End());
   DCHECK_ALIGNED(obj, kAlignment);
@@ -434,6 +433,15 @@
   const size_t allocation_size = info->ByteSize();
   DCHECK_GT(allocation_size, 0U);
   DCHECK_ALIGNED(allocation_size, kAlignment);
+
+  // madvise the pages without holding the lock.
+  madvise(obj, allocation_size, MADV_DONTNEED);
+  if (kIsDebugBuild) {
+    // Can't disallow reads since we use them to find next chunks during coalescing.
+    CheckedCall(mprotect, __FUNCTION__, obj, allocation_size, PROT_READ);
+  }
+
+  MutexLock mu(self, lock_);
   info->SetByteSize(allocation_size, true);  // Mark as free.
   // Look at the next chunk.
   AllocationInfo* next_info = info->GetNextInfo();
@@ -475,11 +483,6 @@
   --num_objects_allocated_;
   DCHECK_LE(allocation_size, num_bytes_allocated_);
   num_bytes_allocated_ -= allocation_size;
-  madvise(obj, allocation_size, MADV_DONTNEED);
-  if (kIsDebugBuild) {
-    // Can't disallow reads since we use them to find next chunks during coalescing.
-    CheckedCall(mprotect, __FUNCTION__, obj, allocation_size, PROT_READ);
-  }
   return allocation_size;
 }
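
A minimal sketch of the pattern this change applies (hypothetical names, not ART code): issue the expensive madvise(2) syscall before taking the lock, so only the bookkeeping that touches shared metadata sits inside the critical section.

    #include <sys/mman.h>
    #include <mutex>

    // Hypothetical free path illustrating the pattern; FreeChunk and
    // space_lock are illustrative names, not part of ART.
    size_t FreeChunk(void* chunk, size_t size, std::mutex& space_lock) {
      // Expensive kernel work first, with no lock held: other threads can
      // keep allocating and freeing while these pages are released.
      madvise(chunk, size, MADV_DONTNEED);

      // Only the shared-metadata updates need the lock.
      std::lock_guard<std::mutex> guard(space_lock);
      // ... mark the chunk free, coalesce neighbors, update counters ...
      return size;
    }

The trade-off is the same as in the patch: the syscall runs on pages that are no longer reachable, so doing it before the lock is safe, and contending threads no longer wait behind the kernel call.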