android: binder: Add global lru shrinker to binder

Hold on to the pages allocated and mapped for transaction
buffers until the system is under memory pressure. When
that happens, use linux shrinker to free pages. Without
using shrinker, patch "android: binder: Move buffer out
of area shared with user space" will cause a significant
slow down for small transactions that fit into the first
page because free list buffer header used to be inlined
with buffer data.

In addition to prevent the performance regression for
small transactions, this patch improves the performance
for transactions that take up more than one page.

Modify alloc selftest to work with the shrinker change.

Test: Run memory intensive applications (Chrome and Camera)
to trigger shrinker callbacks. Binder frees memory as expected.
Test: Run binderThroughputTest with high memory pressure
option enabled.

Signed-off-by: Sherry Yang <sherryy@android.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/drivers/android/binder.c b/drivers/android/binder.c
index e4b6055..ba9e613 100644
--- a/drivers/android/binder.c
+++ b/drivers/android/binder.c
@@ -5243,6 +5243,8 @@
 	struct binder_device *device;
 	struct hlist_node *tmp;
 
+	binder_alloc_shrinker_init();
+
 	atomic_set(&binder_transaction_log.cur, ~0U);
 	atomic_set(&binder_transaction_log_failed.cur, ~0U);
 
diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c
index e966592..11a08bf 100644
--- a/drivers/android/binder_alloc.c
+++ b/drivers/android/binder_alloc.c
@@ -27,9 +27,12 @@
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/list_lru.h>
 #include "binder_alloc.h"
 #include "binder_trace.h"
 
+struct list_lru binder_alloc_lru;
+
 static DEFINE_MUTEX(binder_alloc_mmap_lock);
 
 enum {
@@ -188,8 +191,9 @@
 {
 	void *page_addr;
 	unsigned long user_page_addr;
-	struct page **page;
-	struct mm_struct *mm;
+	struct binder_lru_page *page;
+	struct mm_struct *mm = NULL;
+	bool need_mm = false;
 
 	binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
 		     "%d: %s pages %pK-%pK\n", alloc->pid,
@@ -200,9 +204,18 @@
 
 	trace_binder_update_page_range(alloc, allocate, start, end);
 
-	if (vma)
-		mm = NULL;
-	else
+	if (allocate == 0)
+		goto free_range;
+
+	for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
+		page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE];
+		if (!page->page_ptr) {
+			need_mm = true;
+			break;
+		}
+	}
+
+	if (!vma && need_mm)
 		mm = get_task_mm(alloc->tsk);
 
 	if (mm) {
@@ -215,10 +228,7 @@
 		}
 	}
 
-	if (allocate == 0)
-		goto free_range;
-
-	if (vma == NULL) {
+	if (!vma && need_mm) {
 		pr_err("%d: binder_alloc_buf failed to map pages in userspace, no vma\n",
 			alloc->pid);
 		goto err_no_vma;
@@ -226,18 +236,33 @@
 
 	for (page_addr = start; page_addr < end; page_addr += PAGE_SIZE) {
 		int ret;
+		bool on_lru;
 
 		page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE];
 
-		BUG_ON(*page);
-		*page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
-		if (*page == NULL) {
+		if (page->page_ptr) {
+			on_lru = list_lru_del(&binder_alloc_lru, &page->lru);
+			WARN_ON(!on_lru);
+			continue;
+		}
+
+		if (WARN_ON(!vma))
+			goto err_page_ptr_cleared;
+
+		page->page_ptr = alloc_page(GFP_KERNEL |
+					    __GFP_HIGHMEM |
+					    __GFP_ZERO);
+		if (!page->page_ptr) {
 			pr_err("%d: binder_alloc_buf failed for page at %pK\n",
 				alloc->pid, page_addr);
 			goto err_alloc_page_failed;
 		}
+		page->alloc = alloc;
+		INIT_LIST_HEAD(&page->lru);
+
 		ret = map_kernel_range_noflush((unsigned long)page_addr,
-					PAGE_SIZE, PAGE_KERNEL, page);
+					       PAGE_SIZE, PAGE_KERNEL,
+					       &page->page_ptr);
 		flush_cache_vmap((unsigned long)page_addr,
 				(unsigned long)page_addr + PAGE_SIZE);
 		if (ret != 1) {
@@ -247,7 +272,7 @@
 		}
 		user_page_addr =
 			(uintptr_t)page_addr + alloc->user_buffer_offset;
-		ret = vm_insert_page(vma, user_page_addr, page[0]);
+		ret = vm_insert_page(vma, user_page_addr, page[0].page_ptr);
 		if (ret) {
 			pr_err("%d: binder_alloc_buf failed to map page at %lx in userspace\n",
 			       alloc->pid, user_page_addr);
@@ -264,16 +289,21 @@
 free_range:
 	for (page_addr = end - PAGE_SIZE; page_addr >= start;
 	     page_addr -= PAGE_SIZE) {
+		bool ret;
+
 		page = &alloc->pages[(page_addr - alloc->buffer) / PAGE_SIZE];
-		if (vma)
-			zap_page_range(vma, (uintptr_t)page_addr +
-				alloc->user_buffer_offset, PAGE_SIZE);
+
+		ret = list_lru_add(&binder_alloc_lru, &page->lru);
+		WARN_ON(!ret);
+		continue;
+
 err_vm_insert_page_failed:
 		unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
 err_map_kernel_failed:
-		__free_page(*page);
-		*page = NULL;
+		__free_page(page->page_ptr);
+		page->page_ptr = NULL;
 err_alloc_page_failed:
+err_page_ptr_cleared:
 		;
 	}
 err_no_vma:
@@ -731,16 +761,20 @@
 
 		for (i = 0; i < alloc->buffer_size / PAGE_SIZE; i++) {
 			void *page_addr;
+			bool on_lru;
 
-			if (!alloc->pages[i])
+			if (!alloc->pages[i].page_ptr)
 				continue;
 
+			on_lru = list_lru_del(&binder_alloc_lru,
+					      &alloc->pages[i].lru);
 			page_addr = alloc->buffer + i * PAGE_SIZE;
 			binder_alloc_debug(BINDER_DEBUG_BUFFER_ALLOC,
-				     "%s: %d: page %d at %pK not freed\n",
-				     __func__, alloc->pid, i, page_addr);
+				     "%s: %d: page %d at %pK %s\n",
+				     __func__, alloc->pid, i, page_addr,
+				     on_lru ? "on lru" : "active");
 			unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE);
-			__free_page(alloc->pages[i]);
+			__free_page(alloc->pages[i].page_ptr);
 			page_count++;
 		}
 		kfree(alloc->pages);
@@ -817,6 +851,93 @@
 }
 
 /**
+ * binder_alloc_free_page() - shrinker callback to free pages
+ * @item:   item to free
+ * @lock:   lock protecting the item
+ * @cb_arg: callback argument
+ *
+ * Called from list_lru_walk() in binder_shrink_scan() to free
+ * up pages when the system is under memory pressure.
+ */
+enum lru_status binder_alloc_free_page(struct list_head *item,
+				       struct list_lru_one *lru,
+				       spinlock_t *lock,
+				       void *cb_arg)
+{
+	struct mm_struct *mm = NULL;
+	struct binder_lru_page *page = container_of(item,
+						    struct binder_lru_page,
+						    lru);
+	struct binder_alloc *alloc;
+	uintptr_t page_addr;
+	size_t index;
+
+	alloc = page->alloc;
+	if (!mutex_trylock(&alloc->mutex))
+		goto err_get_alloc_mutex_failed;
+
+	if (!page->page_ptr)
+		goto err_page_already_freed;
+
+	index = page - alloc->pages;
+	page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE;
+	if (alloc->vma) {
+		mm = get_task_mm(alloc->tsk);
+		if (!mm)
+			goto err_get_task_mm_failed;
+		if (!down_write_trylock(&mm->mmap_sem))
+			goto err_down_write_mmap_sem_failed;
+
+		zap_page_range(alloc->vma,
+			       page_addr + alloc->user_buffer_offset,
+			       PAGE_SIZE);
+
+		up_write(&mm->mmap_sem);
+		mmput(mm);
+	}
+
+	unmap_kernel_range(page_addr, PAGE_SIZE);
+	__free_page(page->page_ptr);
+	page->page_ptr = NULL;
+
+	list_lru_isolate(lru, item);
+
+	mutex_unlock(&alloc->mutex);
+	return LRU_REMOVED;
+
+err_down_write_mmap_sem_failed:
+	mmput(mm);
+err_get_task_mm_failed:
+err_page_already_freed:
+	mutex_unlock(&alloc->mutex);
+err_get_alloc_mutex_failed:
+	return LRU_SKIP;
+}
+
+static unsigned long
+binder_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+	unsigned long ret = list_lru_count(&binder_alloc_lru);
+	return ret;
+}
+
+static unsigned long
+binder_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+	unsigned long ret;
+
+	ret = list_lru_walk(&binder_alloc_lru, binder_alloc_free_page,
+			    NULL, sc->nr_to_scan);
+	return ret;
+}
+
+struct shrinker binder_shrinker = {
+	.count_objects = binder_shrink_count,
+	.scan_objects = binder_shrink_scan,
+	.seeks = DEFAULT_SEEKS,
+};
+
+/**
  * binder_alloc_init() - called by binder_open() for per-proc initialization
  * @alloc: binder_alloc for this proc
  *
@@ -830,3 +951,8 @@
 	mutex_init(&alloc->mutex);
 }
 
+void binder_alloc_shrinker_init(void)
+{
+	list_lru_init(&binder_alloc_lru);
+	register_shrinker(&binder_shrinker);
+}
diff --git a/drivers/android/binder_alloc.h b/drivers/android/binder_alloc.h
index dd5649b..fa707cc 100644
--- a/drivers/android/binder_alloc.h
+++ b/drivers/android/binder_alloc.h
@@ -21,7 +21,9 @@
 #include <linux/rtmutex.h>
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
+#include <linux/list_lru.h>
 
+extern struct list_lru binder_alloc_lru;
 struct binder_transaction;
 
 /**
@@ -61,6 +63,18 @@
 };
 
 /**
+ * struct binder_lru_page - page object used for binder shrinker
+ * @page_ptr: pointer to physical page in mmap'd space
+ * @lru:      entry in binder_alloc_lru
+ * @alloc:    binder_alloc for a proc
+ */
+struct binder_lru_page {
+	struct list_head lru;
+	struct page *page_ptr;
+	struct binder_alloc *alloc;
+};
+
+/**
  * struct binder_alloc - per-binder proc state for binder allocator
  * @vma:                vm_area_struct passed to mmap_handler
  *                      (invarient after mmap)
@@ -75,8 +89,7 @@
  * @allocated_buffers:  rb tree of allocated buffers sorted by address
  * @free_async_space:   VA space available for async buffers. This is
  *                      initialized at mmap time to 1/2 the full VA space
- * @pages:              array of physical page addresses for each
- *                      page of mmap'd space
+ * @pages:              array of binder_lru_page
  * @buffer_size:        size of address space specified via mmap
  * @pid:                pid for associated binder_proc (invariant after init)
  *
@@ -96,7 +109,7 @@
 	struct rb_root free_buffers;
 	struct rb_root allocated_buffers;
 	size_t free_async_space;
-	struct page **pages;
+	struct binder_lru_page *pages;
 	size_t buffer_size;
 	uint32_t buffer_free;
 	int pid;
@@ -107,12 +120,16 @@
 #else
 static inline void binder_selftest_alloc(struct binder_alloc *alloc) {}
 #endif
+enum lru_status binder_alloc_free_page(struct list_head *item,
+				       struct list_lru_one *lru,
+				       spinlock_t *lock, void *cb_arg);
 extern struct binder_buffer *binder_alloc_new_buf(struct binder_alloc *alloc,
 						  size_t data_size,
 						  size_t offsets_size,
 						  size_t extra_buffers_size,
 						  int is_async);
 extern void binder_alloc_init(struct binder_alloc *alloc);
+void binder_alloc_shrinker_init(void);
 extern void binder_alloc_vma_close(struct binder_alloc *alloc);
 extern struct binder_buffer *
 binder_alloc_prepare_to_free(struct binder_alloc *alloc,
diff --git a/drivers/android/binder_alloc_selftest.c b/drivers/android/binder_alloc_selftest.c
index 0bf7207..8bd7bce 100644
--- a/drivers/android/binder_alloc_selftest.c
+++ b/drivers/android/binder_alloc_selftest.c
@@ -109,9 +109,11 @@
 	page_addr = buffer->data;
 	for (; page_addr < end; page_addr += PAGE_SIZE) {
 		page_index = (page_addr - alloc->buffer) / PAGE_SIZE;
-		if (!alloc->pages[page_index]) {
-			pr_err("incorrect alloc state at page index %d\n",
-			       page_index);
+		if (!alloc->pages[page_index].page_ptr ||
+		    !list_empty(&alloc->pages[page_index].lru)) {
+			pr_err("expect alloc but is %s at page index %d\n",
+			       alloc->pages[page_index].page_ptr ?
+			       "lru" : "free", page_index);
 			return false;
 		}
 	}
@@ -137,28 +139,63 @@
 
 static void binder_selftest_free_buf(struct binder_alloc *alloc,
 				     struct binder_buffer *buffers[],
-				     size_t *sizes, int *seq)
+				     size_t *sizes, int *seq, size_t end)
 {
 	int i;
 
 	for (i = 0; i < BUFFER_NUM; i++)
 		binder_alloc_free_buf(alloc, buffers[seq[i]]);
 
+	for (i = 0; i < end / PAGE_SIZE; i++) {
+		/**
+		 * Error message on a free page can be false positive
+		 * if binder shrinker ran during binder_alloc_free_buf
+		 * calls above.
+		 */
+		if (list_empty(&alloc->pages[i].lru)) {
+			pr_err_size_seq(sizes, seq);
+			pr_err("expect lru but is %s at page index %d\n",
+			       alloc->pages[i].page_ptr ? "alloc" : "free", i);
+			binder_selftest_failures++;
+		}
+	}
+}
+
+static void binder_selftest_free_page(struct binder_alloc *alloc)
+{
+	int i;
+	unsigned long count;
+
+	while ((count = list_lru_count(&binder_alloc_lru))) {
+		list_lru_walk(&binder_alloc_lru, binder_alloc_free_page,
+			      NULL, count);
+	}
+
 	for (i = 0; i < (alloc->buffer_size / PAGE_SIZE); i++) {
-		if ((!alloc->pages[i]) == (i == 0)) {
-			pr_err("incorrect free state at page index %d\n", i);
+		if (alloc->pages[i].page_ptr) {
+			pr_err("expect free but is %s at page index %d\n",
+			       list_empty(&alloc->pages[i].lru) ?
+			       "alloc" : "lru", i);
 			binder_selftest_failures++;
 		}
 	}
 }
 
 static void binder_selftest_alloc_free(struct binder_alloc *alloc,
-				       size_t *sizes, int *seq)
+				       size_t *sizes, int *seq, size_t end)
 {
 	struct binder_buffer *buffers[BUFFER_NUM];
 
 	binder_selftest_alloc_buf(alloc, buffers, sizes, seq);
-	binder_selftest_free_buf(alloc, buffers, sizes, seq);
+	binder_selftest_free_buf(alloc, buffers, sizes, seq, end);
+
+	/* Allocate from lru. */
+	binder_selftest_alloc_buf(alloc, buffers, sizes, seq);
+	if (list_lru_count(&binder_alloc_lru))
+		pr_err("lru list should be empty but is not\n");
+
+	binder_selftest_free_buf(alloc, buffers, sizes, seq, end);
+	binder_selftest_free_page(alloc);
 }
 
 static bool is_dup(int *seq, int index, int val)
@@ -174,19 +211,20 @@
 
 /* Generate BUFFER_NUM factorial free orders. */
 static void binder_selftest_free_seq(struct binder_alloc *alloc,
-				     size_t *sizes, int *seq, int index)
+				     size_t *sizes, int *seq,
+				     int index, size_t end)
 {
 	int i;
 
 	if (index == BUFFER_NUM) {
-		binder_selftest_alloc_free(alloc, sizes, seq);
+		binder_selftest_alloc_free(alloc, sizes, seq, end);
 		return;
 	}
 	for (i = 0; i < BUFFER_NUM; i++) {
 		if (is_dup(seq, index, i))
 			continue;
 		seq[index] = i;
-		binder_selftest_free_seq(alloc, sizes, seq, index + 1);
+		binder_selftest_free_seq(alloc, sizes, seq, index + 1, end);
 	}
 }
 
@@ -211,8 +249,9 @@
 	 * we need one giant buffer before getting to the last page.
 	 */
 	back_sizes[0] += alloc->buffer_size - end_offset[BUFFER_NUM - 1];
-	binder_selftest_free_seq(alloc, front_sizes, seq, 0);
-	binder_selftest_free_seq(alloc, back_sizes, seq, 0);
+	binder_selftest_free_seq(alloc, front_sizes, seq, 0,
+				 end_offset[BUFFER_NUM - 1]);
+	binder_selftest_free_seq(alloc, back_sizes, seq, 0, alloc->buffer_size);
 }
 
 static void binder_selftest_alloc_offset(struct binder_alloc *alloc,
@@ -246,7 +285,8 @@
  *
  * Allocate BUFFER_NUM buffers to cover all page alignment cases,
  * then free them in all orders possible. Check that pages are
- * allocated after buffer alloc and freed after freeing buffer.
+ * correctly allocated, put onto lru when buffers are freed, and
+ * are freed when binder_alloc_free_page is called.
  */
 void binder_selftest_alloc(struct binder_alloc *alloc)
 {