percpu: introduce nr_empty_pop_pages to help empty page accounting

pcpu_nr_empty_pop_pages is used to ensure there are a handful of free
pages around to serve atomic allocations. A new field, nr_empty_pop_pages,
is added to the pcpu_chunk struct to keep track of the number of empty
pages. This field is needed as the number of empty populated pages is
globally tracked and deltas are used to update in the bitmap allocator.
Pages that contain a hidden area are not considered to be empty. This
new field is exposed in percpu_stats.

Signed-off-by: Dennis Zhou <dennisszhou@gmail.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
diff --git a/mm/percpu.c b/mm/percpu.c
index 773dafe..657ab08 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -757,11 +757,14 @@
 	chunk->immutable = true;
 	bitmap_fill(chunk->populated, chunk->nr_pages);
 	chunk->nr_populated = chunk->nr_pages;
+	chunk->nr_empty_pop_pages = chunk->nr_pages;
 
 	chunk->contig_hint = chunk->free_size = map_size;
 
 	if (chunk->start_offset) {
 		/* hide the beginning of the bitmap */
+		chunk->nr_empty_pop_pages--;
+
 		chunk->map[0] = 1;
 		chunk->map[1] = chunk->start_offset;
 		chunk->map_used = 1;
@@ -773,6 +776,8 @@
 
 	if (chunk->end_offset) {
 		/* hide the end of the bitmap */
+		chunk->nr_empty_pop_pages--;
+
 		chunk->map[++chunk->map_used] = region_size | 1;
 	}
 
@@ -836,6 +841,7 @@
 
 	bitmap_set(chunk->populated, page_start, nr);
 	chunk->nr_populated += nr;
+	chunk->nr_empty_pop_pages += nr;
 	pcpu_nr_empty_pop_pages += nr;
 }
 
@@ -858,6 +864,7 @@
 
 	bitmap_clear(chunk->populated, page_start, nr);
 	chunk->nr_populated -= nr;
+	chunk->nr_empty_pop_pages -= nr;
 	pcpu_nr_empty_pop_pages -= nr;
 }
 
@@ -1782,9 +1789,7 @@
 
 	/* link the first chunk in */
 	pcpu_first_chunk = chunk;
-	i = (pcpu_first_chunk->start_offset) ? 1 : 0;
-	pcpu_nr_empty_pop_pages +=
-		pcpu_count_occupied_pages(pcpu_first_chunk, i);
+	pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages;
 	pcpu_chunk_relocate(pcpu_first_chunk, -1);
 
 	pcpu_stats_chunk_alloc();