| /* |
| * linux/mm/hpa.c |
| * |
| * Copyright (C) 2015 Samsung Electronics, Inc. All Rights Reserved. |
| * |
| * This program is free software; you can redistribute it and/or |
| * modify it under the terms of the GNU General Public License |
| * as published by the Free Software Foundation; either version |
| * 2 of the License, or (at your option) any later version. |
| * |
| * Makes a best effort to allocate the requested high-order pages. |
| */ |
| |
| #include <linux/list.h> |
| #include <linux/bootmem.h> |
| #include <linux/memblock.h> |
| #include <linux/mm.h> |
| #include <linux/mm_types.h> |
| #include <linux/mmzone.h> |
| #include <linux/migrate.h> |
| #include <linux/memcontrol.h> |
| #include <linux/page-isolation.h> |
| #include <linux/mm_inline.h> |
| #include <linux/swap.h> |
| #include <linux/scatterlist.h> |
| #include <linux/debugfs.h> |
| #include <linux/vmalloc.h> |
| #include <linux/device.h> |
| #include <linux/oom.h> |
| #include <linux/sched/task.h> |
| #include <linux/sched/mm.h> |
| |
| #include "internal.h" |
| |
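| /* each allocation attempt scans the PFN window at most this many times */ |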
| #define MAX_SCAN_TRY (2) |
| |
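| /* |
| * PFN window that is scanned for contiguous movable chunks, and the PFN |
| * at which the previous scan stopped so that later allocations resume |
| * where the last one left off. |
| */ |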
| static unsigned long start_pfn, end_pfn; |
| static unsigned long cached_scan_pfn; |
| |
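| /* hpa_killer() only targets tasks with oom_score_adj above this value */ |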
| #define HPA_MIN_OOMADJ 100 |
| |
| static bool oom_unkillable_task(struct task_struct *p) |
| { |
| if (is_global_init(p)) |
| return true; |
| if (p->flags & PF_KTHREAD) |
| return true; |
| return false; |
| } |
| |
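| /* |
| * Skip tasks that make poor victims: the caller's own thread group, tasks |
| * with oom_score_adj at or below HPA_MIN_OOMADJ, tasks with a lower |
| * oom_score_adj than the currently selected candidate, tasks already |
| * handled by the OOM reaper, tasks in the middle of a vfork() and tasks |
| * in uninterruptible sleep. |
| */ |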
| static bool oom_skip_task(struct task_struct *p, int selected_adj) |
| { |
| if (same_thread_group(p, current)) |
| return true; |
| if (p->signal->oom_score_adj <= HPA_MIN_OOMADJ) |
| return true; |
| if ((p->signal->oom_score_adj < selected_adj) && |
| (selected_adj <= OOM_SCORE_ADJ_MAX)) |
| return true; |
| if (test_bit(MMF_OOM_SKIP, &p->mm->flags)) |
| return true; |
| if (in_vfork(p)) |
| return true; |
| if (p->state & TASK_UNINTERRUPTIBLE) |
| return true; |
| return false; |
| } |
| |
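| /* |
| * Kill the most expendable process to free memory: pick the task with the |
| * highest oom_score_adj, breaking ties by the largest memory footprint |
| * (RSS + swap entries + page tables), and send it SIGKILL. |
| */ |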
| static int hpa_killer(void) |
| { |
| struct task_struct *tsk, *p; |
| struct task_struct *selected = NULL; |
| unsigned long selected_tasksize = 0; |
| int selected_adj = OOM_SCORE_ADJ_MAX + 1; |
| |
| rcu_read_lock(); |
| for_each_process(tsk) { |
| int tasksize; |
| int current_adj; |
| |
| if (oom_unkillable_task(tsk)) |
| continue; |
| |
| p = find_lock_task_mm(tsk); |
| if (!p) |
| continue; |
| |
| if (oom_skip_task(p, selected_adj)) { |
| task_unlock(p); |
| continue; |
| } |
| |
| tasksize = get_mm_rss(p->mm); |
| tasksize += get_mm_counter(p->mm, MM_SWAPENTS); |
| tasksize += atomic_long_read(&p->mm->nr_ptes); |
| tasksize += mm_nr_pmds(p->mm); |
| current_adj = p->signal->oom_score_adj; |
| |
| task_unlock(p); |
| |
| if (selected && |
| (current_adj == selected_adj) && |
| (tasksize <= selected_tasksize)) |
| continue; |
| |
| if (selected) |
| put_task_struct(selected); |
| |
| selected = p; |
| selected_tasksize = tasksize; |
| selected_adj = current_adj; |
| get_task_struct(selected); |
| } |
| rcu_read_unlock(); |
| |
| if (!selected) { |
| pr_info("HPA: no killable task\n"); |
| return -ESRCH; |
| } |
| |
| pr_info("HPA: Killing '%s' (%d), adj %hd to free %lukB\n", |
| selected->comm, task_pid_nr(selected), selected_adj, |
| selected_tasksize * (PAGE_SIZE / SZ_1K)); |
| |
| do_send_sig_info(SIGKILL, SEND_SIG_FORCED, selected, true); |
| |
| put_task_struct(selected); |
| |
| return 0; |
| } |
| |
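| /* |
| * Check that every page in the 1 << order chunk starting at pfn is either |
| * migratable (on an LRU list or __PageMovable) or a free buddy page, so |
| * that alloc_contig_range_fast() has a chance of emptying the chunk. |
| */ |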
| static bool is_movable_chunk(unsigned long pfn, unsigned int order) |
| { |
| struct page *page = pfn_to_page(pfn); |
| struct page *page_end = pfn_to_page(pfn + (1 << order)); |
| |
| while (page != page_end) { |
| if (PageCompound(page) || PageReserved(page)) |
| return false; |
| if (!PageLRU(page) && !__PageMovable(page) && !PageBuddy(page)) |
| return false; |
| |
| page += PageBuddy(page) ? 1 << page_order(page) : 1; |
| } |
| |
| return true; |
| } |
| |
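| /* |
| * Return the index of the exception area containing the physical address |
| * @phys, or -1 if it lies outside all of the areas. |
| */ |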
| static int get_exception_of_page(phys_addr_t phys, |
| phys_addr_t exception_areas[][2], |
| int nr_exception) |
| { |
| int i; |
| |
| for (i = 0; i < nr_exception; i++) |
| if ((exception_areas[i][0] <= phys) && |
| (phys <= exception_areas[i][1])) |
| return i; |
| return -1; |
| } |
| |
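| /* |
| * Split a free page of order @high down to @low, returning the unused |
| * buddy halves to the zone's free lists. This mirrors the buddy |
| * allocator's expand() in mm/page_alloc.c and runs under zone->lock. |
| */ |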
| static inline void expand(struct zone *zone, struct page *page, |
| int low, int high, struct free_area *area, |
| int migratetype) |
| { |
| unsigned long size = 1 << high; |
| |
| while (high > low) { |
| area--; |
| high--; |
| size >>= 1; |
| |
| list_add(&page[size].lru, &area->free_list[migratetype]); |
| area->nr_free++; |
| set_page_private(&page[size], high); |
| __SetPageBuddy(&page[size]); |
| } |
| } |
| |
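| /* |
| * Take one order-@order page from the zone's free lists, skipping pages |
| * that fall inside an exception area. Splits a larger block when no free |
| * page of the exact order is available. Called under zone->lock. |
| */ |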
| static struct page *alloc_freepage_one(struct zone *zone, unsigned int order, |
| phys_addr_t exception_areas[][2], |
| int nr_exception) |
| { |
| unsigned int current_order; |
| struct free_area *area; |
| struct page *page; |
| int mt; |
| |
| for (mt = MIGRATE_UNMOVABLE; mt < MIGRATE_PCPTYPES; ++mt) { |
| for (current_order = order; |
| current_order < MAX_ORDER; ++current_order) { |
| area = &(zone->free_area[current_order]); |
| |
| list_for_each_entry(page, &area->free_list[mt], lru) { |
| if (get_exception_of_page(page_to_phys(page), |
| exception_areas, |
| nr_exception) >= 0) |
| continue; |
| |
| list_del(&page->lru); |
| |
| __ClearPageBuddy(page); |
| set_page_private(page, 0); |
| area->nr_free--; |
| expand(zone, page, order, |
| current_order, area, mt); |
| set_pcppage_migratetype(page, mt); |
| |
| return page; |
| } |
| } |
| } |
| |
| return NULL; |
| } |
| |
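| /* |
| * Allocate up to @required order-@order pages from @zone's free lists, |
| * stopping early if the zone would drop below its minimum watermark. |
| * Returns the number of pages actually allocated. |
| */ |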
| static int alloc_freepages_range(struct zone *zone, unsigned int order, |
| struct page **pages, int required, |
| phys_addr_t exception_areas[][2], |
| int nr_exception) |
| |
| { |
| unsigned long wmark; |
| unsigned long flags; |
| struct page *page; |
| int count = 0; |
| |
| spin_lock_irqsave(&zone->lock, flags); |
| |
| while (required > count) { |
| wmark = min_wmark_pages(zone) + (1 << order); |
| if (!zone_watermark_ok(zone, order, wmark, 0, 0)) |
| goto wmark_fail; |
| |
| page = alloc_freepage_one(zone, order, exception_areas, |
| nr_exception); |
| if (!page) |
| break; |
| |
| post_alloc_hook(page, order, GFP_KERNEL); |
| __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order)); |
| pages[count++] = page; |
| __count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order); |
| } |
| |
| wmark_fail: |
| spin_unlock_irqrestore(&zone->lock, flags); |
| |
| return count; |
| } |
| |
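| /* |
| * alloc_contig_range_fast() returns the range as individual pages, each |
| * holding its own reference. Drop the tail pages' reference counts to |
| * zero so the whole block can later be freed with a single |
| * __free_pages(page, order). |
| */ |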
| static void prep_highorder_pages(unsigned long base_pfn, int order) |
| { |
| int nr_pages = 1 << order; |
| unsigned long pfn; |
| |
| for (pfn = base_pfn + 1; pfn < base_pfn + nr_pages; pfn++) |
| set_page_count(pfn_to_page(pfn), 0); |
| } |
| |
| /** |
| * alloc_pages_highorder_except() - allocate large order pages |
| * @order: required page order |
| * @pages: array that receives the allocated order-@order pages |
| * @nents: number of order-@order pages to allocate (entries in @pages) |
| * @exception_areas: physical memory areas from which no page may be taken |
| * @nr_exception: number of memory areas in @exception_areas |
| * |
| * Returns 0 on success, or a negative error code otherwise. |
| * |
| * Allocates @nents blocks of (1 << @order) physically contiguous pages and |
| * stores the page descriptor of each block's first page in @pages. Every |
| * entry in @pages is aligned to (1 << @order) pages. |
| * |
| * If @nr_exception is larger than 0, alloc_pages_highorder_except() does not |
| * allocate pages from the areas described in @exception_areas. |
| * @exception_areas is an array of two-element arrays: the first element is |
| * the start address of an area and the second is its end address, i.e. the |
| * last byte of the area, "[start address] + [size] - 1". |
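| * |
| * A minimal usage sketch (illustrative only; the order, buffer count and |
| * exception range below are made-up values, not part of this interface): |
| * |
| *     struct page *pages[8]; |
| *     phys_addr_t except[1][2] = { { 0x88000000, 0x8fffffff } }; |
| *     int err = alloc_pages_highorder_except(4, pages, 8, except, 1); |
| * |
| * On success every pages[i] is the first page of an order-4 (64KiB with |
| * 4KiB pages) block that lies outside 0x88000000..0x8fffffff; the blocks |
| * are released again with free_pages_highorder(4, pages, 8). |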
| */ |
| int alloc_pages_highorder_except(int order, struct page **pages, int nents, |
| phys_addr_t exception_areas[][2], |
| int nr_exception) |
| { |
| struct zone *zone; |
| unsigned int nr_pages = 1 << order; |
| unsigned long total_scanned = 0; |
| unsigned long pfn, tmp; |
| int remained = nents; |
| int ret; |
| int retry_count = 0; |
| int allocated; |
| |
| retry: |
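| /* First pass: take ready-made high-order pages straight from the free lists. */ |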
| for_each_zone(zone) { |
| if (zone->spanned_pages == 0) |
| continue; |
| |
| allocated = alloc_freepages_range(zone, order, |
| pages + nents - remained, remained, |
| exception_areas, nr_exception); |
| remained -= allocated; |
| |
| if (remained == 0) |
| return 0; |
| } |
| |
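| /* |
| * Second pass: scan the PFN window for naturally aligned chunks whose |
| * pages can all be migrated away, and claim them with |
| * alloc_contig_range_fast(). |
| */ |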
| migrate_prep(); |
| |
| for (pfn = ALIGN(cached_scan_pfn, nr_pages); |
| (total_scanned < (end_pfn - start_pfn) * MAX_SCAN_TRY) |
| && (remained > 0); |
| pfn += nr_pages, total_scanned += nr_pages) { |
| int mt; |
| |
| if (pfn + nr_pages > end_pfn) { |
| /* wrap around; nr_pages is added back by the loop increment */ |
| pfn = ALIGN(start_pfn, nr_pages) - nr_pages; |
| continue; |
| } |
| |
| /* make sure every pfn in the candidate chunk is valid */ |
| tmp = pfn; |
| do { |
| if (!pfn_valid(tmp)) |
| break; |
| } while (++tmp < (pfn + nr_pages)); |
| |
| if (tmp < (pfn + nr_pages)) |
| continue; |
| |
| mt = get_pageblock_migratetype(pfn_to_page(pfn)); |
| /* |
| * CMA pageblocks are reserved for the CMA allocator and must not be |
| * taken here. Isolated pageblocks must not be touched either, because |
| * retrying them can leave the pageblock stuck in the isolated state |
| * forever. |
| */ |
| if (is_migrate_cma(mt) || is_migrate_isolate(mt)) { |
| /* nr_pages is added before next iteration */ |
| pfn = ALIGN(pfn + 1, pageblock_nr_pages) - nr_pages; |
| continue; |
| } |
| |
| ret = get_exception_of_page(pfn << PAGE_SHIFT, |
| exception_areas, nr_exception); |
| if (ret >= 0) { |
| pfn = (exception_areas[ret][1] + 1) >> PAGE_SHIFT; |
| pfn -= nr_pages; |
| continue; |
| } |
| |
| if (!is_movable_chunk(pfn, order)) |
| continue; |
| |
| ret = alloc_contig_range_fast(pfn, pfn + nr_pages, mt); |
| if (ret == 0) |
| prep_highorder_pages(pfn, order); |
| else |
| continue; |
| |
| pages[nents - remained] = pfn_to_page(pfn); |
| remained--; |
| } |
| |
| /* save latest scanned pfn */ |
| cached_scan_pfn = pfn; |
| |
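| /* |
| * Still short of pages: drop the slab caches, kill the most expendable |
| * task and, if a task was killed, restart the whole search. Otherwise |
| * give back what was allocated and fail. |
| */ |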
| if (remained) { |
| int i; |
| |
| drop_slab(); |
| count_vm_event(DROP_SLAB); |
| ret = hpa_killer(); |
| if (ret == 0) { |
| total_scanned = 0; |
| pr_info("HPA: drop_slab and killer retry %d count\n", |
| retry_count++); |
| goto retry; |
| } |
| |
| for (i = 0; i < (nents - remained); i++) |
| __free_pages(pages[i], order); |
| |
| pr_info("%s: remained=%d / %d, not enough memory in order %d\n", |
| __func__, remained, nents, order); |
| |
| ret = -ENOMEM; |
| } |
| |
| return ret; |
| } |
| |
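| /** |
| * free_pages_highorder() - free pages from alloc_pages_highorder_except() |
| * @order: page order of every entry in @pages |
| * @pages: array of pages to free |
| * @nents: number of pages in @pages |
| */ |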
| int free_pages_highorder(int order, struct page **pages, int nents) |
| { |
| int i; |
| |
| for (i = 0; i < nents; i++) |
| __free_pages(pages[i], order); |
| |
| return 0; |
| } |
| |
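| /* |
| * Restrict the migration scan to ZONE_MOVABLE when the system has one; |
| * otherwise scan all of DRAM. |
| */ |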
| static int __init init_highorder_pages_allocator(void) |
| { |
| struct zone *zone; |
| |
| for_each_zone(zone) { |
| if (zone->spanned_pages == 0) |
| continue; |
| if (zone_idx(zone) == ZONE_MOVABLE) { |
| start_pfn = zone->zone_start_pfn; |
| end_pfn = zone_end_pfn(zone); |
| } |
| } |
| |
| if (!start_pfn) { |
| start_pfn = __phys_to_pfn(memblock_start_of_DRAM()); |
| end_pfn = max_pfn; |
| } |
| |
| cached_scan_pfn = start_pfn; |
| |
| return 0; |
| } |
| late_initcall(init_highorder_pages_allocator); |