gpu: arm: Update from P615XXS7FXA1
diff --git a/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c b/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c
index 49cb945..df9f41d 100644
--- a/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c
+++ b/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/*
*
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,12 @@
/*
* Base kernel context APIs
*/
+#include <linux/version.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/task.h>
+#else
+#include <linux/sched.h>
+#endif
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
@@ -132,13 +138,50 @@ int kbase_context_common_init(struct kbase_context *kctx)
/* creating a context is considered a disjoint event */
kbase_disjoint_event(kctx->kbdev);
- spin_lock_init(&kctx->mm_update_lock);
kctx->process_mm = NULL;
+ kctx->task = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
atomic_set(&kctx->permanent_mapped_pages, 0);
kctx->tgid = current->tgid;
kctx->pid = current->pid;
+ /* Check if this is a Userspace created context */
+ if (likely(kctx->filp)) {
+ struct pid *pid_struct;
+
+ rcu_read_lock();
+ pid_struct = find_get_pid(kctx->tgid);
+ if (likely(pid_struct)) {
+ struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID);
+
+ if (likely(task)) {
+ /* Take a reference on the task to avoid slow lookup
+ * later on from the page allocation loop.
+ */
+ get_task_struct(task);
+ kctx->task = task;
+ } else {
+ dev_err(kctx->kbdev->dev,
+ "Failed to get task pointer for %s/%d",
+ current->comm, current->pid);
+ err = -ESRCH;
+ }
+
+ put_pid(pid_struct);
+ } else {
+ dev_err(kctx->kbdev->dev,
+ "Failed to get pid pointer for %s/%d",
+ current->comm, current->pid);
+ err = -ESRCH;
+ }
+ rcu_read_unlock();
+
+ if (unlikely(err))
+ return err;
+ kbase_mem_mmgrab();
+ kctx->process_mm = current->mm;
+ }
+
atomic_set(&kctx->used_pages, 0);
mutex_init(&kctx->reg_lock);
@@ -164,13 +207,16 @@ int kbase_context_common_init(struct kbase_context *kctx)
mutex_init(&kctx->legacy_hwcnt_lock);
mutex_lock(&kctx->kbdev->kctx_list_lock);
-
err = kbase_insert_kctx_to_process(kctx);
- if (err)
- dev_err(kctx->kbdev->dev,
- "(err:%d) failed to insert kctx to kbase_process\n", err);
-
mutex_unlock(&kctx->kbdev->kctx_list_lock);
+ if (err) {
+ dev_err(kctx->kbdev->dev,
+ "(err:%d) failed to insert kctx to kbase_process", err);
+ if (likely(kctx->filp)) {
+ mmdrop(kctx->process_mm);
+ put_task_struct(kctx->task);
+ }
+ }
return err;
}
@@ -254,7 +300,10 @@ void kbase_context_common_term(struct kbase_context *kctx)
kbase_remove_kctx_from_process(kctx);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
-
+ if (likely(kctx->filp)) {
+ mmdrop(kctx->process_mm);
+ put_task_struct(kctx->task);
+ }
KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, kctx->tgid);
}
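
Note on the context-init hunks above: they pin two objects for the lifetime of a userspace-created context, the creating task (get_task_struct()) and its memory descriptor (kbase_mem_mmgrab()), and the matching put_task_struct()/mmdrop() calls sit in kbase_context_common_term(). A minimal sketch of the lookup-and-pin step in isolation follows; the helper name is illustrative and not part of the patch, and it assumes a >= 4.11 kernel for the include split handled above.

    #include <linux/pid.h>
    #include <linux/rcupdate.h>
    #include <linux/sched/task.h>   /* linux/sched.h on kernels older than 4.11 */

    /* Resolve a tgid to its task_struct and take a reference on it, mirroring
     * the pattern used in kbase_context_common_init() above.
     */
    static struct task_struct *example_get_owner_task(pid_t tgid)
    {
            struct pid *pid_struct;
            struct task_struct *task = NULL;

            rcu_read_lock();
            pid_struct = find_get_pid(tgid);
            if (pid_struct) {
                    task = pid_task(pid_struct, PIDTYPE_PID);
                    if (task)
                            get_task_struct(task); /* pinned until put_task_struct() */
                    put_pid(pid_struct);
            }
            rcu_read_unlock();

            return task; /* caller drops the reference with put_task_struct() */
    }
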
diff --git a/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h
index 6dc57d044..0f593a6 100644
--- a/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h
+++ b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -109,10 +109,12 @@
* 11.26
* - Added kinstr_jm API
* 11.27
- * - Backwards compatible extension to HWC ioctl.
- */
+ * - Backwards compatible extension to HWC ioctl.
+ * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE
+ * before allocating GPU memory for the context.
+ */
#define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 27
+#define BASE_UK_VERSION_MINOR 38
/**
* struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h b/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h
index c4ef69f..761c8a8 100644
--- a/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1470,11 +1470,13 @@ struct kbase_reg_zone {
* Generally the reference count is incremented when the context
* is scheduled in and an atom is pulled from the context's per
* slot runnable tree.
- * @mm_update_lock: lock used for handling of special tracking page.
* @process_mm: Pointer to the memory descriptor of the process which
* created the context. Used for accounting the physical
* pages used for GPU allocations, done for the context,
- * to the memory consumed by the process.
+ * to the memory consumed by the process. A reference is taken
+ * on this descriptor for the Userspace created contexts so that
+ * Kbase can safely access it to update the memory usage counters.
+ * The reference is dropped on context termination.
* @gpu_va_end: End address of the GPU va space (in 4KB page units)
* @jit_va: Indicates if a JIT_VA zone has been created.
* @mem_profile_data: Buffer containing the profiling information provided by
@@ -1603,6 +1605,10 @@ struct kbase_reg_zone {
* @kinstr_jm: Kernel job manager instrumentation context handle
* @tl_kctx_list_node: List item into the device timeline's list of
* contexts, for timeline summarization.
+ * @task: Pointer to the task structure of the main thread of the process
+ * that created the Kbase context. It would be set only for the
+ * contexts created by the Userspace and not for the contexts
+ * created internally by the Kbase.
*
* A kernel base context is an entity among which the GPU is scheduled.
* Each context has its own GPU address space.
@@ -1697,8 +1703,7 @@ struct kbase_context {
atomic_t refcount;
- spinlock_t mm_update_lock;
- struct mm_struct __rcu *process_mm;
+ struct mm_struct *process_mm;
u64 gpu_va_end;
bool jit_va;
@@ -1756,6 +1761,7 @@ struct kbase_context {
#endif
struct kbase_kinstr_jm *kinstr_jm;
struct list_head tl_kctx_list_node;
+ struct task_struct *task;
};
#ifdef CONFIG_MALI_CINSTR_GWT
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c
index 6d56220..a70bcb9 100644
--- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1757,6 +1757,7 @@ void kbase_sync_single(struct kbase_context *kctx,
src = ((unsigned char *)kmap(gpu_page)) + offset;
dst = ((unsigned char *)kmap(cpu_page)) + offset;
}
+
memcpy(dst, src, size);
kunmap(gpu_page);
kunmap(cpu_page);
@@ -2148,7 +2149,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
&kctx->mem_pools.large[alloc->group_id],
nr_lp * (SZ_2M / SZ_4K),
tp,
- true);
+ true, kctx->task);
if (res > 0) {
nr_left -= res;
@@ -2202,7 +2203,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
err = kbase_mem_pool_grow(
&kctx->mem_pools.large[alloc->group_id],
- 1);
+ 1, kctx->task);
if (err)
break;
} while (1);
@@ -2249,7 +2250,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
if (nr_left) {
res = kbase_mem_pool_alloc_pages(
&kctx->mem_pools.small[alloc->group_id],
- nr_left, tp, false);
+ nr_left, tp, false, kctx->task);
if (res <= 0)
goto alloc_failed;
}
@@ -3660,7 +3661,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
spin_unlock(&kctx->mem_partials_lock);
kbase_gpu_vm_unlock(kctx);
- ret = kbase_mem_pool_grow(pool, pool_delta);
+ ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
kbase_gpu_vm_lock(kctx);
if (ret)
@@ -4429,10 +4430,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
struct page **pages;
struct tagged_addr *pa;
long i;
- unsigned long address;
struct device *dev;
- unsigned long offset;
- unsigned long local_size;
unsigned long gwt_mask = ~0;
/* Calls to this function are inherently asynchronous, with respect to
@@ -4449,21 +4447,38 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc = reg->gpu_alloc;
pa = kbase_get_gpu_phy_pages(reg);
- address = alloc->imported.user_buf.address;
pinned_pages = alloc->nents;
pages = alloc->imported.user_buf.pages;
dev = kctx->kbdev->dev;
- offset = address & ~PAGE_MASK;
- local_size = alloc->imported.user_buf.size;
+
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case of memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for the whole of all pages that enclose the imported
+ * region, otherwise the initial content of memory would be wrong.
+ */
+
+
for (i = 0; i < pinned_pages; i++) {
dma_addr_t dma_addr;
- unsigned long min;
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
- min = MIN(PAGE_SIZE - offset, local_size);
- dma_addr = dma_map_page(dev, pages[i],
- offset, min,
- DMA_BIDIRECTIONAL);
err = dma_mapping_error(dev, dma_addr);
if (err)
goto unwind;
@@ -4471,8 +4486,6 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- local_size -= min;
- offset = 0;
}
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -4490,10 +4503,26 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
/* fall down */
unwind:
alloc->nents = 0;
+
+ /* Run the unmap loop in the same order as map loop, and perform again
+ * CPU cache synchronization to re-write the content of dirty CPU caches
+ * to memory. This is a precautionary measure in case a GPU job has taken
+ * advantage of a partially GPU-mapped range to write and corrupt the
+ * content of memory, either inside or outside the imported region.
+ *
+ * Notice that this error recovery path doesn't try to be optimal and just
+ * flushes the entire page range.
+ */
+
while (i--) {
- dma_unmap_page(kctx->kbdev->dev,
- alloc->imported.user_buf.dma_addrs[i],
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #else
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
}
/* The user buffer could already have been previously pinned before
@@ -4519,6 +4548,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
{
long i;
struct page **pages;
+ unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
unsigned long size = alloc->imported.user_buf.size;
lockdep_assert_held(&kctx->reg_lock);
@@ -4532,18 +4562,100 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
#endif
for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
- unsigned long local_size;
+ unsigned long imported_size = MIN(size, PAGE_SIZE - offset_within_page);
+ /* Notice: this is a temporary variable that is used for DMA sync
+ * operations, and that could be incremented by an offset if the
+ * current page contains both imported and non-imported memory
+ * sub-regions.
+ *
+ * It is valid to add an offset to this value, because the offset
+ * is always kept within the physically contiguous dma-mapped range
+ * and there's no need to translate to physical address to offset it.
+ *
+ * This variable is not going to be used for the actual DMA unmap
+ * operation, that shall always use the original DMA address of the
+ * whole memory page.
+ */
+
+
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
- local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
- dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+ /* Manual CPU cache synchronization.
+ *
+ * When the GPU returns ownership of the buffer to the CPU, the driver
+ * needs to treat imported and non-imported memory differently.
+ *
+ * The first case to consider is non-imported sub-regions at the
+ * beginning of the first page and at the end of last page. For these
+ * sub-regions: CPU cache shall be committed with a clean+invalidate,
+ * in order to keep the last CPU write.
+ *
+ * Imported region prefers the opposite treatment: this memory has been
+ * legitimately mapped and used by the GPU, hence GPU writes shall be
+ * committed to memory, while CPU cache shall be invalidated to make
+ * sure that CPU reads the correct memory content.
+ *
+ * The following diagram shows the expected value of the variables
+ * used in this loop in the corner case of an imported region enclosed
+ * by a single memory page:
+ *
+ * page boundary ->|---------- | <- dma_addr (initial value)
+ * | |
+ * | - - - - - | <- offset_within_page
+ * |XXXXXXXXXXX|\
+ * |XXXXXXXXXXX| \
+ * |XXXXXXXXXXX| }- imported_size
+ * |XXXXXXXXXXX| /
+ * |XXXXXXXXXXX|/
+ * | - - - - - | <- offset_within_page + imported_size
+ * | |\
+ * | | }- PAGE_SIZE - imported_size - offset_within_page
+ * | |/
+ * page boundary ->|-----------|
+ *
+ * If the imported region is enclosed by more than one page, then
+ * offset_within_page = 0 for any page after the first.
+ */
+ /* Only for first page: handle non-imported range at the beginning. */
+ if (offset_within_page > 0) {
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+ DMA_BIDIRECTIONAL);
+ dma_addr += offset_within_page;
+ }
+
+ /* For every page: handle imported range. */
+ if (imported_size > 0)
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+ DMA_BIDIRECTIONAL);
+
+ /* Only for last page (that may coincide with first page):
+ * handle non-imported range at the end.
+ */
+ if ((imported_size + offset_within_page) < PAGE_SIZE) {
+ dma_addr += imported_size;
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+ PAGE_SIZE - imported_size - offset_within_page,
+ DMA_BIDIRECTIONAL);
+ }
+
+ /* Notice: use the original DMA address to unmap the whole memory page. */
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
DMA_BIDIRECTIONAL);
+ #else
+ dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
+ PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
+
+
+
+
if (writeable)
set_page_dirty_lock(pages[i]);
put_page(pages[i]);
pages[i] = NULL;
- size -= local_size;
+ size -= imported_size;
}
alloc->nents = 0;
}
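
The three dma_sync_single_* calls in the unmap loop above implement exactly the split that the diagram describes: clean the non-imported head and tail back to memory, invalidate the imported middle for the CPU. A compact sketch of that per-page split follows; the helper and its factoring are illustrative only, not part of the patch.

    #include <linux/dma-mapping.h>

    /* Sync one page that may mix imported and non-imported sub-regions.
     * offset is non-zero only for the first page; imported_size is the
     * portion of this page that belongs to the imported region.
     */
    static void example_sync_partially_imported_page(struct device *dev,
                                                     dma_addr_t dma_addr,
                                                     unsigned long offset,
                                                     unsigned long imported_size)
    {
            /* Non-imported head: commit CPU writes so they are not lost. */
            if (offset > 0)
                    dma_sync_single_for_device(dev, dma_addr, offset,
                                               DMA_BIDIRECTIONAL);

            /* Imported middle: return ownership to the CPU (GPU writes kept,
             * stale CPU cache lines invalidated).
             */
            if (imported_size > 0)
                    dma_sync_single_for_cpu(dev, dma_addr + offset,
                                            imported_size, DMA_BIDIRECTIONAL);

            /* Non-imported tail: commit CPU writes, as for the head. */
            if (offset + imported_size < PAGE_SIZE)
                    dma_sync_single_for_device(dev,
                                               dma_addr + offset + imported_size,
                                               PAGE_SIZE - offset - imported_size,
                                               DMA_BIDIRECTIONAL);
    }
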
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h
index 45991a1..d17a94a 100644
--- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -805,7 +805,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
* @pages: Pointer to array where the physical address of the allocated
* pages will be stored.
* @partial_allowed: If fewer pages allocated is allowed
- *
+ * @page_owner: Pointer to the task that created the Kbase context for which
+ * the pages are being allocated. It can be NULL if the pages
+ * won't be associated with any Kbase context.
* Like kbase_mem_pool_alloc() but optimized for allocating many pages.
*
* Return:
@@ -821,7 +823,7 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
* this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
*/
int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
- struct tagged_addr *pages, bool partial_allowed);
+ struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner);
/**
* kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
@@ -933,13 +935,15 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
* kbase_mem_pool_grow - Grow the pool
* @pool: Memory pool to grow
* @nr_to_grow: Number of pages to add to the pool
- *
+ * @page_owner: Pointer to the task that created the Kbase context for which
+ * the memory pool is being grown. It can be NULL if the pages
+ * to be allocated won't be associated with any Kbase context.
* Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
* become larger than the maximum size specified.
*
* Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
*/
-int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow, struct task_struct *page_owner);
/**
* kbase_mem_pool_trim - Grow or shrink the pool to a new size
@@ -1973,4 +1977,34 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
}
+/*
+ * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process
+ */
+static inline void kbase_mem_mmgrab(void)
+{
+ /* This merely takes a reference on the memory descriptor structure
+ * i.e. mm_struct of current process and not on its address space and
+ * so won't block the freeing of address space on process exit.
+ */
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+ atomic_inc(&current->mm->mm_count);
+#else
+ mmgrab(current->mm);
+#endif
+}
+/**
+ * kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed
+ * @kctx: Pointer to kbase context
+ *
+ * Don't allow the allocation of GPU memory if the ioctl has been issued
+ * from the forked child process using the mali device file fd inherited from
+ * the parent process.
+ *
+ * Return: true if allocation is allowed.
+ */
+static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
+{
+ return (kctx->process_mm == current->mm);
+}
+
#endif /* _KBASE_MEM_H_ */
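
kbase_mem_mmgrab() pins only the mm_struct (its mm_count), not the address space (mm_users), so holding it does not delay address-space teardown on process exit; the pointer recorded in kctx->process_mm simply stays valid until the mmdrop() at context termination. That stored pointer is also what kbase_mem_allow_alloc() compares against, which is why a forked child, whose current->mm is a different descriptor, is refused GPU allocations. A minimal sketch of the pairing under these assumptions, illustrative only:

    /* Pin at context creation, release at context termination, exactly once. */
    static void example_mm_lifetime(struct kbase_context *kctx)
    {
            kbase_mem_mmgrab();                 /* +1 on current->mm->mm_count */
            kctx->process_mm = current->mm;

            /* ... memory usage counters are updated via kctx->process_mm ... */

            mmdrop(kctx->process_mm);           /* paired release */
            kctx->process_mm = NULL;
    }
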
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c
index 19f64be..ced2b06 100644
--- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -44,6 +44,7 @@
#include <linux/cache.h>
#include <linux/memory_group_manager.h>
+#include <linux/version.h>
#include <mali_kbase.h>
#include <mali_kbase_mem_linux.h>
#include <tl/mali_kbase_tracepoints.h>
@@ -1494,6 +1495,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
struct kbase_alloc_import_user_buf *user_buf;
struct page **pages = NULL;
+ struct tagged_addr *pa;
+ struct device *dev;
int write;
/* Flag supported only for dma-buf imported memory */
@@ -1635,31 +1638,48 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
reg->gpu_alloc->nents = 0;
reg->extent = 0;
+ pa = kbase_get_gpu_phy_pages(reg);
+ dev = kctx->kbdev->dev;
+
if (pages) {
- struct device *dev = kctx->kbdev->dev;
- unsigned long local_size = user_buf->size;
- unsigned long offset = user_buf->address & ~PAGE_MASK;
- struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
/* Top bit signifies that this was pinned on import */
user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case of memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for the whole of all pages that enclose the imported
+ * region, otherwise the initial content of memory would be wrong.
+ */
+
for (i = 0; i < faulted_pages; i++) {
dma_addr_t dma_addr;
- unsigned long min;
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
- min = MIN(PAGE_SIZE - offset, local_size);
- dma_addr = dma_map_page(dev, pages[i],
- offset, min,
- DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, dma_addr))
goto unwind_dma_map;
user_buf->dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- local_size -= min;
- offset = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
}
reg->gpu_alloc->nents = faulted_pages;
@@ -1668,10 +1688,22 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
return reg;
unwind_dma_map:
+
+ /* Run the unmap loop in the same order as map loop, and perform again
+ * CPU cache synchronization to re-write the content of dirty CPU caches
+ * to memory. This precautionary measure is kept here to keep this code
+ * aligned with kbase_jd_user_buf_map() to allow for a potential refactor
+ * in the future.
+ */
while (i--) {
- dma_unmap_page(kctx->kbdev->dev,
- user_buf->dma_addrs[i],
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_addr_t dma_addr = user_buf->dma_addrs[i];
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #else
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
}
fault_mismatch:
if (pages) {
@@ -1687,7 +1719,6 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
no_region:
bad_size:
return NULL;
-
}
@@ -1959,7 +1990,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
*flags &= ~BASE_MEM_COHERENT_SYSTEM;
}
-
+ if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) {
+ dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached");
+ goto bad_flags;
+ }
if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
dev_warn(kctx->kbdev->dev,
"padding is only supported for UMM");
@@ -3078,73 +3112,23 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
{
- struct mm_struct *mm;
-
- rcu_read_lock();
- mm = rcu_dereference(kctx->process_mm);
- if (mm) {
- atomic_add(pages, &kctx->nonmapped_pages);
+ struct mm_struct *mm = kctx->process_mm;
+ if (unlikely(!mm))
+ return;
+ atomic_add(pages, &kctx->nonmapped_pages);
#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-#else
- spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
- spin_unlock(&mm->page_table_lock);
-#endif
- }
- rcu_read_unlock();
-}
-
-static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
-{
- int pages;
- struct mm_struct *mm;
-
- spin_lock(&kctx->mm_update_lock);
- mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
- if (!mm) {
- spin_unlock(&kctx->mm_update_lock);
- return;
- }
-
- rcu_assign_pointer(kctx->process_mm, NULL);
- spin_unlock(&kctx->mm_update_lock);
- synchronize_rcu();
-
- pages = atomic_xchg(&kctx->nonmapped_pages, 0);
-#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
#else
spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
spin_unlock(&mm->page_table_lock);
#endif
}
-static void kbase_special_vm_close(struct vm_area_struct *vma)
-{
- struct kbase_context *kctx;
-
- kctx = vma->vm_private_data;
- kbasep_os_process_page_usage_drain(kctx);
-}
-
-static const struct vm_operations_struct kbase_vm_special_ops = {
- .close = kbase_special_vm_close,
-};
-
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
{
- /* check that this is the only tracking page */
- spin_lock(&kctx->mm_update_lock);
- if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
- spin_unlock(&kctx->mm_update_lock);
- return -EFAULT;
- }
-
- rcu_assign_pointer(kctx->process_mm, current->mm);
-
- spin_unlock(&kctx->mm_update_lock);
+ if (vma_pages(vma) != 1)
+ return -EINVAL;
/* no real access */
vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
@@ -3153,9 +3137,7 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_
#else
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
#endif
- vma->vm_ops = &kbase_vm_special_ops;
- vma->vm_private_data = kctx;
- return 0;
+ return 0;
}
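
On import, each page above is mapped with DMA_ATTR_SKIP_CPU_SYNC and then cleaned to memory once for the whole page, so CPU writes, including those to non-imported parts of the page, are committed before the GPU uses the buffer; kernels older than 4.10 fall back to dma_map_page(), which performs the sync implicitly. A sketch of that map-side pattern follows; the wrapper itself is illustrative, not part of the patch.

    #include <linux/dma-mapping.h>
    #include <linux/version.h>

    /* Map one imported page for the GPU and hand ownership to the device. */
    static int example_map_imported_page(struct device *dev, struct page *p,
                                         dma_addr_t *out)
    {
            dma_addr_t dma_addr;

    #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
            dma_addr = dma_map_page(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
    #else
            dma_addr = dma_map_page_attrs(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
                                          DMA_ATTR_SKIP_CPU_SYNC);
    #endif
            if (dma_mapping_error(dev, dma_addr))
                    return -ENOMEM;

            /* GPU takes ownership: commit any dirty CPU cache lines for the page. */
            dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

            *out = dma_addr;
            return 0;
    }
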
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c
index 0723e32..e6cb24e 100644
--- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,11 @@
#include <linux/shrinker.h>
#include <linux/atomic.h>
#include <linux/version.h>
-
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/signal.h>
+#else
+#include <linux/signal.h>
+#endif
#define pool_dbg(pool, format, ...) \
dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \
(pool->next_pool) ? "kctx" : "kbdev", \
@@ -38,6 +42,46 @@
#define NOT_DIRTY false
#define NOT_RECLAIMED false
+/**
+ * can_alloc_page() - Check if the current thread can allocate a physical page
+ *
+ * @pool: Pointer to the memory pool.
+ * @page_owner: Pointer to the task/process that created the Kbase context
+ * for which a page needs to be allocated. It can be NULL if
+ * the page won't be associated with any Kbase context.
+ * @alloc_from_kthread: Flag indicating that the current thread is a kernel thread.
+ *
+ * This function checks if the current thread is a kernel thread and can make a
+ * request to the kernel to allocate a physical page. If the kernel thread is
+ * allocating a page for the Kbase context and the process that created the context
+ * is exiting or is being killed, then there is no point in doing a page allocation.
+ *
+ * The check done by the function is particularly helpful when the system is running
+ * low on memory. When a page is allocated from the context of a kernel thread, the
+ * OoM killer doesn't consider the kernel thread for killing and the kernel keeps
+ * retrying to allocate the page as long as the OoM killer is able to kill processes.
+ * The check allows the kernel thread to quickly exit the page allocation loop once
+ * the OoM killer has initiated the killing of @page_owner, thereby unblocking the
+ * context termination for @page_owner and freeing of GPU memory allocated by it.
+ * This helps in preventing a kernel panic and also limits the number of innocent
+ * processes that get killed.
+ *
+ * Return: true if the page can be allocated otherwise false.
+ */
+static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner,
+ const bool alloc_from_kthread)
+{
+ if (likely(!alloc_from_kthread || !page_owner))
+ return true;
+
+ if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) {
+ dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", __func__, page_owner->comm,
+ task_pid_nr(page_owner));
+ return false;
+ }
+
+ return true;
+}
static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
{
@@ -241,11 +285,11 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
}
int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
- size_t nr_to_grow)
+ size_t nr_to_grow, struct task_struct *page_owner)
{
struct page *p;
size_t i;
-
+ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
kbase_mem_pool_lock(pool);
pool->dont_reclaim = true;
@@ -258,6 +302,8 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
return -ENOMEM;
}
kbase_mem_pool_unlock(pool);
+ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+ return -ENOMEM;
p = kbase_mem_alloc_page(pool);
if (!p) {
@@ -290,7 +336,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
if (new_size < cur_size)
kbase_mem_pool_shrink(pool, cur_size - new_size);
else if (new_size > cur_size)
- err = kbase_mem_pool_grow(pool, new_size - cur_size);
+ err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL);
if (err) {
size_t grown_size = kbase_mem_pool_size(pool);
@@ -553,13 +599,14 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
}
int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
- struct tagged_addr *pages, bool partial_allowed)
+ struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner)
{
struct page *p;
size_t nr_from_pool;
size_t i = 0;
int err = -ENOMEM;
size_t nr_pages_internal;
+ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
nr_pages_internal = nr_4k_pages / (1u << (pool->order));
@@ -591,7 +638,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
if (i != nr_4k_pages && pool->next_pool) {
/* Allocate via next pool */
err = kbase_mem_pool_alloc_pages(pool->next_pool,
- nr_4k_pages - i, pages + i, partial_allowed);
+ nr_4k_pages - i, pages + i, partial_allowed, page_owner);
if (err < 0)
goto err_rollback;
@@ -600,6 +647,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
} else {
/* Get any remaining pages from kernel */
while (i != nr_4k_pages) {
+ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+ goto err_rollback;
+
p = kbase_mem_alloc_page(pool);
if (!p) {
if (partial_allowed)
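
can_alloc_page() only ever vetoes allocations made from a kernel thread on behalf of a dying process; direct userspace callers are unaffected. A sketch of how a refill loop uses it follows; the per-page helper here is a placeholder, not a function from the driver.

    /* Simplified stand-in for the pool refill loops in this file. */
    static int example_refill(struct kbase_mem_pool *pool,
                              struct task_struct *page_owner, size_t nr_pages)
    {
            const bool from_kthread = !!(current->flags & PF_KTHREAD);

            while (nr_pages--) {
                    /* Stop retrying once the OoM killer targets the owner:
                     * its context is about to be torn down anyway.
                     */
                    if (!can_alloc_page(pool, page_owner, from_kthread))
                            return -ENOMEM;

                    if (example_alloc_one_page(pool))   /* placeholder helper */
                            return -ENOMEM;
            }
            return 0;
    }
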
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c b/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c
index e3113f8..e3286a3 100644
--- a/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c
@@ -280,8 +280,6 @@ static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
struct device *dev = kctx->kbdev->dev;
int i;
- dev_warn(dev,"kbase_fence_debug_check_atom\n");
-
for (i = 0; i < 2; i++) {
struct kbase_jd_atom *dep;
@@ -985,6 +983,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
jit_info_copy_size_for_jit_version[kctx->jit_version];
WARN_ON(jit_info_user_copy_size > sizeof(*info));
+ if (!kbase_mem_allow_alloc(kctx)) {
+ dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
+ current->comm, current->pid, kctx->tgid, kctx->id);
+ ret = -EINVAL;
+ goto fail;
+ }
+
/* For backwards compatibility, and to prevent reading more than 1 jit
* info struct on jit version 1
*/
diff --git a/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c
index cc233c99..321dd6d 100644
--- a/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c
+++ b/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c
@@ -65,15 +65,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
u32 const exception_data = (status >> 8) & 0xFFFFFF;
int const as_no = as->number;
unsigned long flags;
+ const uintptr_t fault_addr = fault->addr;
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
- "GPU bus fault in AS%d at VA 0x%016llX\n"
+ "GPU bus fault in AS%d at VA %pK\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"exception data 0x%X\n"
"pid: %d\n",
- as_no, fault->addr,
+ as_no, (void *)fault_addr,
status,
exception_type, kbase_gpu_exception_name(exception_type),
exception_data,
diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c
index 32479c7..5410204 100644
--- a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c
@@ -1,6 +1,6 @@
/*
*
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1305,6 +1305,7 @@ void page_fault_worker(struct work_struct *data)
kbase_gpu_vm_unlock(kctx);
} else {
int ret = -ENOMEM;
+ const u8 group_id = region->gpu_alloc->group_id;
kbase_gpu_vm_unlock(kctx);
@@ -1316,23 +1317,21 @@ void page_fault_worker(struct work_struct *data)
if (grow_2mb_pool) {
/* Round page requirement up to nearest 2 MB */
struct kbase_mem_pool *const lp_mem_pool =
- &kctx->mem_pools.large[
- region->gpu_alloc->group_id];
+ &kctx->mem_pools.large[group_id];
pages_to_grow = (pages_to_grow +
((1 << lp_mem_pool->order) - 1))
>> lp_mem_pool->order;
ret = kbase_mem_pool_grow(lp_mem_pool,
- pages_to_grow);
+ pages_to_grow, kctx->task);
} else {
#endif
struct kbase_mem_pool *const mem_pool =
- &kctx->mem_pools.small[
- region->gpu_alloc->group_id];
+ &kctx->mem_pools.small[group_id];
ret = kbase_mem_pool_grow(mem_pool,
- pages_to_grow);
+ pages_to_grow, kctx->task);
#ifdef CONFIG_MALI_2MB_ALLOC
}
#endif
@@ -1732,7 +1731,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
err = kbase_mem_pool_grow(
&kbdev->mem_pools.small[
kctx->mmu.group_id],
- MIDGARD_MMU_BOTTOMLEVEL);
+ MIDGARD_MMU_BOTTOMLEVEL, kctx->task);
mutex_lock(&kctx->mmu.mmu_lock);
} while (!err);
if (err) {
@@ -1917,7 +1916,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
mutex_unlock(&mmut->mmu_lock);
err = kbase_mem_pool_grow(
&kbdev->mem_pools.small[mmut->group_id],
- cur_level);
+ cur_level, mmut->kctx ? mmut->kctx->task : NULL);
mutex_lock(&mmut->mmu_lock);
} while (!err);
@@ -2655,7 +2654,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
err = kbase_mem_pool_grow(
&kbdev->mem_pools.small[mmut->group_id],
- MIDGARD_MMU_BOTTOMLEVEL);
+ MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
if (err) {
kbase_mmu_term(kbdev, mmut);
return -ENOMEM;
diff --git a/drivers/gpu/arm/bv_r32p1/context/mali_kbase_context.c b/drivers/gpu/arm/bv_r32p1/context/mali_kbase_context.c
index b4244b4..5c5196e 100644
--- a/drivers/gpu/arm/bv_r32p1/context/mali_kbase_context.c
+++ b/drivers/gpu/arm/bv_r32p1/context/mali_kbase_context.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,12 @@
/*
* Base kernel context APIs
*/
+#include <linux/version.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/task.h>
+#else
+#include <linux/sched.h>
+#endif
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
@@ -176,13 +182,50 @@ int kbase_context_common_init(struct kbase_context *kctx)
/* creating a context is considered a disjoint event */
kbase_disjoint_event(kctx->kbdev);
- spin_lock_init(&kctx->mm_update_lock);
kctx->process_mm = NULL;
+ kctx->task = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
atomic_set(&kctx->permanent_mapped_pages, 0);
kctx->tgid = current->tgid;
kctx->pid = current->pid;
+ /* Check if this is a Userspace created context */
+ if (likely(kctx->filp)) {
+ struct pid *pid_struct;
+
+ rcu_read_lock();
+ pid_struct = find_get_pid(kctx->tgid);
+ if (likely(pid_struct)) {
+ struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID);
+
+ if (likely(task)) {
+ /* Take a reference on the task to avoid slow lookup
+ * later on from the page allocation loop.
+ */
+ get_task_struct(task);
+ kctx->task = task;
+ } else {
+ dev_err(kctx->kbdev->dev,
+ "Failed to get task pointer for %s/%d",
+ current->comm, current->pid);
+ err = -ESRCH;
+ }
+
+ put_pid(pid_struct);
+ } else {
+ dev_err(kctx->kbdev->dev,
+ "Failed to get pid pointer for %s/%d",
+ current->comm, current->pid);
+ err = -ESRCH;
+ }
+ rcu_read_unlock();
+
+ if (unlikely(err))
+ return err;
+ kbase_mem_mmgrab();
+ kctx->process_mm = current->mm;
+ }
+
atomic_set(&kctx->used_pages, 0);
mutex_init(&kctx->reg_lock);
@@ -209,13 +252,16 @@ int kbase_context_common_init(struct kbase_context *kctx)
mutex_init(&kctx->legacy_hwcnt_lock);
mutex_lock(&kctx->kbdev->kctx_list_lock);
-
err = kbase_insert_kctx_to_process(kctx);
- if (err)
- dev_err(kctx->kbdev->dev,
- "(err:%d) failed to insert kctx to kbase_process\n", err);
-
mutex_unlock(&kctx->kbdev->kctx_list_lock);
+ if (err) {
+ dev_err(kctx->kbdev->dev,
+ "(err:%d) failed to insert kctx to kbase_process", err);
+ if (likely(kctx->filp)) {
+ mmdrop(kctx->process_mm);
+ put_task_struct(kctx->task);
+ }
+ }
return err;
}
@@ -301,6 +347,11 @@ void kbase_context_common_term(struct kbase_context *kctx)
kbase_remove_kctx_from_process(kctx);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
+ if (likely(kctx->filp)) {
+ mmdrop(kctx->process_mm);
+ put_task_struct(kctx->task);
+ }
+
KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u);
}
diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf.c b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf.c
index a83b817..28c8de9 100644
--- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf.c
+++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf.c
@@ -348,7 +348,7 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
ret = kbase_mem_pool_alloc_pages(
&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
- num_pages, queue->phys, false);
+ num_pages, queue->phys, false, kctx->task);
if (ret != num_pages)
goto phys_alloc_failed;
@@ -1072,7 +1072,7 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
/* Get physical page for a normal suspend buffer */
err = kbase_mem_pool_alloc_pages(
&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- nr_pages, &s_buf->phy[0], false);
+ nr_pages, &s_buf->phy[0], false, kctx->task);
if (err < 0)
goto phy_pages_alloc_failed;
@@ -2995,7 +2995,7 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- 1, &phys, false);
+ 1, &phys, false, NULL);
if (ret <= 0) {
fput(filp);
@@ -3031,7 +3031,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
- false);
+ false, NULL);
if (ret <= 0)
return ret;
diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.c
index 25767de..e6f7dfe 100644
--- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.c
+++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -546,7 +546,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
} else {
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- num_pages, phys, false);
+ num_pages, phys, false, NULL);
if (ret < 0)
goto out;
}
@@ -2248,7 +2248,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- num_pages, phys, false);
+ num_pages, phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware_no_mali.c
index edd64be..72ac6e2 100644
--- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1289,7 +1289,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- num_pages, phys, false);
+ num_pages, phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_kcpu.c
index 4e26a49..b8a83f9 100644
--- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_kcpu.c
+++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_kcpu.c
@@ -335,6 +335,14 @@ static int kbase_kcpu_jit_allocate_prepare(
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ if (!kbase_mem_allow_alloc(kctx)) {
+ dev_dbg(kctx->kbdev->dev,
+ "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
+ current->comm, current->pid, kctx->tgid, kctx->id);
+ ret = -EINVAL;
+ goto out;
+ }
+
if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
count > ARRAY_SIZE(kctx->jit_alloc)) {
ret = -EINVAL;
diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_defs.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_defs.h
index 7c8ea18..3526a81 100644
--- a/drivers/gpu/arm/bv_r32p1/mali_kbase_defs.h
+++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1555,11 +1555,13 @@ struct kbase_sub_alloc {
* is scheduled in and an atom is pulled from the context's per
* slot runnable tree in JM GPU or GPU command queue
* group is programmed on CSG slot in CSF GPU.
- * @mm_update_lock: lock used for handling of special tracking page.
* @process_mm: Pointer to the memory descriptor of the process which
* created the context. Used for accounting the physical
* pages used for GPU allocations, done for the context,
- * to the memory consumed by the process.
+ * to the memory consumed by the process. A reference is taken
+ * on this descriptor for the Userspace created contexts so that
+ * Kbase can safely access it to update the memory usage counters.
+ * The reference is dropped on context termination.
* @gpu_va_end: End address of the GPU va space (in 4KB page units)
* @jit_va: Indicates if a JIT_VA zone has been created.
* @mem_profile_data: Buffer containing the profiling information provided by
@@ -1691,7 +1693,10 @@ struct kbase_sub_alloc {
* @limited_core_mask: The mask that is applied to the affinity in case of atoms
* marked with BASE_JD_REQ_LIMITED_CORE_MASK.
* @platform_data: Pointer to platform specific per-context data.
- *
+ * @task: Pointer to the task structure of the main thread of the process
+ * that created the Kbase context. It would be set only for the
+ * contexts created by the Userspace and not for the contexts
+ * created internally by the Kbase.
* A kernel base context is an entity among which the GPU is scheduled.
* Each context has its own GPU address space.
* Up to one context can be created for each client that opens the device file
@@ -1781,8 +1786,7 @@ struct kbase_context {
atomic_t refcount;
- spinlock_t mm_update_lock;
- struct mm_struct __rcu *process_mm;
+ struct mm_struct *process_mm;
u64 gpu_va_end;
bool jit_va;
@@ -1844,6 +1848,8 @@ struct kbase_context {
#if !MALI_USE_CSF
void *platform_data;
#endif
+
+ struct task_struct *task;
};
#ifdef CONFIG_MALI_CINSTR_GWT
diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.c
index 606fc5e..b894f20 100644
--- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.c
+++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1830,6 +1830,7 @@ void kbase_sync_single(struct kbase_context *kctx,
src = ((unsigned char *)kmap(gpu_page)) + offset;
dst = ((unsigned char *)kmap(cpu_page)) + offset;
}
+
memcpy(dst, src, size);
kunmap(gpu_page);
kunmap(cpu_page);
@@ -2247,7 +2248,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
&kctx->mem_pools.large[alloc->group_id],
nr_lp * (SZ_2M / SZ_4K),
tp,
- true);
+ true, kctx->task);
if (res > 0) {
nr_left -= res;
@@ -2301,7 +2302,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
err = kbase_mem_pool_grow(
&kctx->mem_pools.large[alloc->group_id],
- 1);
+ 1, kctx->task);
if (err)
break;
} while (1);
@@ -2348,7 +2349,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
if (nr_left) {
res = kbase_mem_pool_alloc_pages(
&kctx->mem_pools.small[alloc->group_id],
- nr_left, tp, false);
+ nr_left, tp, false, kctx->task);
if (res <= 0)
goto alloc_failed;
}
@@ -3830,7 +3831,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
spin_unlock(&kctx->mem_partials_lock);
kbase_gpu_vm_unlock(kctx);
- ret = kbase_mem_pool_grow(pool, pool_delta);
+ ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
kbase_gpu_vm_lock(kctx);
if (ret)
@@ -4655,10 +4656,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
struct page **pages;
struct tagged_addr *pa;
long i;
- unsigned long address;
struct device *dev;
- unsigned long offset;
- unsigned long local_size;
unsigned long gwt_mask = ~0;
/* Calls to this function are inherently asynchronous, with respect to
@@ -4675,12 +4673,28 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc = reg->gpu_alloc;
pa = kbase_get_gpu_phy_pages(reg);
- address = alloc->imported.user_buf.address;
pinned_pages = alloc->nents;
pages = alloc->imported.user_buf.pages;
dev = kctx->kbdev->dev;
- offset = address & ~PAGE_MASK;
- local_size = alloc->imported.user_buf.size;
+
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case of memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for the whole of all pages that enclose the imported
+ * region, otherwise the initial content of memory would be wrong.
+ */
+
+
/* The user buffer could already have been previously pinned before
* entering this function, and hence there could potentially be CPU
@@ -4689,21 +4703,21 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages);
for (i = 0; i < pinned_pages; i++) {
+
dma_addr_t dma_addr;
- unsigned long min;
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
- min = MIN(PAGE_SIZE - offset, local_size);
- dma_addr = dma_map_page(dev, pages[i],
- offset, min,
- DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, dma_addr))
goto unwind;
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- local_size -= min;
- offset = 0;
}
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -4721,10 +4735,26 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
/* fall down */
unwind:
alloc->nents = 0;
+
+ /* Run the unmap loop in the same order as map loop, and perform again
+ * CPU cache synchronization to re-write the content of dirty CPU caches
+ * to memory. This is a precautionary measure in case a GPU job has taken
+ * advantage of a partially GPU-mapped range to write and corrupt the
+ * content of memory, either inside or outside the imported region.
+ *
+ * Notice that this error recovery path doesn't try to be optimal and just
+ * flushes the entire page range.
+ */
+
while (i--) {
- dma_unmap_page(kctx->kbdev->dev,
- alloc->imported.user_buf.dma_addrs[i],
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #else
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
}
while (++i < pinned_pages) {
@@ -4744,6 +4774,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
{
long i;
struct page **pages;
+ unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
unsigned long size = alloc->imported.user_buf.size;
lockdep_assert_held(&kctx->reg_lock);
@@ -4757,12 +4788,94 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
#endif
for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
- unsigned long local_size;
+ unsigned long imported_size = MIN(size, PAGE_SIZE - offset_within_page);
+ /* Notice: this is a temporary variable that is used for DMA sync
+ * operations, and that could be incremented by an offset if the
+ * current page contains both imported and non-imported memory
+ * sub-regions.
+ *
+ * It is valid to add an offset to this value, because the offset
+ * is always kept within the physically contiguous dma-mapped range
+ * and there's no need to translate to physical address to offset it.
+ *
+ * This variable is not going to be used for the actual DMA unmap
+ * operation, that shall always use the original DMA address of the
+ * whole memory page.
+ */
+
+
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
- local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
- dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+ /* Manual CPU cache synchronization.
+ *
+ * When the GPU returns ownership of the buffer to the CPU, the driver
+ * needs to treat imported and non-imported memory differently.
+ *
+ * The first case to consider is non-imported sub-regions at the
+ * beginning of the first page and at the end of last page. For these
+ * sub-regions: CPU cache shall be committed with a clean+invalidate,
+ * in order to keep the last CPU write.
+ *
+ * Imported region prefers the opposite treatment: this memory has been
+ * legitimately mapped and used by the GPU, hence GPU writes shall be
+ * committed to memory, while CPU cache shall be invalidated to make
+ * sure that CPU reads the correct memory content.
+ *
+ * The following diagram shows the expected value of the variables
+ * used in this loop in the corner case of an imported region enclosed
+ * by a single memory page:
+ *
+ * page boundary ->|---------- | <- dma_addr (initial value)
+ * | |
+ * | - - - - - | <- offset_within_page
+ * |XXXXXXXXXXX|\
+ * |XXXXXXXXXXX| \
+ * |XXXXXXXXXXX| }- imported_size
+ * |XXXXXXXXXXX| /
+ * |XXXXXXXXXXX|/
+ * | - - - - - | <- offset_within_page + imported_size
+ * | |\
+ * | | }- PAGE_SIZE - imported_size - offset_within_page
+ * | |/
+ * page boundary ->|-----------|
+ *
+ * If the imported region is enclosed by more than one page, then
+ * offset_within_page = 0 for any page after the first.
+ */
+ /* Only for first page: handle non-imported range at the beginning. */
+ if (offset_within_page > 0) {
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+ DMA_BIDIRECTIONAL);
+ dma_addr += offset_within_page;
+ }
+
+ /* For every page: handle imported range. */
+ if (imported_size > 0)
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+ DMA_BIDIRECTIONAL);
+
+ /* Only for last page (that may coincide with first page):
+ * handle non-imported range at the end.
+ */
+ if ((imported_size + offset_within_page) < PAGE_SIZE) {
+ dma_addr += imported_size;
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+ PAGE_SIZE - imported_size - offset_within_page,
+ DMA_BIDIRECTIONAL);
+ }
+
+ /* Notice: use the original DMA address to unmap the whole memory page. */
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
DMA_BIDIRECTIONAL);
+ #else
+ dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
+ PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
+
+
+
+
if (writeable)
set_page_dirty_lock(pages[i]);
#if !MALI_USE_CSF
@@ -4770,7 +4883,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
pages[i] = NULL;
#endif
- size -= local_size;
+ size -= imported_size;
}
#if !MALI_USE_CSF
alloc->nents = 0;
diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.h
index 8fcea14..a208550 100644
--- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.h
+++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -889,7 +889,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
* @pages: Pointer to array where the physical address of the allocated
* pages will be stored.
* @partial_allowed: If fewer pages allocated is allowed
- *
+ * @page_owner: Pointer to the task that created the Kbase context for which
+ * the pages are being allocated. It can be NULL if the pages
+ * won't be associated with any Kbase context.
* Like kbase_mem_pool_alloc() but optimized for allocating many pages.
*
* Return:
@@ -905,7 +907,7 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
* this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
*/
int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
- struct tagged_addr *pages, bool partial_allowed);
+ struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner);
/**
* kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
@@ -1017,13 +1019,15 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
* kbase_mem_pool_grow - Grow the pool
* @pool: Memory pool to grow
* @nr_to_grow: Number of pages to add to the pool
- *
+ * @page_owner: Pointer to the task that created the Kbase context for which
+ * the memory pool is being grown. It can be NULL if the pages
+ * to be allocated won't be associated with any Kbase context.
+ *
* Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
* become larger than the maximum size specified.
*
* Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
*/
-int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow, struct task_struct *page_owner);
/**
* kbase_mem_pool_trim - Grow or shrink the pool to a new size
@@ -2120,12 +2124,26 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
}
+/*
+ * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process
+ */
+static inline void kbase_mem_mmgrab(void)
+{
+ /* This merely takes a reference on the memory descriptor structure
+ * i.e. mm_struct of current process and not on its address space and
+ * so won't block the freeing of address space on process exit.
+ */
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+ atomic_inc(&current->mm->mm_count);
+#else
+ mmgrab(current->mm);
+#endif
+}
/**
* kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed
* @kctx: Pointer to kbase context
*
- * Don't allow the allocation of GPU memory until user space has set up the
- * tracking page (which sets kctx->process_mm) or if the ioctl has been issued
+ * Don't allow the allocation of GPU memory if the ioctl has been issued
* from the forked child process using the mali device file fd inherited from
* the parent process.
*
@@ -2133,12 +2151,6 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
*/
static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
{
- bool allow_alloc = true;
-
- rcu_read_lock();
- allow_alloc = (rcu_dereference(kctx->process_mm) == current->mm);
- rcu_read_unlock();
-
- return allow_alloc;
+ return (kctx->process_mm == current->mm);
}
#endif /* _KBASE_MEM_H_ */
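
For reference, the sketch below shows the pairing intended for kbase_mem_mmgrab() above over a context's lifetime: the reference pins only the mm_struct itself (mm_count), not the address space (mm_users), so process exit is not delayed. The example_* names are illustrative, not driver code.

#include <linux/version.h>
#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
#include <linux/sched/mm.h>     /* mmgrab(), mmdrop() */
#else
#include <linux/sched.h>
#endif

static void example_ctx_init(struct mm_struct **saved_mm)
{
        /* Equivalent of kbase_mem_mmgrab(): pin the memory descriptor. */
#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
        atomic_inc(&current->mm->mm_count);
#else
        mmgrab(current->mm);
#endif
        *saved_mm = current->mm;
}

static void example_ctx_term(struct mm_struct *saved_mm)
{
        /* Matching release performed at context termination. */
        mmdrop(saved_mm);
}
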
diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.c
index f28c556..d82bc01 100644
--- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -39,6 +39,7 @@
#include <linux/cache.h>
#include <linux/memory_group_manager.h>
+#include <linux/version.h>
#include <mali_kbase.h>
#include <mali_kbase_mem_linux.h>
#include <tl/mali_kbase_tracepoints.h>
@@ -1527,6 +1528,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
struct kbase_alloc_import_user_buf *user_buf;
struct page **pages = NULL;
+ struct tagged_addr *pa;
+ struct device *dev;
int write;
/* Flag supported only for dma-buf imported memory */
@@ -1667,31 +1670,48 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
reg->gpu_alloc->nents = 0;
reg->extension = 0;
+ pa = kbase_get_gpu_phy_pages(reg);
+ dev = kctx->kbdev->dev;
+
if (pages) {
- struct device *dev = kctx->kbdev->dev;
- unsigned long local_size = user_buf->size;
- unsigned long offset = user_buf->address & ~PAGE_MASK;
- struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
/* Top bit signifies that this was pinned on import */
user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case of memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for the whole of all pages that enclose the imported
+ * region, otherwise the initial content of memory would be wrong.
+ */
+
for (i = 0; i < faulted_pages; i++) {
dma_addr_t dma_addr;
- unsigned long min;
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
- min = MIN(PAGE_SIZE - offset, local_size);
- dma_addr = dma_map_page(dev, pages[i],
- offset, min,
- DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, dma_addr))
goto unwind_dma_map;
user_buf->dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- local_size -= min;
- offset = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
}
reg->gpu_alloc->nents = faulted_pages;
@@ -1700,10 +1720,22 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
return reg;
unwind_dma_map:
+
+ /* Run the unmap loop in the same order as map loop, and perform again
+ * CPU cache synchronization to re-write the content of dirty CPU caches
+ * to memory. This precautionary measure is kept here to keep this code
+ * aligned with kbase_jd_user_buf_map() to allow for a potential refactor
+ * in the future.
+ */
while (i--) {
- dma_unmap_page(kctx->kbdev->dev,
- user_buf->dma_addrs[i],
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_addr_t dma_addr = user_buf->dma_addrs[i];
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #else
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
}
fault_mismatch:
if (pages) {
@@ -1723,7 +1755,6 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
no_region:
bad_size:
return NULL;
-
}
@@ -1999,7 +2030,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
*flags &= ~BASE_MEM_COHERENT_SYSTEM;
}
-
+ if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) {
+ dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached");
+ goto bad_flags;
+ }
if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
dev_warn(kctx->kbdev->dev,
"padding is only supported for UMM");
@@ -3147,79 +3181,27 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
{
- struct mm_struct *mm;
-
- rcu_read_lock();
- mm = rcu_dereference(kctx->process_mm);
- if (mm) {
- atomic_add(pages, &kctx->nonmapped_pages);
+ struct mm_struct *mm = kctx->process_mm;
+ if (unlikely(!mm))
+ return;
+ atomic_add(pages, &kctx->nonmapped_pages);
#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-#else
- spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
- spin_unlock(&mm->page_table_lock);
-#endif
- }
- rcu_read_unlock();
-}
-
-static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
-{
- int pages;
- struct mm_struct *mm;
-
- spin_lock(&kctx->mm_update_lock);
- mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
- if (!mm) {
- spin_unlock(&kctx->mm_update_lock);
- return;
- }
-
- rcu_assign_pointer(kctx->process_mm, NULL);
- spin_unlock(&kctx->mm_update_lock);
- synchronize_rcu();
-
- pages = atomic_xchg(&kctx->nonmapped_pages, 0);
-#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
#else
spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
spin_unlock(&mm->page_table_lock);
#endif
}
-static void kbase_special_vm_close(struct vm_area_struct *vma)
-{
- struct kbase_context *kctx;
-
- kctx = vma->vm_private_data;
- kbasep_os_process_page_usage_drain(kctx);
-}
-
-static const struct vm_operations_struct kbase_vm_special_ops = {
- .close = kbase_special_vm_close,
-};
-
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
{
- /* check that this is the only tracking page */
- spin_lock(&kctx->mm_update_lock);
- if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
- spin_unlock(&kctx->mm_update_lock);
- return -EFAULT;
- }
-
- rcu_assign_pointer(kctx->process_mm, current->mm);
-
- spin_unlock(&kctx->mm_update_lock);
+ if (vma_pages(vma) != 1)
+ return -EINVAL;
/* no real access */
vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
- vma->vm_ops = &kbase_vm_special_ops;
- vma->vm_private_data = kctx;
return 0;
}
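
The import path above maps whole pages with implicit CPU cache maintenance disabled and then cleans the caches explicitly. A minimal sketch of that map-side pattern, assuming a generic device and page (the helper name is illustrative):

#include <linux/dma-mapping.h>
#include <linux/version.h>

static int example_map_imported_page(struct device *dev, struct page *page,
                                     dma_addr_t *dma_addr_out)
{
        dma_addr_t dma_addr;

#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
        /* dma_map_page_attrs() with a flags bitmask only exists from v4.10,
         * so older kernels fall back to the implicitly synced variant.
         */
        dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
#else
        dma_addr = dma_map_page_attrs(dev, page, 0, PAGE_SIZE,
                                      DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
#endif
        if (dma_mapping_error(dev, dma_addr))
                return -ENOMEM;

        /* Explicitly commit CPU writes for the whole page, imported and
         * non-imported bytes alike, so the GPU sees the current contents.
         */
        dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

        *dma_addr_out = dma_addr;
        return 0;
}
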
diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_pool.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_pool.c
index a11da82..1889e20 100644
--- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_pool.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,7 +27,11 @@
#include <linux/shrinker.h>
#include <linux/atomic.h>
#include <linux/version.h>
-
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/signal.h>
+#else
+#include <linux/signal.h>
+#endif
#define pool_dbg(pool, format, ...) \
dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \
(pool->next_pool) ? "kctx" : "kbdev", \
@@ -37,6 +41,46 @@
#define NOT_DIRTY false
#define NOT_RECLAIMED false
+/**
+ * can_alloc_page() - Check if the current thread can allocate a physical page
+ *
+ * @pool: Pointer to the memory pool.
+ * @page_owner: Pointer to the task/process that created the Kbase context
+ * for which a page needs to be allocated. It can be NULL if
+ * the page won't be associated with any Kbase context.
+ * @alloc_from_kthread: Flag indicating that the current thread is a kernel thread.
+ *
+ * This function checks whether the current thread is a kernel thread that can
+ * legitimately request a physical page from the kernel. If a kernel thread is
+ * allocating a page for a Kbase context whose creating process is exiting or
+ * being killed, there is no point in performing the allocation.
+ *
+ * The check is particularly helpful when the system is running low on memory.
+ * When a page is allocated from the context of a kernel thread, the OoM killer
+ * does not consider the kernel thread for killing and the kernel keeps retrying
+ * the allocation for as long as the OoM killer is able to kill processes.
+ * The check lets the kernel thread exit the page allocation loop quickly once
+ * the OoM killer has started killing @page_owner, which unblocks the context
+ * termination for @page_owner and the freeing of the GPU memory it allocated.
+ * This helps to prevent a kernel panic and limits the number of innocent
+ * processes that get killed.
+ *
+ * Return: true if the page can be allocated, otherwise false.
+ */
+static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner,
+ const bool alloc_from_kthread)
+{
+ if (likely(!alloc_from_kthread || !page_owner))
+ return true;
+
+ if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) {
+ dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", __func__, page_owner->comm,
+ task_pid_nr(page_owner));
+ return false;
+ }
+
+ return true;
+}
static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
{
@@ -232,11 +276,11 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
}
int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
- size_t nr_to_grow)
+ size_t nr_to_grow, struct task_struct *page_owner)
{
struct page *p;
size_t i;
-
+ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
kbase_mem_pool_lock(pool);
pool->dont_reclaim = true;
@@ -249,6 +293,8 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
return -ENOMEM;
}
kbase_mem_pool_unlock(pool);
+ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+ return -ENOMEM;
p = kbase_mem_alloc_page(pool);
if (!p) {
@@ -281,7 +327,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
if (new_size < cur_size)
kbase_mem_pool_shrink(pool, cur_size - new_size);
else if (new_size > cur_size)
- err = kbase_mem_pool_grow(pool, new_size - cur_size);
+ err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL);
if (err) {
size_t grown_size = kbase_mem_pool_size(pool);
@@ -527,13 +573,14 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
}
int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
- struct tagged_addr *pages, bool partial_allowed)
+ struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner)
{
struct page *p;
size_t nr_from_pool;
size_t i = 0;
int err = -ENOMEM;
size_t nr_pages_internal;
+ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
nr_pages_internal = nr_4k_pages / (1u << (pool->order));
@@ -565,7 +612,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
if (i != nr_4k_pages && pool->next_pool) {
/* Allocate via next pool */
err = kbase_mem_pool_alloc_pages(pool->next_pool,
- nr_4k_pages - i, pages + i, partial_allowed);
+ nr_4k_pages - i, pages + i, partial_allowed, page_owner);
if (err < 0)
goto err_rollback;
@@ -574,6 +621,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
} else {
/* Get any remaining pages from kernel */
while (i != nr_4k_pages) {
+ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+ goto err_rollback;
+
p = kbase_mem_alloc_page(pool);
if (!p) {
if (partial_allowed)
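
The effect of can_alloc_page() above is easiest to see in the shape of the allocation loop it guards. The sketch below is illustrative only; the example_* names and the alloc_one callback are assumptions, not driver code.

#include <linux/sched.h>
#include <linux/version.h>
#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
#include <linux/sched/signal.h> /* fatal_signal_pending() */
#else
#include <linux/signal.h>
#endif

static long example_alloc_loop(struct task_struct *page_owner, long nr_pages,
                               struct page **pages,
                               struct page *(*alloc_one)(void))
{
        const bool from_kthread = !!(current->flags & PF_KTHREAD);
        long i;

        for (i = 0; i < nr_pages; i++) {
                /* Same condition as can_alloc_page(): a kthread stops
                 * retrying once the owning process is exiting or has a
                 * fatal signal pending.
                 */
                if (from_kthread && page_owner &&
                    ((page_owner->flags & PF_EXITING) ||
                     fatal_signal_pending(page_owner)))
                        break;

                pages[i] = alloc_one();
                if (!pages[i])
                        break;
        }

        return i; /* number of pages actually allocated */
}
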
diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_softjobs.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_softjobs.c
index 3ca5a70..be23c8f 100644
--- a/drivers/gpu/arm/bv_r32p1/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_softjobs.c
@@ -981,6 +981,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
jit_info_copy_size_for_jit_version[kctx->jit_version];
WARN_ON(jit_info_user_copy_size > sizeof(*info));
+ if (!kbase_mem_allow_alloc(kctx)) {
+ dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
+ current->comm, current->pid, kctx->tgid, kctx->id);
+ ret = -EINVAL;
+ goto fail;
+ }
+
/* For backwards compatibility, and to prevent reading more than 1 jit
* info struct on jit version 1
*/
diff --git a/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_csf.c
index 05253ae..067b5dc 100644
--- a/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_csf.c
+++ b/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_csf.c
@@ -148,17 +148,18 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
"true" : "false";
int as_no = as->number;
unsigned long flags;
+ const uintptr_t fault_addr = fault->addr;
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
- "GPU bus fault in AS%d at VA 0x%016llX\n"
+ "GPU bus fault in AS%d at VA %pK\n"
"VA_VALID: %s\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"access type 0x%X: %s\n"
"source id 0x%X\n"
"pid: %d\n",
- as_no, fault->addr,
+ as_no, (void *)fault_addr,
addr_valid,
status,
exception_type, kbase_gpu_exception_name(exception_type),
diff --git a/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_jm.c
index 6d2c6e2..ebe35bc 100644
--- a/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_jm.c
+++ b/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_jm.c
@@ -63,15 +63,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
u32 const exception_data = (status >> 8) & 0xFFFFFF;
int const as_no = as->number;
unsigned long flags;
+ const uintptr_t fault_addr = fault->addr;
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
- "GPU bus fault in AS%d at PA 0x%016llX\n"
+ "GPU bus fault in AS%d at PA %pK\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"exception data 0x%X\n"
"pid: %d\n",
- as_no, fault->addr,
+ as_no, (void *)fault_addr,
status,
exception_type, kbase_gpu_exception_name(exception_type),
exception_data,
diff --git a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.c
index b3a7fcc..68eda00 100644
--- a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1315,6 +1315,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
kbase_gpu_vm_unlock(kctx);
} else {
int ret = -ENOMEM;
+ const u8 group_id = region->gpu_alloc->group_id;
kbase_gpu_vm_unlock(kctx);
@@ -1326,23 +1327,21 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
if (grow_2mb_pool) {
/* Round page requirement up to nearest 2 MB */
struct kbase_mem_pool *const lp_mem_pool =
- &kctx->mem_pools.large[
- region->gpu_alloc->group_id];
+ &kctx->mem_pools.large[group_id];
pages_to_grow = (pages_to_grow +
((1 << lp_mem_pool->order) - 1))
>> lp_mem_pool->order;
ret = kbase_mem_pool_grow(lp_mem_pool,
- pages_to_grow);
+ pages_to_grow, kctx->task);
} else {
#endif
struct kbase_mem_pool *const mem_pool =
- &kctx->mem_pools.small[
- region->gpu_alloc->group_id];
+ &kctx->mem_pools.small[group_id];
ret = kbase_mem_pool_grow(mem_pool,
- pages_to_grow);
+ pages_to_grow, kctx->task);
#ifdef CONFIG_MALI_2MB_ALLOC
}
#endif
@@ -1756,7 +1755,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
&kbdev->mem_pools.small[
#endif
kctx->mmu.group_id],
- MIDGARD_MMU_BOTTOMLEVEL);
+ MIDGARD_MMU_BOTTOMLEVEL, kctx->task);
mutex_lock(&kctx->mmu.mmu_lock);
} while (!err);
if (err) {
@@ -1950,7 +1949,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
#else
&kbdev->mem_pools.small[mmut->group_id],
#endif
- cur_level);
+ cur_level, mmut->kctx ? mmut->kctx->task : NULL);
mutex_lock(&mmut->mmu_lock);
} while (!err);
@@ -2690,7 +2689,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
#else
&kbdev->mem_pools.small[mmut->group_id],
#endif
- MIDGARD_MMU_BOTTOMLEVEL);
+ MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
if (err) {
kbase_mmu_term(kbdev, mmut);
return -ENOMEM;
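
The calling convention introduced by the new page_owner argument is: context-backed pools pass the task that created the context, device-internal pools pass NULL. A small sketch using the kbase types from the patched sources (function names are illustrative):

#include <mali_kbase.h>

static int example_grow_for_fault(struct kbase_context *kctx, u8 group_id,
                                  size_t pages_to_grow)
{
        /* Attribute the allocation to the context's creator so it can be
         * abandoned if that process is killed.
         */
        return kbase_mem_pool_grow(&kctx->mem_pools.small[group_id],
                                   pages_to_grow, kctx->task);
}

static int example_grow_for_device(struct kbase_device *kbdev, u8 group_id,
                                   size_t pages_to_grow)
{
        /* Device-wide pools have no owning user process. */
        return kbase_mem_pool_grow(&kbdev->mem_pools.small[group_id],
                                   pages_to_grow, NULL);
}
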
diff --git a/drivers/gpu/arm/bv_r38p1/context/mali_kbase_context.c b/drivers/gpu/arm/bv_r38p1/context/mali_kbase_context.c
index 5fc1636..f67dddd 100644
--- a/drivers/gpu/arm/bv_r38p1/context/mali_kbase_context.c
+++ b/drivers/gpu/arm/bv_r38p1/context/mali_kbase_context.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,12 @@
/*
* Base kernel context APIs
*/
+#include <linux/version.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/task.h>
+#else
+#include <linux/sched.h>
+#endif
#include <mali_kbase.h>
#include <gpu/mali_kbase_gpu_regmap.h>
@@ -176,13 +182,50 @@ int kbase_context_common_init(struct kbase_context *kctx)
/* creating a context is considered a disjoint event */
kbase_disjoint_event(kctx->kbdev);
- spin_lock_init(&kctx->mm_update_lock);
kctx->process_mm = NULL;
+ kctx->task = NULL;
atomic_set(&kctx->nonmapped_pages, 0);
atomic_set(&kctx->permanent_mapped_pages, 0);
kctx->tgid = current->tgid;
kctx->pid = current->pid;
+ /* Check if this is a Userspace created context */
+ if (likely(kctx->filp)) {
+ struct pid *pid_struct;
+
+ rcu_read_lock();
+ pid_struct = find_get_pid(kctx->tgid);
+ if (likely(pid_struct)) {
+ struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID);
+
+ if (likely(task)) {
+ /* Take a reference on the task to avoid slow lookup
+ * later on from the page allocation loop.
+ */
+ get_task_struct(task);
+ kctx->task = task;
+ } else {
+ dev_err(kctx->kbdev->dev,
+ "Failed to get task pointer for %s/%d",
+ current->comm, current->pid);
+ err = -ESRCH;
+ }
+
+ put_pid(pid_struct);
+ } else {
+ dev_err(kctx->kbdev->dev,
+ "Failed to get pid pointer for %s/%d",
+ current->comm, current->pid);
+ err = -ESRCH;
+ }
+ rcu_read_unlock();
+
+ if (unlikely(err))
+ return err;
+ kbase_mem_mmgrab();
+ kctx->process_mm = current->mm;
+ }
+
atomic_set(&kctx->used_pages, 0);
mutex_init(&kctx->reg_lock);
@@ -213,13 +256,16 @@ int kbase_context_common_init(struct kbase_context *kctx)
kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
mutex_lock(&kctx->kbdev->kctx_list_lock);
-
err = kbase_insert_kctx_to_process(kctx);
- if (err)
- dev_err(kctx->kbdev->dev,
- "(err:%d) failed to insert kctx to kbase_process\n", err);
-
mutex_unlock(&kctx->kbdev->kctx_list_lock);
+ if (err) {
+ dev_err(kctx->kbdev->dev,
+ "(err:%d) failed to insert kctx to kbase_process", err);
+ if (likely(kctx->filp)) {
+ mmdrop(kctx->process_mm);
+ put_task_struct(kctx->task);
+ }
+ }
return err;
}
@@ -307,6 +353,11 @@ void kbase_context_common_term(struct kbase_context *kctx)
kbase_remove_kctx_from_process(kctx);
mutex_unlock(&kctx->kbdev->kctx_list_lock);
+ if (likely(kctx->filp)) {
+ mmdrop(kctx->process_mm);
+ put_task_struct(kctx->task);
+ }
+
KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u);
}
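
The pid-to-task lookup performed in kbase_context_common_init() above follows a standard kernel pattern: resolve the tgid under RCU, pin the task with get_task_struct(), and release it at context termination. A condensed sketch (the helper name is illustrative):

#include <linux/pid.h>
#include <linux/rcupdate.h>
#include <linux/version.h>
#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
#include <linux/sched/task.h>   /* get_task_struct()/put_task_struct() */
#else
#include <linux/sched.h>
#endif

static struct task_struct *example_pin_task(pid_t tgid)
{
        struct task_struct *task = NULL;
        struct pid *pid_struct;

        rcu_read_lock();
        pid_struct = find_get_pid(tgid);
        if (pid_struct) {
                task = pid_task(pid_struct, PIDTYPE_PID);
                if (task)
                        get_task_struct(task); /* keep it valid past the RCU section */
                put_pid(pid_struct);
        }
        rcu_read_unlock();

        return task; /* NULL if the process already exited; drop with put_task_struct() */
}
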
diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf.c
index 0a70ae7..5f63cbf9 100644
--- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf.c
+++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf.c
@@ -345,7 +345,7 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
ret = kbase_mem_pool_alloc_pages(
&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
- num_pages, queue->phys, false);
+ num_pages, queue->phys, false, kctx->task);
if (ret != num_pages)
goto phys_alloc_failed;
@@ -1126,7 +1126,7 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
/* Get physical page for a normal suspend buffer */
err = kbase_mem_pool_alloc_pages(
&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- nr_pages, &s_buf->phy[0], false);
+ nr_pages, &s_buf->phy[0], false, kctx->task);
if (err < 0)
goto phy_pages_alloc_failed;
@@ -3025,7 +3025,7 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- 1, &phys, false);
+ 1, &phys, false, NULL);
if (ret <= 0) {
fput(filp);
@@ -3061,7 +3061,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
- false);
+ false, NULL);
if (ret <= 0)
return ret;
diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware.c
index 74df40c..e840d3b 100644
--- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware.c
+++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -627,7 +627,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
ret = kbase_mem_pool_alloc_pages(
kbase_mem_pool_group_select(
kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
- num_pages_aligned, phys, false);
+ num_pages_aligned, phys, false, NULL);
}
}
@@ -2653,7 +2653,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- num_pages, phys, false);
+ num_pages, phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware_no_mali.c
index c716122..353339a 100644
--- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1485,7 +1485,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
ret = kbase_mem_pool_alloc_pages(
&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- num_pages, phys, false);
+ num_pages, phys, false, NULL);
if (ret <= 0)
goto phys_mem_pool_alloc_error;
diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_kcpu.c
index 2e0c26d..9115a69 100644
--- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_kcpu.c
+++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_kcpu.c
@@ -356,6 +356,14 @@ static int kbase_kcpu_jit_allocate_prepare(
lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
+ if (!kbase_mem_allow_alloc(kctx)) {
+ dev_dbg(kctx->kbdev->dev,
+ "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
+ current->comm, current->pid, kctx->tgid, kctx->id);
+ ret = -EINVAL;
+ goto out;
+ }
+
if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
count > ARRAY_SIZE(kctx->jit_alloc)) {
ret = -EINVAL;
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_defs.h b/drivers/gpu/arm/bv_r38p1/mali_kbase_defs.h
index b62ead4..852e5e4 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_defs.h
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1599,11 +1599,13 @@ struct kbase_sub_alloc {
* is scheduled in and an atom is pulled from the context's per
* slot runnable tree in JM GPU or GPU command queue
* group is programmed on CSG slot in CSF GPU.
- * @mm_update_lock: lock used for handling of special tracking page.
* @process_mm: Pointer to the memory descriptor of the process which
* created the context. Used for accounting the physical
* pages used for GPU allocations, done for the context,
- * to the memory consumed by the process.
+ * to the memory consumed by the process. A reference is taken
+ * on this descriptor for the Userspace created contexts so that
+ * Kbase can safely access it to update the memory usage counters.
+ * The reference is dropped on context termination.
* @gpu_va_end: End address of the GPU va space (in 4KB page units)
* @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all
* tiler heaps of the kbase context.
@@ -1731,7 +1733,10 @@ struct kbase_sub_alloc {
* @limited_core_mask: The mask that is applied to the affinity in case of atoms
* marked with BASE_JD_REQ_LIMITED_CORE_MASK.
* @platform_data: Pointer to platform specific per-context data.
- *
+ * @task: Pointer to the task structure of the main thread of the process
+ * that created the Kbase context. It is set only for contexts created
+ * by Userspace and not for contexts created internally by Kbase.
+ *
* A kernel base context is an entity among which the GPU is scheduled.
* Each context has its own GPU address space.
* Up to one context can be created for each client that opens the device file
@@ -1824,8 +1829,7 @@ struct kbase_context {
atomic_t refcount;
- spinlock_t mm_update_lock;
- struct mm_struct __rcu *process_mm;
+ struct mm_struct *process_mm;
u64 gpu_va_end;
#if MALI_USE_CSF
u32 running_total_tiler_heap_nr_chunks;
@@ -1889,6 +1893,8 @@ struct kbase_context {
#if !MALI_USE_CSF
void *platform_data;
#endif
+
+ struct task_struct *task;
};
#ifdef CONFIG_MALI_CINSTR_GWT
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.c
index b02f1d8..2ed908e 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.c
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -2050,6 +2050,7 @@ void kbase_sync_single(struct kbase_context *kctx,
src = ((unsigned char *)kmap(gpu_page)) + offset;
dst = ((unsigned char *)kmap(cpu_page)) + offset;
}
+
memcpy(dst, src, size);
kunmap(gpu_page);
kunmap(cpu_page);
@@ -2487,7 +2488,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
&kctx->mem_pools.large[alloc->group_id],
nr_lp * (SZ_2M / SZ_4K),
tp,
- true);
+ true, kctx->task);
if (res > 0) {
nr_left -= res;
@@ -2541,7 +2542,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
err = kbase_mem_pool_grow(
&kctx->mem_pools.large[alloc->group_id],
- 1);
+ 1, kctx->task);
if (err)
break;
} while (1);
@@ -2588,7 +2589,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
if (nr_left) {
res = kbase_mem_pool_alloc_pages(
&kctx->mem_pools.small[alloc->group_id],
- nr_left, tp, false);
+ nr_left, tp, false, kctx->task);
if (res <= 0)
goto alloc_failed;
}
@@ -4077,7 +4078,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
spin_unlock(&kctx->mem_partials_lock);
kbase_gpu_vm_unlock(kctx);
- ret = kbase_mem_pool_grow(pool, pool_delta);
+ ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
kbase_gpu_vm_lock(kctx);
if (ret)
@@ -4885,10 +4886,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
struct page **pages;
struct tagged_addr *pa;
long i;
- unsigned long address;
struct device *dev;
- unsigned long offset;
- unsigned long local_size;
unsigned long gwt_mask = ~0;
/* Calls to this function are inherently asynchronous, with respect to
@@ -4905,21 +4903,37 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc = reg->gpu_alloc;
pa = kbase_get_gpu_phy_pages(reg);
- address = alloc->imported.user_buf.address;
pinned_pages = alloc->nents;
pages = alloc->imported.user_buf.pages;
dev = kctx->kbdev->dev;
- offset = address & ~PAGE_MASK;
- local_size = alloc->imported.user_buf.size;
+
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case of memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for the whole of all pages that enclose the imported
+ * region, otherwise the initial content of memory would be wrong.
+ */
+
for (i = 0; i < pinned_pages; i++) {
dma_addr_t dma_addr;
- unsigned long min;
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
- min = MIN(PAGE_SIZE - offset, local_size);
- dma_addr = dma_map_page(dev, pages[i],
- offset, min,
- DMA_BIDIRECTIONAL);
err = dma_mapping_error(dev, dma_addr);
if (err)
goto unwind;
@@ -4927,8 +4941,6 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
alloc->imported.user_buf.dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- local_size -= min;
- offset = 0;
}
#ifdef CONFIG_MALI_CINSTR_GWT
@@ -4946,10 +4958,26 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
/* fall down */
unwind:
alloc->nents = 0;
+
+ /* Run the unmap loop in the same order as map loop, and perform again
+ * CPU cache synchronization to re-write the content of dirty CPU caches
+ * to memory. This is precautionary measure in case a GPU job has taken
+ * advantage of a partially GPU-mapped range to write and corrupt the
+ * content of memory, either inside or outside the imported region.
+ *
+ * Notice that this error recovery path doesn't try to be optimal and just
+ * flushes the entire page range.
+ */
+
while (i--) {
- dma_unmap_page(kctx->kbdev->dev,
- alloc->imported.user_buf.dma_addrs[i],
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #else
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
}
/* The user buffer could already have been previously pinned before
@@ -4975,6 +5003,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
{
long i;
struct page **pages;
+ unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
unsigned long size = alloc->imported.user_buf.size;
lockdep_assert_held(&kctx->reg_lock);
@@ -4988,12 +5017,94 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
#endif
for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
- unsigned long local_size;
+ unsigned long imported_size = MIN(size, PAGE_SIZE - offset_within_page);
+ /* Notice: this is a temporary variable that is used for DMA sync
+ * operations, and that could be incremented by an offset if the
+ * current page contains both imported and non-imported memory
+ * sub-regions.
+ *
+ * It is valid to add an offset to this value, because the offset
+ * is always kept within the physically contiguous dma-mapped range
+ * and there's no need to translate to physical address to offset it.
+ *
+ * This variable is not going to be used for the actual DMA unmap
+ * operation, that shall always use the original DMA address of the
+ * whole memory page.
+ */
dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
- local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
- dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+ /* Manual CPU cache synchronization.
+ *
+ * When the GPU returns ownership of the buffer to the CPU, the driver
+ * needs to treat imported and non-imported memory differently.
+ *
+ * The first case to consider is non-imported sub-regions at the
+ * beginning of the first page and at the end of last page. For these
+ * sub-regions: CPU cache shall be committed with a clean+invalidate,
+ * in order to keep the last CPU write.
+ *
+ * Imported region prefers the opposite treatment: this memory has been
+ * legitimately mapped and used by the GPU, hence GPU writes shall be
+ * committed to memory, while CPU cache shall be invalidated to make
+ * sure that CPU reads the correct memory content.
+ *
+ * The following diagram shows the expected value of the variables
+ * used in this loop in the corner case of an imported region enclosed
+ * by a single memory page:
+ *
+ * page boundary ->|-----------| <- dma_addr (initial value)
+ * | |
+ * | - - - - - | <- offset_within_page
+ * |XXXXXXXXXXX|\
+ * |XXXXXXXXXXX| \
+ * |XXXXXXXXXXX| }- imported_size
+ * |XXXXXXXXXXX| /
+ * |XXXXXXXXXXX|/
+ * | - - - - - | <- offset_within_page + imported_size
+ * | |\
+ * | | }- PAGE_SIZE - imported_size - offset_within_page
+ * | |/
+ * page boundary ->|-----------|
+ *
+ * If the imported region is enclosed by more than one page, then
+ * offset_within_page = 0 for any page after the first.
+ */
+ /* Only for first page: handle non-imported range at the beginning. */
+ if (offset_within_page > 0) {
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+ DMA_BIDIRECTIONAL);
+ dma_addr += offset_within_page;
+ }
+
+ /* For every page: handle imported range. */
+ if (imported_size > 0)
+ dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+ DMA_BIDIRECTIONAL);
+
+ /* Only for last page (that may coincide with first page):
+ * handle non-imported range at the end.
+ */
+ if ((imported_size + offset_within_page) < PAGE_SIZE) {
+ dma_addr += imported_size;
+ dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+ PAGE_SIZE - imported_size - offset_within_page,
+ DMA_BIDIRECTIONAL);
+ }
+
+ /* Notice: use the original DMA address to unmap the whole memory page. */
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
DMA_BIDIRECTIONAL);
+ #else
+ dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
+ PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
+
if (writeable)
set_page_dirty_lock(pages[i]);
#if !MALI_USE_CSF
@@ -5001,7 +5112,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
pages[i] = NULL;
#endif
- size -= local_size;
+ size -= imported_size;
}
#if !MALI_USE_CSF
alloc->nents = 0;
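
The unwind and unmap paths above are the mirror image of the import: clean the CPU caches first, then unmap without a second implicit invalidate. A short sketch of that unmap-side pairing (the helper name is illustrative):

#include <linux/dma-mapping.h>
#include <linux/version.h>

static void example_unmap_imported_page(struct device *dev, dma_addr_t dma_addr)
{
        /* Re-write dirty CPU cache lines to memory before tearing the
         * mapping down, so no user data is silently discarded.
         */
        dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);

#if KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE
        dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
#else
        dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
                             DMA_ATTR_SKIP_CPU_SYNC);
#endif
}
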
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.h b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.h
index 8a95154..a9a0d7e 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.h
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -947,7 +947,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
* @pages: Pointer to array where the physical address of the allocated
* pages will be stored.
* @partial_allowed: If fewer pages allocated is allowed
- *
+ * @page_owner: Pointer to the task that created the Kbase context for which
+ * the pages are being allocated. It can be NULL if the pages
+ * won't be associated with any Kbase context.
+ *
* Like kbase_mem_pool_alloc() but optimized for allocating many pages.
*
* Return:
@@ -963,7 +965,7 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
* this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
*/
int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
- struct tagged_addr *pages, bool partial_allowed);
+ struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner);
/**
* kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
@@ -1075,13 +1077,15 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
* kbase_mem_pool_grow - Grow the pool
* @pool: Memory pool to grow
* @nr_to_grow: Number of pages to add to the pool
- *
+ * @page_owner: Pointer to the task that created the Kbase context for which
+ * the memory pool is being grown. It can be NULL if the pages
+ * to be allocated won't be associated with any Kbase context.
+ *
* Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
* become larger than the maximum size specified.
*
* Return: 0 on success, -ENOMEM if unable to allocate sufficent pages
*/
-int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow, struct task_struct *page_owner);
/**
* kbase_mem_pool_trim - Grow or shrink the pool to a new size
@@ -2203,8 +2207,7 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
* kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed
* @kctx: Pointer to kbase context
*
- * Don't allow the allocation of GPU memory until user space has set up the
- * tracking page (which sets kctx->process_mm) or if the ioctl has been issued
+ * Don't allow the allocation of GPU memory if the ioctl has been issued
* from the forked child process using the mali device file fd inherited from
* the parent process.
*
@@ -2212,13 +2215,7 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
*/
static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
{
- bool allow_alloc = true;
-
- rcu_read_lock();
- allow_alloc = (rcu_dereference(kctx->process_mm) == current->mm);
- rcu_read_unlock();
-
- return allow_alloc;
+ return (kctx->process_mm == current->mm);
}
/**
@@ -2237,6 +2234,22 @@ static inline int kbase_mem_group_id_get(base_mem_alloc_flags flags)
}
/**
+ * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process
+ */
+static inline void kbase_mem_mmgrab(void)
+{
+ /* This merely takes a reference on the memory descriptor structure
+ * i.e. mm_struct of current process and not on its address space and
+ * so won't block the freeing of address space on process exit.
+ */
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+ atomic_inc(&current->mm->mm_count);
+#else
+ mmgrab(current->mm);
+#endif
+}
+
+/**
* kbase_mem_group_id_set - Set group ID into base_mem_alloc_flags
* @id: group ID(0~15) you want to encode
*
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_linux.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_linux.c
index 45345b7..f46e2b5 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -36,6 +36,7 @@
#include <linux/cache.h>
#include <linux/memory_group_manager.h>
+#include <linux/version.h>
#include <mali_kbase.h>
#include <mali_kbase_mem_linux.h>
#include <tl/mali_kbase_tracepoints.h>
@@ -1556,6 +1557,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
struct kbase_alloc_import_user_buf *user_buf;
struct page **pages = NULL;
+ struct tagged_addr *pa;
+ struct device *dev;
int write;
/* Flag supported only for dma-buf imported memory */
@@ -1697,31 +1700,48 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
reg->gpu_alloc->nents = 0;
reg->extension = 0;
+ pa = kbase_get_gpu_phy_pages(reg);
+ dev = kctx->kbdev->dev;
+
if (pages) {
- struct device *dev = kctx->kbdev->dev;
- unsigned long local_size = user_buf->size;
- unsigned long offset = user_buf->address & ~PAGE_MASK;
- struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
/* Top bit signifies that this was pinned on import */
user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
+ /* Manual CPU cache synchronization.
+ *
+ * The driver disables automatic CPU cache synchronization because the
+ * memory pages that enclose the imported region may also contain
+ * sub-regions which are not imported and that are allocated and used
+ * by the user process. This may be the case of memory at the beginning
+ * of the first page and at the end of the last page. Automatic CPU cache
+ * synchronization would force some operations on those memory allocations,
+ * unbeknown to the user process: in particular, a CPU cache invalidate
+ * upon unmapping would destroy the content of dirty CPU caches and cause
+ * the user process to lose CPU writes to the non-imported sub-regions.
+ *
+ * When the GPU claims ownership of the imported memory buffer, it shall
+ * commit CPU writes for the whole of all pages that enclose the imported
+ * region, otherwise the initial content of memory would be wrong.
+ */
+
for (i = 0; i < faulted_pages; i++) {
dma_addr_t dma_addr;
- unsigned long min;
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #else
+ dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+ DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
- min = MIN(PAGE_SIZE - offset, local_size);
- dma_addr = dma_map_page(dev, pages[i],
- offset, min,
- DMA_BIDIRECTIONAL);
if (dma_mapping_error(dev, dma_addr))
goto unwind_dma_map;
user_buf->dma_addrs[i] = dma_addr;
pa[i] = as_tagged(page_to_phys(pages[i]));
- local_size -= min;
- offset = 0;
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
}
reg->gpu_alloc->nents = faulted_pages;
@@ -1730,10 +1750,22 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
return reg;
unwind_dma_map:
+
+ /* Run the unmap loop in the same order as map loop, and perform again
+ * CPU cache synchronization to re-write the content of dirty CPU caches
+ * to memory. This precautionary measure is kept here to keep this code
+ * aligned with kbase_jd_user_buf_map() to allow for a potential refactor
+ * in the future.
+ */
while (i--) {
- dma_unmap_page(kctx->kbdev->dev,
- user_buf->dma_addrs[i],
- PAGE_SIZE, DMA_BIDIRECTIONAL);
+ dma_addr_t dma_addr = user_buf->dma_addrs[i];
+ dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+ dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ #else
+ dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ #endif
}
fault_mismatch:
if (pages) {
@@ -1749,7 +1781,6 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
no_region:
bad_size:
return NULL;
-
}
@@ -2029,7 +2060,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
*flags &= ~BASE_MEM_COHERENT_SYSTEM;
}
-
+ if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) {
+ dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached");
+ goto bad_flags;
+ }
if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
dev_warn(kctx->kbdev->dev,
"padding is only supported for UMM");
@@ -3190,79 +3224,27 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
{
- struct mm_struct *mm;
-
- rcu_read_lock();
- mm = rcu_dereference(kctx->process_mm);
- if (mm) {
- atomic_add(pages, &kctx->nonmapped_pages);
+ struct mm_struct *mm = kctx->process_mm;
+ if (unlikely(!mm))
+ return;
+ atomic_add(pages, &kctx->nonmapped_pages);
#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-#else
- spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
- spin_unlock(&mm->page_table_lock);
-#endif
- }
- rcu_read_unlock();
-}
-
-static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
-{
- int pages;
- struct mm_struct *mm;
-
- spin_lock(&kctx->mm_update_lock);
- mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
- if (!mm) {
- spin_unlock(&kctx->mm_update_lock);
- return;
- }
-
- rcu_assign_pointer(kctx->process_mm, NULL);
- spin_unlock(&kctx->mm_update_lock);
- synchronize_rcu();
-
- pages = atomic_xchg(&kctx->nonmapped_pages, 0);
-#ifdef SPLIT_RSS_COUNTING
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
#else
spin_lock(&mm->page_table_lock);
- kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+ kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
spin_unlock(&mm->page_table_lock);
#endif
}
-static void kbase_special_vm_close(struct vm_area_struct *vma)
-{
- struct kbase_context *kctx;
-
- kctx = vma->vm_private_data;
- kbasep_os_process_page_usage_drain(kctx);
-}
-
-static const struct vm_operations_struct kbase_vm_special_ops = {
- .close = kbase_special_vm_close,
-};
-
static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
{
- /* check that this is the only tracking page */
- spin_lock(&kctx->mm_update_lock);
- if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
- spin_unlock(&kctx->mm_update_lock);
- return -EFAULT;
- }
-
- rcu_assign_pointer(kctx->process_mm, current->mm);
-
- spin_unlock(&kctx->mm_update_lock);
+ if (vma_pages(vma) != 1)
+ return -EINVAL;
/* no real access */
vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
- vma->vm_ops = &kbase_vm_special_ops;
- vma->vm_private_data = kctx;
return 0;
}
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_pool.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_pool.c
index 4103bd1..1889e20 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_pool.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -27,7 +27,11 @@
#include <linux/shrinker.h>
#include <linux/atomic.h>
#include <linux/version.h>
-
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/signal.h>
+#else
+#include <linux/signal.h>
+#endif
#define pool_dbg(pool, format, ...) \
dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format, \
(pool->next_pool) ? "kctx" : "kbdev", \
@@ -37,6 +41,46 @@
#define NOT_DIRTY false
#define NOT_RECLAIMED false
+/**
+ * can_alloc_page() - Check if the current thread can allocate a physical page
+ *
+ * @pool: Pointer to the memory pool.
+ * @page_owner: Pointer to the task/process that created the Kbase context
+ * for which a page needs to be allocated. It can be NULL if
+ * the page won't be associated with any Kbase context.
+ * @alloc_from_kthread: Flag indicating that the current thread is a kernel thread.
+ *
+ * This function checks whether the current thread is a kernel thread that can
+ * legitimately request a physical page from the kernel. If a kernel thread is
+ * allocating a page for a Kbase context whose creating process is exiting or
+ * being killed, there is no point in performing the allocation.
+ *
+ * The check is particularly helpful when the system is running low on memory.
+ * When a page is allocated from the context of a kernel thread, the OoM killer
+ * does not consider the kernel thread for killing and the kernel keeps retrying
+ * the allocation for as long as the OoM killer is able to kill processes.
+ * The check lets the kernel thread exit the page allocation loop quickly once
+ * the OoM killer has started killing @page_owner, which unblocks the context
+ * termination for @page_owner and the freeing of the GPU memory it allocated.
+ * This helps to prevent a kernel panic and limits the number of innocent
+ * processes that get killed.
+ *
+ * Return: true if the page can be allocated, otherwise false.
+ */
+static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner,
+ const bool alloc_from_kthread)
+{
+ if (likely(!alloc_from_kthread || !page_owner))
+ return true;
+
+ if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) {
+ dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", __func__, page_owner->comm,
+ task_pid_nr(page_owner));
+ return false;
+ }
+
+ return true;
+}
static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
{
@@ -126,7 +170,6 @@ static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
struct page *p)
{
struct device *dev = pool->kbdev->dev;
-
dma_sync_single_for_device(dev, kbase_dma_addr(p),
(PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL);
}
@@ -233,11 +276,11 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
}
int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
- size_t nr_to_grow)
+ size_t nr_to_grow, struct task_struct *page_owner)
{
struct page *p;
size_t i;
-
+ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
kbase_mem_pool_lock(pool);
pool->dont_reclaim = true;
@@ -250,6 +293,8 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
return -ENOMEM;
}
kbase_mem_pool_unlock(pool);
+ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+ return -ENOMEM;
p = kbase_mem_alloc_page(pool);
if (!p) {
@@ -282,7 +327,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
if (new_size < cur_size)
kbase_mem_pool_shrink(pool, cur_size - new_size);
else if (new_size > cur_size)
- err = kbase_mem_pool_grow(pool, new_size - cur_size);
+ err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL);
if (err) {
size_t grown_size = kbase_mem_pool_size(pool);
@@ -528,13 +573,14 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
}
int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
- struct tagged_addr *pages, bool partial_allowed)
+ struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner)
{
struct page *p;
size_t nr_from_pool;
size_t i = 0;
int err = -ENOMEM;
size_t nr_pages_internal;
+ const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
nr_pages_internal = nr_4k_pages / (1u << (pool->order));
@@ -549,7 +595,6 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool));
while (nr_from_pool--) {
int j;
-
p = kbase_mem_pool_remove_locked(pool);
if (pool->order) {
pages[i++] = as_tagged_tag(page_to_phys(p),
@@ -567,7 +612,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
if (i != nr_4k_pages && pool->next_pool) {
/* Allocate via next pool */
err = kbase_mem_pool_alloc_pages(pool->next_pool,
- nr_4k_pages - i, pages + i, partial_allowed);
+ nr_4k_pages - i, pages + i, partial_allowed, page_owner);
if (err < 0)
goto err_rollback;
@@ -576,6 +621,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
} else {
/* Get any remaining pages from kernel */
while (i != nr_4k_pages) {
+ if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+ goto err_rollback;
+
p = kbase_mem_alloc_page(pool);
if (!p) {
if (partial_allowed)
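A hedged sketch of the matching caller side (the helper name and the use of group 0 are illustrative, not from the patch): call sites now forward the owning context's task, so when the allocation runs in a kernel worker and that owner is being OOM-killed, the whole request rolls back via err_rollback even if partial_allowed is set.

#include <mali_kbase.h>

/* Illustrative caller passing the new page_owner argument. */
static int example_alloc_for_ctx(struct kbase_context *kctx,
				 size_t nr_4k_pages,
				 struct tagged_addr *pages)
{
	struct kbase_mem_pool *pool = &kctx->mem_pools.small[0];

	return kbase_mem_pool_alloc_pages(pool, nr_4k_pages, pages,
					  false /* partial_allowed */,
					  kctx->task);
}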
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_softjobs.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_softjobs.c
index d58ed36..066a871 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_softjobs.c
@@ -973,6 +973,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
jit_info_copy_size_for_jit_version[kctx->jit_version];
WARN_ON(jit_info_user_copy_size > sizeof(*info));
+ if (!kbase_mem_allow_alloc(kctx)) {
+ dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
+ current->comm, current->pid, kctx->tgid, kctx->id);
+ ret = -EINVAL;
+ goto fail;
+ }
+
/* For backwards compatibility, and to prevent reading more than 1 jit
* info struct on jit version 1
*/
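The JIT prepare path above now refuses allocation requests that do not come from the context's own process. A hedged sketch of what kbase_mem_allow_alloc() is assumed to check (the authoritative definition lives in the driver headers and may differ):

#include <mali_kbase.h>

/* Assumption, for illustration only: the check compares the caller's mm
 * with the mm captured when the context was created, so a task that
 * merely inherited the fd cannot trigger JIT allocations.
 */
static inline bool example_mem_allow_alloc(struct kbase_context *kctx)
{
	return current->mm == kctx->process_mm;
}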
diff --git a/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_csf.c
index 04f5cdf..6c52f0c 100644
--- a/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_csf.c
+++ b/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_csf.c
@@ -149,17 +149,18 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
"true" : "false";
int as_no = as->number;
unsigned long flags;
+ const uintptr_t fault_addr = fault->addr;
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
- "GPU bus fault in AS%d at PA 0x%016llX\n"
+ "GPU bus fault in AS%d at PA %pK\n"
"PA_VALID: %s\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"access type 0x%X: %s\n"
"source id 0x%X\n"
"pid: %d\n",
- as_no, fault->addr,
+ as_no, (void *)fault_addr,
addr_valid,
status,
exception_type, kbase_gpu_exception_name(exception_type),
diff --git a/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_jm.c
index 3130b33..2442149 100644
--- a/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_jm.c
+++ b/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_jm.c
@@ -63,15 +63,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
u32 const exception_data = (status >> 8) & 0xFFFFFF;
int const as_no = as->number;
unsigned long flags;
+ const uintptr_t fault_addr = fault->addr;
/* terminal fault, print info about the fault */
dev_err(kbdev->dev,
- "GPU bus fault in AS%d at PA 0x%016llX\n"
+ "GPU bus fault in AS%d at PA %pK\n"
"raw fault status: 0x%X\n"
"exception type 0x%X: %s\n"
"exception data 0x%X\n"
"pid: %d\n",
- as_no, fault->addr,
+ as_no, (void *)fault_addr,
status,
exception_type, kbase_gpu_exception_name(exception_type),
exception_data,
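Both bus-fault reports above (CSF and job-manager backends) stop printing the raw physical address with 0x%016llX and route it through %pK instead, so the kernel's pointer-censoring policy (kptr_restrict) decides whether the value is printed, hashed or zeroed. A minimal sketch of the pattern, assuming only standard dev_err() semantics:

#include <linux/device.h>
#include <linux/types.h>

/* Hedged illustration: copy the 64-bit fault address into a uintptr_t
 * and print it with %pK so it is censored according to kptr_restrict.
 */
static void example_report_bus_fault(struct device *dev, int as_no, u64 addr)
{
	const uintptr_t fault_addr = (uintptr_t)addr;

	dev_err(dev, "GPU bus fault in AS%d at PA %pK\n", as_no,
		(void *)fault_addr);
}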
diff --git a/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu.c
index cee88c8..f82f77d 100644
--- a/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1358,6 +1358,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
kbase_gpu_vm_unlock(kctx);
} else {
int ret = -ENOMEM;
+ const u8 group_id = region->gpu_alloc->group_id;
kbase_gpu_vm_unlock(kctx);
@@ -1369,23 +1370,21 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
if (grow_2mb_pool) {
/* Round page requirement up to nearest 2 MB */
struct kbase_mem_pool *const lp_mem_pool =
- &kctx->mem_pools.large[
- region->gpu_alloc->group_id];
+ &kctx->mem_pools.large[group_id];
pages_to_grow = (pages_to_grow +
((1 << lp_mem_pool->order) - 1))
>> lp_mem_pool->order;
ret = kbase_mem_pool_grow(lp_mem_pool,
- pages_to_grow);
+ pages_to_grow, kctx->task);
} else {
#endif
struct kbase_mem_pool *const mem_pool =
- &kctx->mem_pools.small[
- region->gpu_alloc->group_id];
+ &kctx->mem_pools.small[group_id];
ret = kbase_mem_pool_grow(mem_pool,
- pages_to_grow);
+ pages_to_grow, kctx->task);
#ifdef CONFIG_MALI_2MB_ALLOC
}
#endif
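The remaining mali_kbase_mmu.c hunks thread the owning task through the same retry pattern: when the page walk hits -ENOMEM, drop the MMU lock, grow the pool on behalf of the context's task, retake the lock and retry. A hedged sketch of that pattern follows; example_fetch_pgd() is a placeholder for the driver's internal walk helpers, and device-internal tables (mmut->kctx == NULL) pass a NULL task so can_alloc_page() stays permissive, as in the kbase_mmu_init hunk below.

#include <mali_kbase.h>

/* Hedged sketch of the unlock/grow/relock retry loop used at the MMU
 * call sites; not part of the patch.
 */
static int example_walk_with_grow(struct kbase_device *kbdev,
				  struct kbase_mmu_table *mmut, int level,
				  int (*example_fetch_pgd)(struct kbase_mmu_table *))
{
	int err;

	mutex_lock(&mmut->mmu_lock);
	do {
		err = example_fetch_pgd(mmut);
		if (err != -ENOMEM)
			break;
		/* Refill the pool for the owning task; NULL disables the
		 * OOM-victim check for device-internal tables.
		 */
		mutex_unlock(&mmut->mmu_lock);
		err = kbase_mem_pool_grow(&kbdev->mem_pools.small[mmut->group_id],
					  level,
					  mmut->kctx ? mmut->kctx->task : NULL);
		mutex_lock(&mmut->mmu_lock);
	} while (!err);
	mutex_unlock(&mmut->mmu_lock);

	return err;
}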
@@ -1795,7 +1794,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
err = kbase_mem_pool_grow(
&kbdev->mem_pools.small[
kctx->mmu.group_id],
- MIDGARD_MMU_BOTTOMLEVEL);
+ MIDGARD_MMU_BOTTOMLEVEL, kctx->task);
mutex_lock(&kctx->mmu.mmu_lock);
} while (!err);
if (err) {
@@ -1956,7 +1955,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
mutex_unlock(&mmut->mmu_lock);
err = kbase_mem_pool_grow(
&kbdev->mem_pools.small[mmut->group_id],
- cur_level);
+ cur_level, mmut->kctx ? mmut->kctx->task : NULL);
mutex_lock(&mmut->mmu_lock);
} while (!err);
@@ -2733,7 +2732,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
err = kbase_mem_pool_grow(
&kbdev->mem_pools.small[mmut->group_id],
- MIDGARD_MMU_BOTTOMLEVEL);
+ MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
if (err) {
kbase_mmu_term(kbdev, mmut);
return -ENOMEM;