gpu: arm: Update from P615XXS7FXA1
diff --git a/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c b/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c
index 49cb945..df9f41d 100644
--- a/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c
+++ b/drivers/gpu/arm/b_r26p0/context/mali_kbase_context.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  *
- * (C) COPYRIGHT 2019-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -24,6 +24,12 @@
 /*
  * Base kernel context APIs
  */
+#include <linux/version.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/task.h>
+#else
+#include <linux/sched.h>
+#endif
 
 #include <mali_kbase.h>
 #include <gpu/mali_kbase_gpu_regmap.h>
@@ -132,13 +138,50 @@ int kbase_context_common_init(struct kbase_context *kctx)
 	/* creating a context is considered a disjoint event */
 	kbase_disjoint_event(kctx->kbdev);
 
-	spin_lock_init(&kctx->mm_update_lock);
 	kctx->process_mm = NULL;
+	kctx->task = NULL;
 	atomic_set(&kctx->nonmapped_pages, 0);
 	atomic_set(&kctx->permanent_mapped_pages, 0);
 	kctx->tgid = current->tgid;
 	kctx->pid = current->pid;
 
+	/* Check if this is a Userspace created context */
+	if (likely(kctx->filp)) {
+		struct pid *pid_struct;
+
+		rcu_read_lock();
+		pid_struct = find_get_pid(kctx->tgid);
+		if (likely(pid_struct)) {
+			struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID);
+
+			if (likely(task)) {
+				/* Take a reference on the task to avoid slow lookup
+				 * later on from the page allocation loop.
+				 */
+				get_task_struct(task);
+				kctx->task = task;
+			} else {
+				dev_err(kctx->kbdev->dev,
+					"Failed to get task pointer for %s/%d",
+					current->comm, current->pid);
+				err = -ESRCH;
+			}
+
+			put_pid(pid_struct);
+		} else {
+			dev_err(kctx->kbdev->dev,
+				"Failed to get pid pointer for %s/%d",
+				current->comm, current->pid);
+			err = -ESRCH;
+		}
+		rcu_read_unlock();
+
+		if (unlikely(err))
+			return err;
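+
+		/* Pin the address space descriptor (mm_struct) of the creating
+		 * process; GPU memory usage for this context is accounted
+		 * against it until the context is terminated.
+		 */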
+		kbase_mem_mmgrab();
+		kctx->process_mm = current->mm;
+	}
+
 	atomic_set(&kctx->used_pages, 0);
 
 	mutex_init(&kctx->reg_lock);
@@ -164,13 +207,16 @@ int kbase_context_common_init(struct kbase_context *kctx)
 	mutex_init(&kctx->legacy_hwcnt_lock);
 
 	mutex_lock(&kctx->kbdev->kctx_list_lock);
-
 	err = kbase_insert_kctx_to_process(kctx);
-	if (err)
-		dev_err(kctx->kbdev->dev,
-		"(err:%d) failed to insert kctx to kbase_process\n", err);
-
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
+	if (err) {
+		dev_err(kctx->kbdev->dev,
+			"(err:%d) failed to insert kctx to kbase_process", err);
+		if (likely(kctx->filp)) {
+			mmdrop(kctx->process_mm);
+			put_task_struct(kctx->task);
+		}
+	}
 
 	return err;
 }
@@ -254,7 +300,10 @@ void kbase_context_common_term(struct kbase_context *kctx)
 	kbase_remove_kctx_from_process(kctx);
 
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
-
+	if (likely(kctx->filp)) {
+		mmdrop(kctx->process_mm);
+		put_task_struct(kctx->task);
+	}
 	KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, kctx->tgid);
 }
 
diff --git a/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h
index 6dc57d044..0f593a6 100644
--- a/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h
+++ b/drivers/gpu/arm/b_r26p0/jm/mali_kbase_jm_ioctl.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -109,10 +109,12 @@
  * 11.26
  * - Added kinstr_jm API
  * 11.27
- * - Backwards compatible extension to HWC ioctl.
- */
+ * - Backwards compatible extension to HWC ioctl.
+ * - Relax the requirement to create a mapping with BASE_MEM_MAP_TRACKING_HANDLE
+ *   before allocating GPU memory for the context.
+ */
 #define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 27
+#define BASE_UK_VERSION_MINOR 38
 
 /**
  * struct kbase_ioctl_job_submit - Submit jobs/atoms to the kernel
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h b/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h
index c4ef69f..761c8a8 100644
--- a/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_defs.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2011-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -1470,11 +1470,13 @@ struct kbase_reg_zone {
  *                        Generally the reference count is incremented when the context
  *                        is scheduled in and an atom is pulled from the context's per
  *                        slot runnable tree.
- * @mm_update_lock:       lock used for handling of special tracking page.
  * @process_mm:           Pointer to the memory descriptor of the process which
  *                        created the context. Used for accounting the physical
  *                        pages used for GPU allocations, done for the context,
- *                        to the memory consumed by the process.
+ *                        to the memory consumed by the process. A reference is taken
+ *                        on this descriptor for the Userspace created contexts so that
+ *                        Kbase can safely access it to update the memory usage counters.
+ *                        The reference is dropped on context termination.
  * @gpu_va_end:           End address of the GPU va space (in 4KB page units)
  * @jit_va:               Indicates if a JIT_VA zone has been created.
  * @mem_profile_data:     Buffer containing the profiling information provided by
@@ -1603,6 +1605,10 @@ struct kbase_reg_zone {
  * @kinstr_jm:            Kernel job manager instrumentation context handle
  * @tl_kctx_list_node:    List item into the device timeline's list of
  *                        contexts, for timeline summarization.
+ * @task:                 Pointer to the task structure of the main thread of the process
+ *                        that created the Kbase context. It is set only for contexts
+ *                        created by Userspace and not for contexts created internally
+ *                        by Kbase.
  *
  * A kernel base context is an entity among which the GPU is scheduled.
  * Each context has its own GPU address space.
@@ -1697,8 +1703,7 @@ struct kbase_context {
 
 	atomic_t refcount;
 
-	spinlock_t         mm_update_lock;
-	struct mm_struct __rcu *process_mm;
+	struct mm_struct *process_mm;
 	u64 gpu_va_end;
 	bool jit_va;
 
@@ -1756,6 +1761,7 @@ struct kbase_context {
 #endif
 	struct kbase_kinstr_jm *kinstr_jm;
 	struct list_head tl_kctx_list_node;
+	struct task_struct *task;
 };
 
 #ifdef CONFIG_MALI_CINSTR_GWT
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c
index 6d56220..a70bcb9 100644
--- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -1757,6 +1757,7 @@ void kbase_sync_single(struct kbase_context *kctx,
 			src = ((unsigned char *)kmap(gpu_page)) + offset;
 			dst = ((unsigned char *)kmap(cpu_page)) + offset;
 		}
+
 		memcpy(dst, src, size);
 		kunmap(gpu_page);
 		kunmap(cpu_page);
@@ -2148,7 +2149,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 			&kctx->mem_pools.large[alloc->group_id],
 			 nr_lp * (SZ_2M / SZ_4K),
 			 tp,
-			 true);
+			 true, kctx->task);
 
 		if (res > 0) {
 			nr_left -= res;
@@ -2202,7 +2203,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 
 				err = kbase_mem_pool_grow(
 					&kctx->mem_pools.large[alloc->group_id],
-					1);
+					1, kctx->task);
 				if (err)
 					break;
 			} while (1);
@@ -2249,7 +2250,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 	if (nr_left) {
 		res = kbase_mem_pool_alloc_pages(
 			&kctx->mem_pools.small[alloc->group_id],
-			nr_left, tp, false);
+			nr_left, tp, false, kctx->task);
 		if (res <= 0)
 			goto alloc_failed;
 	}
@@ -3660,7 +3661,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
 		spin_unlock(&kctx->mem_partials_lock);
 
 		kbase_gpu_vm_unlock(kctx);
-		ret = kbase_mem_pool_grow(pool, pool_delta);
+		ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
 		kbase_gpu_vm_lock(kctx);
 
 		if (ret)
@@ -4429,10 +4430,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 	struct page **pages;
 	struct tagged_addr *pa;
 	long i;
-	unsigned long address;
 	struct device *dev;
-	unsigned long offset;
-	unsigned long local_size;
 	unsigned long gwt_mask = ~0;
 
     /* Calls to this function are inherently asynchronous, with respect to
@@ -4449,21 +4447,38 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 
 	alloc = reg->gpu_alloc;
 	pa = kbase_get_gpu_phy_pages(reg);
-	address = alloc->imported.user_buf.address;
 	pinned_pages = alloc->nents;
 	pages = alloc->imported.user_buf.pages;
 	dev = kctx->kbdev->dev;
-	offset = address & ~PAGE_MASK;
-	local_size = alloc->imported.user_buf.size;
+
+	/* Manual CPU cache synchronization.
+	 *
+	 * The driver disables automatic CPU cache synchronization because the
+	 * memory pages that enclose the imported region may also contain
+	 * sub-regions which are not imported and that are allocated and used
+	 * by the user process. This may be the case of memory at the beginning
+	 * of the first page and at the end of the last page. Automatic CPU cache
+	 * synchronization would force some operations on those memory allocations,
+	 * unbeknown to the user process: in particular, a CPU cache invalidate
+	 * upon unmapping would destroy the content of dirty CPU caches and cause
+	 * the user process to lose CPU writes to the non-imported sub-regions.
+	 *
+	 * When the GPU claims ownership of the imported memory buffer, it shall
+	 * commit CPU writes for the whole of all pages that enclose the imported
+	 * region, otherwise the initial content of memory would be wrong.
+	 */
 
 	for (i = 0; i < pinned_pages; i++) {
 		dma_addr_t dma_addr;
-		unsigned long min;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+		dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+		dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
+					      DMA_ATTR_SKIP_CPU_SYNC);
+#endif
 
-		min = MIN(PAGE_SIZE - offset, local_size);
-		dma_addr = dma_map_page(dev, pages[i],
-				offset, min,
-				DMA_BIDIRECTIONAL);
 		err = dma_mapping_error(dev, dma_addr);
 		if (err)
 			goto unwind;
@@ -4471,8 +4486,6 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
 		pa[i] = as_tagged(page_to_phys(pages[i]));
 
-		local_size -= min;
-		offset = 0;
 	}
 
 #ifdef CONFIG_MALI_CINSTR_GWT
@@ -4490,10 +4503,26 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 	/* fall down */
 unwind:
 	alloc->nents = 0;
+
+	/* Run the unmap loop in the same order as the map loop, and perform again
+	 * CPU cache synchronization to re-write the content of dirty CPU caches
+	 * to memory. This is a precautionary measure in case a GPU job has taken
+	 * advantage of a partially GPU-mapped range to write and corrupt the
+	 * content of memory, either inside or outside the imported region.
+	 *
+	 * Notice that this error recovery path doesn't try to be optimal and just
+	 * flushes the entire page range.
+	 */
 	while (i--) {
-		dma_unmap_page(kctx->kbdev->dev,
-				alloc->imported.user_buf.dma_addrs[i],
-				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+
+		dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+		dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+		dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+				     DMA_ATTR_SKIP_CPU_SYNC);
+#endif
 	}
 
 	/* The user buffer could already have been previously pinned before
@@ -4519,6 +4548,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
 {
 	long i;
 	struct page **pages;
+	unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
 	unsigned long size = alloc->imported.user_buf.size;
 	lockdep_assert_held(&kctx->reg_lock);
 
@@ -4532,18 +4562,100 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
 #endif
 
 	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
-		unsigned long local_size;
+		unsigned long imported_size = MIN(size, PAGE_SIZE - offset_within_page);
+		/* Notice: dma_addr is a temporary variable that is used for DMA sync
+		 * operations, and that could be incremented by an offset if the
+		 * current page contains both imported and non-imported memory
+		 * sub-regions.
+		 *
+		 * It is valid to add an offset to this value, because the offset
+		 * is always kept within the physically contiguous dma-mapped range
+		 * and there's no need to translate to physical address to offset it.
+		 *
+		 * This variable is not going to be used for the actual DMA unmap
+		 * operation, that shall always use the original DMA address of the
+		 * whole memory page.
+		 */
 		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
 
-		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
-		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+		/* Manual CPU cache synchronization.
+		 *
+		 * When the GPU returns ownership of the buffer to the CPU, the driver
+		 * needs to treat imported and non-imported memory differently.
+		 *
+		 * The first case to consider is non-imported sub-regions at the
+		 * beginning of the first page and at the end of the last page. For these
+		 * sub-regions: CPU cache shall be committed with a clean+invalidate,
+		 * in order to keep the last CPU write.
+		 *
+		 * The imported region prefers the opposite treatment: this memory has been
+		 * legitimately mapped and used by the GPU, hence GPU writes shall be
+		 * committed to memory, while CPU cache shall be invalidated to make
+		 * sure that CPU reads the correct memory content.
+		 *
+		 * The following diagram shows the expected value of the variables
+		 * used in this loop in the corner case of an imported region enclosed
+		 * by a single memory page:
+		 *
+		 * page boundary ->|-----------| <- dma_addr (initial value)
+		 *                 |           |
+		 *                 | - - - - - | <- offset_within_page
+		 *                 |XXXXXXXXXXX|\
+		 *                 |XXXXXXXXXXX| \
+		 *                 |XXXXXXXXXXX|  }- imported_size
+		 *                 |XXXXXXXXXXX| /
+		 *                 |XXXXXXXXXXX|/
+		 *                 | - - - - - | <- offset_within_page + imported_size
+		 *                 |           |\
+		 *                 |           | }- PAGE_SIZE - imported_size - offset_within_page
+		 *                 |           |/
+		 * page boundary ->|-----------|
+		 *
+		 * If the imported region is enclosed by more than one page, then
+		 * offset_within_page = 0 for any page after the first.
+		 */
+
+		/* Only for first page: handle non-imported range at the beginning. */
+		if (offset_within_page > 0) {
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+						   DMA_BIDIRECTIONAL);
+			dma_addr += offset_within_page;
+		}
+
+		/* For every page: handle imported range. */
+		if (imported_size > 0)
+			dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+						DMA_BIDIRECTIONAL);
+
+		/* Only for last page (that may coincide with the first page):
+		 * handle non-imported range at the end.
+		 */
+		if ((imported_size + offset_within_page) < PAGE_SIZE) {
+			dma_addr += imported_size;
+			dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+						   PAGE_SIZE - imported_size - offset_within_page,
+						   DMA_BIDIRECTIONAL);
+		}
+
+		/* Notice: use the original DMA address to unmap the whole memory page. */
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+		dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
 				DMA_BIDIRECTIONAL);
+#else
+		dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
+				     PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+#endif
+
 		if (writeable)
 			set_page_dirty_lock(pages[i]);
 		put_page(pages[i]);
 		pages[i] = NULL;
 
-		size -= local_size;
+		size -= imported_size;
 	}
 	alloc->nents = 0;
 }
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h
index 45991a1..d17a94a 100644
--- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem.h
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -805,7 +805,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
  * @pages:    Pointer to array where the physical address of the allocated
  *            pages will be stored.
  * @partial_allowed: If fewer pages allocated is allowed
- *
+ * @page_owner: Pointer to the task that created the Kbase context for which
+ *              the pages are being allocated. It can be NULL if the pages
+ *              won't be associated with any Kbase context.
+ *
  * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
  *
  * Return:
@@ -821,7 +823,7 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
  * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
  */
 int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
-		struct tagged_addr *pages, bool partial_allowed);
+		struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner);
 
 /**
  * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
@@ -933,13 +935,15 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
  * kbase_mem_pool_grow - Grow the pool
  * @pool:       Memory pool to grow
  * @nr_to_grow: Number of pages to add to the pool
- *
+ * @page_owner: Pointer to the task that created the Kbase context for which
+ *              the memory pool is being grown. It can be NULL if the pages
+ *              to be allocated won't be associated with any Kbase context.
+ *
  * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
  * become larger than the maximum size specified.
  *
  * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
  */
-int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow,
+			struct task_struct *page_owner);
 
 /**
  * kbase_mem_pool_trim - Grow or shrink the pool to a new size
@@ -1973,4 +1977,34 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
 	return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
 }
 
+/*
+ * kbase_mem_mmgrab - Wrapper function to take reference on mm_struct of current process
+ */
+static inline void kbase_mem_mmgrab(void)
+{
+	/* This merely takes a reference on the memory descriptor structure
+	 * i.e. mm_struct of current process and not on its address space and
+	 * so won't block the freeing of address space on process exit.
+	 */
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+	atomic_inc(&current->mm->mm_count);
+#else
+	mmgrab(current->mm);
+#endif
+}
+
+/**
+ * kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed
+ * @kctx: Pointer to kbase context
+ *
+ * Don't allow the allocation of GPU memory if the ioctl has been issued
+ * from the forked child process using the mali device file fd inherited from
+ * the parent process.
+ *
+ * Return: true if allocation is allowed.
+ */
+static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
+{
+	return (kctx->process_mm == current->mm);
+}
+
 #endif				/* _KBASE_MEM_H_ */
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c
index 19f64be..ced2b06 100644
--- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_linux.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -44,6 +44,7 @@
 #include <linux/cache.h>
 #include <linux/memory_group_manager.h>
 
+#include <linux/version.h>
 #include <mali_kbase.h>
 #include <mali_kbase_mem_linux.h>
 #include <tl/mali_kbase_tracepoints.h>
@@ -1494,6 +1495,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
 	struct kbase_alloc_import_user_buf *user_buf;
 	struct page **pages = NULL;
+	struct tagged_addr *pa;
+	struct device *dev;
 	int write;
 
 	/* Flag supported only for dma-buf imported memory */
@@ -1635,31 +1638,48 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
 	reg->gpu_alloc->nents = 0;
 	reg->extent = 0;
 
+	pa = kbase_get_gpu_phy_pages(reg);
+	dev = kctx->kbdev->dev;
+
 	if (pages) {
-		struct device *dev = kctx->kbdev->dev;
-		unsigned long local_size = user_buf->size;
-		unsigned long offset = user_buf->address & ~PAGE_MASK;
-		struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
 
 		/* Top bit signifies that this was pinned on import */
 		user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
 
+		/* Manual CPU cache synchronization.
+		 *
+		 * The driver disables automatic CPU cache synchronization because the
+		 * memory pages that enclose the imported region may also contain
+		 * sub-regions which are not imported and that are allocated and used
+		 * by the user process. This may be the case of memory at the beginning
+		 * of the first page and at the end of the last page. Automatic CPU cache
+		 * synchronization would force some operations on those memory allocations,
+		 * unbeknown to the user process: in particular, a CPU cache invalidate
+		 * upon unmapping would destroy the content of dirty CPU caches and cause
+		 * the user process to lose CPU writes to the non-imported sub-regions.
+		 *
+		 * When the GPU claims ownership of the imported memory buffer, it shall
+		 * commit CPU writes for the whole of all pages that enclose the imported
+		 * region, otherwise the initial content of memory would be wrong.
+		 */
+
 		for (i = 0; i < faulted_pages; i++) {
 			dma_addr_t dma_addr;
-			unsigned long min;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+			dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+			dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+						      DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+#endif
 
-			min = MIN(PAGE_SIZE - offset, local_size);
-			dma_addr = dma_map_page(dev, pages[i],
-					offset, min,
-					DMA_BIDIRECTIONAL);
 			if (dma_mapping_error(dev, dma_addr))
 				goto unwind_dma_map;
 
 			user_buf->dma_addrs[i] = dma_addr;
 			pa[i] = as_tagged(page_to_phys(pages[i]));
 
-			local_size -= min;
-			offset = 0;
+			dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
 		}
 
 		reg->gpu_alloc->nents = faulted_pages;
@@ -1668,10 +1688,22 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
 	return reg;
 
 unwind_dma_map:
+
+	/* Run the unmap loop in the same order as the map loop, and perform again
+	 * CPU cache synchronization to re-write the content of dirty CPU caches
+	 * to memory. This precautionary measure is kept here to keep this code
+	 * aligned with kbase_jd_user_buf_map() to allow for a potential refactor
+	 * in the future.
+	 */
 	while (i--) {
-		dma_unmap_page(kctx->kbdev->dev,
-				user_buf->dma_addrs[i],
-				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		dma_addr_t dma_addr = user_buf->dma_addrs[i];
+
+		dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+		dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+		dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+				     DMA_ATTR_SKIP_CPU_SYNC);
+#endif
 	}
 fault_mismatch:
 	if (pages) {
@@ -1687,7 +1719,6 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
 no_region:
 bad_size:
 	return NULL;
-
 }
 
 
@@ -1959,7 +1990,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
 		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
 		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
 	}
-
+	if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) {
+		dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached");
+		goto bad_flags;
+	}
 	if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
 		dev_warn(kctx->kbdev->dev,
 				"padding is only supported for UMM");
@@ -3078,73 +3112,23 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
 
 void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
 {
-	struct mm_struct *mm;
-
-	rcu_read_lock();
-	mm = rcu_dereference(kctx->process_mm);
-	if (mm) {
-		atomic_add(pages, &kctx->nonmapped_pages);
+	struct mm_struct *mm = kctx->process_mm;
+
+	if (unlikely(!mm))
+		return;
+
+	atomic_add(pages, &kctx->nonmapped_pages);
 #ifdef SPLIT_RSS_COUNTING
-		kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-#else
-		spin_lock(&mm->page_table_lock);
-		kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-		spin_unlock(&mm->page_table_lock);
-#endif
-	}
-	rcu_read_unlock();
-}
-
-static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
-{
-	int pages;
-	struct mm_struct *mm;
-
-	spin_lock(&kctx->mm_update_lock);
-	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
-	if (!mm) {
-		spin_unlock(&kctx->mm_update_lock);
-		return;
-	}
-
-	rcu_assign_pointer(kctx->process_mm, NULL);
-	spin_unlock(&kctx->mm_update_lock);
-	synchronize_rcu();
-
-	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
-#ifdef SPLIT_RSS_COUNTING
-	kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+	kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
 #else
 	spin_lock(&mm->page_table_lock);
-	kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+	kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
 	spin_unlock(&mm->page_table_lock);
 #endif
 }
 
-static void kbase_special_vm_close(struct vm_area_struct *vma)
-{
-	struct kbase_context *kctx;
-
-	kctx = vma->vm_private_data;
-	kbasep_os_process_page_usage_drain(kctx);
-}
-
-static const struct vm_operations_struct kbase_vm_special_ops = {
-	.close = kbase_special_vm_close,
-};
-
 static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
 {
-	/* check that this is the only tracking page */
-	spin_lock(&kctx->mm_update_lock);
-	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
-		spin_unlock(&kctx->mm_update_lock);
-		return -EFAULT;
-	}
-
-	rcu_assign_pointer(kctx->process_mm, current->mm);
-
-	spin_unlock(&kctx->mm_update_lock);
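+	/* The tracking page mapping is expected to cover exactly one page. */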
+	if (vma_pages(vma) != 1)
+		return -EINVAL;
 
 	/* no real access */
 	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
@@ -3153,9 +3137,7 @@ static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_
 #else
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_IO;
 #endif
-	vma->vm_ops = &kbase_vm_special_ops;
-	vma->vm_private_data = kctx;
 
-	return 0;
+	return 0;
 }
 
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c
index 0723e32..e6cb24e 100644
--- a/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_mem_pool.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2015-2019 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -28,7 +28,11 @@
 #include <linux/shrinker.h>
 #include <linux/atomic.h>
 #include <linux/version.h>
-
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/signal.h>
+#else
+#include <linux/signal.h>
+#endif
+
 #define pool_dbg(pool, format, ...) \
 	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
 		(pool->next_pool) ? "kctx" : "kbdev",	\
@@ -38,6 +42,46 @@
 
 #define NOT_DIRTY false
 #define NOT_RECLAIMED false
+
+/**
+ * can_alloc_page() - Check if the current thread can allocate a physical page
+ *
+ * @pool:                Pointer to the memory pool.
+ * @page_owner:          Pointer to the task/process that created the Kbase context
+ *                       for which a page needs to be allocated. It can be NULL if
+ *                       the page won't be associated with any Kbase context.
+ * @alloc_from_kthread:  Flag indicating that the current thread is a kernel thread.
+ *
+ * This function checks if the current thread is a kernel thread and can make a
+ * request to the kernel to allocate a physical page. If the kernel thread is
+ * allocating a page for the Kbase context and the process that created the context
+ * is exiting or is being killed, then there is no point in doing a page allocation.
+ *
+ * The check done by the function is particularly helpful when the system is running
+ * low on memory. When a page is allocated from the context of a kernel thread, the
+ * OoM killer doesn't consider the kernel thread for killing and the kernel keeps
+ * retrying to allocate the page as long as the OoM killer is able to kill processes.
+ * The check allows the kernel thread to quickly exit the page allocation loop once
+ * the OoM killer has initiated the killing of @page_owner, thereby unblocking the
+ * context termination for @page_owner and the freeing of GPU memory allocated by it.
+ * This helps in preventing a kernel panic and also limits the number of innocent
+ * processes that get killed.
+ *
+ * Return: true if the page can be allocated otherwise false.
+ */
+static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner,
+				  const bool alloc_from_kthread)
+{
+	if (likely(!alloc_from_kthread || !page_owner))
+		return true;
+
+	if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) {
+		dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", __func__, page_owner->comm,
+			 task_pid_nr(page_owner));
+		return false;
+	}
+
+	return true;
+}
 
 static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
 {
@@ -241,11 +285,11 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
 }
 
 int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
-		size_t nr_to_grow)
+		size_t nr_to_grow, struct task_struct *page_owner)
 {
 	struct page *p;
 	size_t i;
-
+	const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
+
 	kbase_mem_pool_lock(pool);
 
 	pool->dont_reclaim = true;
@@ -258,6 +302,8 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
 			return -ENOMEM;
 		}
 		kbase_mem_pool_unlock(pool);
+		if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+			return -ENOMEM;
 
 		p = kbase_mem_alloc_page(pool);
 		if (!p) {
@@ -290,7 +336,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
 	if (new_size < cur_size)
 		kbase_mem_pool_shrink(pool, cur_size - new_size);
 	else if (new_size > cur_size)
-		err = kbase_mem_pool_grow(pool, new_size - cur_size);
+		err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL);
 
 	if (err) {
 		size_t grown_size = kbase_mem_pool_size(pool);
@@ -553,13 +599,14 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
 }
 
 int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
-		struct tagged_addr *pages, bool partial_allowed)
+		struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner)
 {
 	struct page *p;
 	size_t nr_from_pool;
 	size_t i = 0;
 	int err = -ENOMEM;
 	size_t nr_pages_internal;
+	const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
 
 	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
 
@@ -591,7 +638,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
 	if (i != nr_4k_pages && pool->next_pool) {
 		/* Allocate via next pool */
 		err = kbase_mem_pool_alloc_pages(pool->next_pool,
-				nr_4k_pages - i, pages + i, partial_allowed);
+				nr_4k_pages - i, pages + i, partial_allowed, page_owner);
 
 		if (err < 0)
 			goto err_rollback;
@@ -600,6 +647,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
 	} else {
 		/* Get any remaining pages from kernel */
 		while (i != nr_4k_pages) {
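+			/* Don't keep retrying the allocation from a kernel thread
+			 * if the process that owns the pages is exiting or dying
+			 * (see can_alloc_page()).
+			 */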
+			if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+				goto err_rollback;
+
 			p = kbase_mem_alloc_page(pool);
 			if (!p) {
 				if (partial_allowed)
diff --git a/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c b/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c
index e3113f8..e3286a3 100644
--- a/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/b_r26p0/mali_kbase_softjobs.c
@@ -280,8 +280,6 @@ static void kbase_fence_debug_check_atom(struct kbase_jd_atom *katom)
 	struct device *dev = kctx->kbdev->dev;
 	int i;
 
-	dev_warn(dev,"kbase_fence_debug_check_atom\n");
-	
 	for (i = 0; i < 2; i++) {
 		struct kbase_jd_atom *dep;
 
@@ -985,6 +983,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
 			jit_info_copy_size_for_jit_version[kctx->jit_version];
 	WARN_ON(jit_info_user_copy_size > sizeof(*info));
 
+	if (!kbase_mem_allow_alloc(kctx)) {
+		dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
+			current->comm, current->pid, kctx->tgid, kctx->id);
+		ret = -EINVAL;
+		goto fail;
+	}
+
 	/* For backwards compatibility, and to prevent reading more than 1 jit
 	 * info struct on jit version 1
 	 */
diff --git a/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c
index cc233c99..321dd6d 100644
--- a/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c
+++ b/drivers/gpu/arm/b_r26p0/mmu/backend/mali_kbase_mmu_jm.c
@@ -65,15 +65,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
 	u32 const exception_data = (status >> 8) & 0xFFFFFF;
 	int const as_no = as->number;
 	unsigned long flags;
+	const uintptr_t fault_addr = fault->addr;
 
 	/* terminal fault, print info about the fault */
 	dev_err(kbdev->dev,
-		"GPU bus fault in AS%d at VA 0x%016llX\n"
+		"GPU bus fault in AS%d at VA %pK\n"
 		"raw fault status: 0x%X\n"
 		"exception type 0x%X: %s\n"
 		"exception data 0x%X\n"
 		"pid: %d\n",
-		as_no, fault->addr,
+		as_no, (void *)fault_addr,
 		status,
 		exception_type, kbase_gpu_exception_name(exception_type),
 		exception_data,
diff --git a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c
index 32479c7..5410204 100644
--- a/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/b_r26p0/mmu/mali_kbase_mmu.c
@@ -1,6 +1,6 @@
 /*
  *
- * (C) COPYRIGHT 2010-2020 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -1305,6 +1305,7 @@ void page_fault_worker(struct work_struct *data)
 		kbase_gpu_vm_unlock(kctx);
 	} else {
 		int ret = -ENOMEM;
+		const u8 group_id = region->gpu_alloc->group_id;
 
 		kbase_gpu_vm_unlock(kctx);
 
@@ -1316,23 +1317,21 @@ void page_fault_worker(struct work_struct *data)
 			if (grow_2mb_pool) {
 				/* Round page requirement up to nearest 2 MB */
 				struct kbase_mem_pool *const lp_mem_pool =
-					&kctx->mem_pools.large[
-					region->gpu_alloc->group_id];
+					&kctx->mem_pools.large[group_id];
 
 				pages_to_grow = (pages_to_grow +
 					((1 << lp_mem_pool->order) - 1))
 						>> lp_mem_pool->order;
 
 				ret = kbase_mem_pool_grow(lp_mem_pool,
-					pages_to_grow);
+					pages_to_grow, kctx->task);
 			} else {
 #endif
 				struct kbase_mem_pool *const mem_pool =
-					&kctx->mem_pools.small[
-					region->gpu_alloc->group_id];
+					&kctx->mem_pools.small[group_id];
 
 				ret = kbase_mem_pool_grow(mem_pool,
-					pages_to_grow);
+					pages_to_grow, kctx->task);
 #ifdef CONFIG_MALI_2MB_ALLOC
 			}
 #endif
@@ -1732,7 +1731,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 			err = kbase_mem_pool_grow(
 				&kbdev->mem_pools.small[
 					kctx->mmu.group_id],
-				MIDGARD_MMU_BOTTOMLEVEL);
+				MIDGARD_MMU_BOTTOMLEVEL, kctx->task);
 			mutex_lock(&kctx->mmu.mmu_lock);
 		} while (!err);
 		if (err) {
@@ -1917,7 +1916,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
 			mutex_unlock(&mmut->mmu_lock);
 			err = kbase_mem_pool_grow(
 				&kbdev->mem_pools.small[mmut->group_id],
-				cur_level);
+				cur_level, mmut->kctx ? mmut->kctx->task : NULL);
 			mutex_lock(&mmut->mmu_lock);
 		} while (!err);
 
@@ -2655,7 +2654,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
 
 		err = kbase_mem_pool_grow(
 			&kbdev->mem_pools.small[mmut->group_id],
-			MIDGARD_MMU_BOTTOMLEVEL);
+			MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
 		if (err) {
 			kbase_mmu_term(kbdev, mmut);
 			return -ENOMEM;
diff --git a/drivers/gpu/arm/bv_r32p1/context/mali_kbase_context.c b/drivers/gpu/arm/bv_r32p1/context/mali_kbase_context.c
index b4244b4..5c5196e 100644
--- a/drivers/gpu/arm/bv_r32p1/context/mali_kbase_context.c
+++ b/drivers/gpu/arm/bv_r32p1/context/mali_kbase_context.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,12 @@
 /*
  * Base kernel context APIs
  */
+#include <linux/version.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/task.h>
+#else
+#include <linux/sched.h>
+#endif
 
 #include <mali_kbase.h>
 #include <gpu/mali_kbase_gpu_regmap.h>
@@ -176,13 +182,50 @@ int kbase_context_common_init(struct kbase_context *kctx)
 	/* creating a context is considered a disjoint event */
 	kbase_disjoint_event(kctx->kbdev);
 
-	spin_lock_init(&kctx->mm_update_lock);
 	kctx->process_mm = NULL;
+	kctx->task = NULL;
 	atomic_set(&kctx->nonmapped_pages, 0);
 	atomic_set(&kctx->permanent_mapped_pages, 0);
 	kctx->tgid = current->tgid;
 	kctx->pid = current->pid;
 
+	/* Check if this is a Userspace created context */
+	if (likely(kctx->filp)) {
+		struct pid *pid_struct;
+
+		rcu_read_lock();
+		pid_struct = find_get_pid(kctx->tgid);
+		if (likely(pid_struct)) {
+			struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID);
+
+			if (likely(task)) {
+				/* Take a reference on the task to avoid slow lookup
+				 * later on from the page allocation loop.
+				 */
+				get_task_struct(task);
+				kctx->task = task;
+			} else {
+				dev_err(kctx->kbdev->dev,
+					"Failed to get task pointer for %s/%d",
+					current->comm, current->pid);
+				err = -ESRCH;
+			}
+
+			put_pid(pid_struct);
+		} else {
+			dev_err(kctx->kbdev->dev,
+				"Failed to get pid pointer for %s/%d",
+				current->comm, current->pid);
+			err = -ESRCH;
+		}
+		rcu_read_unlock();
+
+		if (unlikely(err))
+			return err;
+		kbase_mem_mmgrab();
+		kctx->process_mm = current->mm;
+	}
+
 	atomic_set(&kctx->used_pages, 0);
 
 	mutex_init(&kctx->reg_lock);
@@ -209,13 +252,16 @@ int kbase_context_common_init(struct kbase_context *kctx)
 	mutex_init(&kctx->legacy_hwcnt_lock);
 
 	mutex_lock(&kctx->kbdev->kctx_list_lock);
-
 	err = kbase_insert_kctx_to_process(kctx);
-	if (err)
-		dev_err(kctx->kbdev->dev,
-		"(err:%d) failed to insert kctx to kbase_process\n", err);
-
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
+	if (err) {
+		dev_err(kctx->kbdev->dev,
+			"(err:%d) failed to insert kctx to kbase_process", err);
+		if (likely(kctx->filp)) {
+			mmdrop(kctx->process_mm);
+			put_task_struct(kctx->task);
+		}
+	}
 
 	return err;
 }
@@ -301,6 +347,11 @@ void kbase_context_common_term(struct kbase_context *kctx)
 	kbase_remove_kctx_from_process(kctx);
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
 
+	if (likely(kctx->filp)) {
+		mmdrop(kctx->process_mm);
+		put_task_struct(kctx->task);
+	}
+
 	KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u);
 }
 
diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf.c b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf.c
index a83b817..28c8de9 100644
--- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf.c
+++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf.c
@@ -348,7 +348,7 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
 
 	ret = kbase_mem_pool_alloc_pages(
 				&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
-				num_pages, queue->phys, false);
+				num_pages, queue->phys, false, kctx->task);
 
 	if (ret != num_pages)
 		goto phys_alloc_failed;
@@ -1072,7 +1072,7 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
 	/* Get physical page for a normal suspend buffer */
 	err = kbase_mem_pool_alloc_pages(
 			&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-			nr_pages, &s_buf->phy[0], false);
+			nr_pages, &s_buf->phy[0], false, kctx->task);
 
 	if (err < 0)
 		goto phy_pages_alloc_failed;
@@ -2995,7 +2995,7 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
 
 	ret = kbase_mem_pool_alloc_pages(
 		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-		1, &phys, false);
+		1, &phys, false, NULL);
 
 	if (ret <= 0) {
 		fput(filp);
@@ -3031,7 +3031,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
 
 	ret = kbase_mem_pool_alloc_pages(
 		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
-		false);
+		false, NULL);
 
 	if (ret <= 0)
 		return ret;
diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.c
index 25767de..e6f7dfe 100644
--- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.c
+++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -546,7 +546,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
 	} else {
 		ret = kbase_mem_pool_alloc_pages(
 			&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-			num_pages, phys, false);
+			num_pages, phys, false, NULL);
 		if (ret < 0)
 			goto out;
 	}
@@ -2248,7 +2248,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
 
 	ret = kbase_mem_pool_alloc_pages(
 		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-		num_pages, phys, false);
+		num_pages, phys, false, NULL);
 	if (ret <= 0)
 		goto phys_mem_pool_alloc_error;
 
diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware_no_mali.c
index edd64be..72ac6e2 100644
--- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -1289,7 +1289,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
 
 	ret = kbase_mem_pool_alloc_pages(
 		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-		num_pages, phys, false);
+		num_pages, phys, false, NULL);
 	if (ret <= 0)
 		goto phys_mem_pool_alloc_error;
 
diff --git a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_kcpu.c
index 4e26a49..b8a83f9 100644
--- a/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_kcpu.c
+++ b/drivers/gpu/arm/bv_r32p1/csf/mali_kbase_csf_kcpu.c
@@ -335,6 +335,14 @@ static int kbase_kcpu_jit_allocate_prepare(
 
 	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
 
+	if (!kbase_mem_allow_alloc(kctx)) {
+		dev_dbg(kctx->kbdev->dev,
+			"Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
+			current->comm, current->pid, kctx->tgid, kctx->id);
+		ret = -EINVAL;
+		goto out;
+	}
+
 	if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
 			count > ARRAY_SIZE(kctx->jit_alloc)) {
 		ret = -EINVAL;
diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_defs.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_defs.h
index 7c8ea18..3526a81 100644
--- a/drivers/gpu/arm/bv_r32p1/mali_kbase_defs.h
+++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2011-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -1555,11 +1555,13 @@ struct kbase_sub_alloc {
  *                        is scheduled in and an atom is pulled from the context's per
  *                        slot runnable tree in JM GPU or GPU command queue
  *                        group is programmed on CSG slot in CSF GPU.
- * @mm_update_lock:       lock used for handling of special tracking page.
  * @process_mm:           Pointer to the memory descriptor of the process which
  *                        created the context. Used for accounting the physical
  *                        pages used for GPU allocations, done for the context,
- *                        to the memory consumed by the process.
+ *                        to the memory consumed by the process. A reference is taken
+ *                        on this descriptor for the Userspace created contexts so that
+ *                        Kbase can safely access it to update the memory usage counters.
+ *                        The reference is dropped on context termination.
  * @gpu_va_end:           End address of the GPU va space (in 4KB page units)
  * @jit_va:               Indicates if a JIT_VA zone has been created.
  * @mem_profile_data:     Buffer containing the profiling information provided by
@@ -1691,7 +1693,10 @@ struct kbase_sub_alloc {
  * @limited_core_mask:    The mask that is applied to the affinity in case of atoms
  *                        marked with BASE_JD_REQ_LIMITED_CORE_MASK.
  * @platform_data:        Pointer to platform specific per-context data.
- *
+ * @task:                 Pointer to the task structure of the main thread of the process
+ *                        that created the Kbase context. It is set only for contexts
+ *                        created by Userspace and not for contexts created internally
+ *                        by Kbase.
+ *
  * A kernel base context is an entity among which the GPU is scheduled.
  * Each context has its own GPU address space.
  * Up to one context can be created for each client that opens the device file
@@ -1781,8 +1786,7 @@ struct kbase_context {
 
 	atomic_t refcount;
 
-	spinlock_t         mm_update_lock;
-	struct mm_struct __rcu *process_mm;
+	struct mm_struct *process_mm;
 	u64 gpu_va_end;
 	bool jit_va;
 
@@ -1844,6 +1848,8 @@ struct kbase_context {
 #if !MALI_USE_CSF
 	void *platform_data;
 #endif
+
+	struct task_struct *task;
 };
 
 #ifdef CONFIG_MALI_CINSTR_GWT
diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.c
index 606fc5e..b894f20 100644
--- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.c
+++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -1830,6 +1830,7 @@ void kbase_sync_single(struct kbase_context *kctx,
 			src = ((unsigned char *)kmap(gpu_page)) + offset;
 			dst = ((unsigned char *)kmap(cpu_page)) + offset;
 		}
+
 		memcpy(dst, src, size);
 		kunmap(gpu_page);
 		kunmap(cpu_page);
@@ -2247,7 +2248,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 			&kctx->mem_pools.large[alloc->group_id],
 			 nr_lp * (SZ_2M / SZ_4K),
 			 tp,
-			 true);
+			 true, kctx->task);
 
 		if (res > 0) {
 			nr_left -= res;
@@ -2301,7 +2302,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 
 				err = kbase_mem_pool_grow(
 					&kctx->mem_pools.large[alloc->group_id],
-					1);
+					1, kctx->task);
 				if (err)
 					break;
 			} while (1);
@@ -2348,7 +2349,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 	if (nr_left) {
 		res = kbase_mem_pool_alloc_pages(
 			&kctx->mem_pools.small[alloc->group_id],
-			nr_left, tp, false);
+			nr_left, tp, false, kctx->task);
 		if (res <= 0)
 			goto alloc_failed;
 	}
@@ -3830,7 +3831,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
 		spin_unlock(&kctx->mem_partials_lock);
 
 		kbase_gpu_vm_unlock(kctx);
-		ret = kbase_mem_pool_grow(pool, pool_delta);
+		ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
 		kbase_gpu_vm_lock(kctx);
 
 		if (ret)
@@ -4655,10 +4656,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 	struct page **pages;
 	struct tagged_addr *pa;
 	long i;
-	unsigned long address;
 	struct device *dev;
-	unsigned long offset;
-	unsigned long local_size;
 	unsigned long gwt_mask = ~0;
 
 	/* Calls to this function are inherently asynchronous, with respect to
@@ -4675,12 +4673,28 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 
 	alloc = reg->gpu_alloc;
 	pa = kbase_get_gpu_phy_pages(reg);
-	address = alloc->imported.user_buf.address;
 	pinned_pages = alloc->nents;
 	pages = alloc->imported.user_buf.pages;
 	dev = kctx->kbdev->dev;
-	offset = address & ~PAGE_MASK;
-	local_size = alloc->imported.user_buf.size;
+
+	/* Manual CPU cache synchronization.
+	 *
+	 * The driver disables automatic CPU cache synchronization because the
+	 * memory pages that enclose the imported region may also contain
+	 * sub-regions which are not imported and that are allocated and used
+	 * by the user process. This may be the case of memory at the beginning
+	 * of the first page and at the end of the last page. Automatic CPU cache
+	 * synchronization would force some operations on those memory allocations,
+	 * unbeknown to the user process: in particular, a CPU cache invalidate
+	 * upon unmapping would destroy the content of dirty CPU caches and cause
+	 * the user process to lose CPU writes to the non-imported sub-regions.
+	 *
+	 * When the GPU claims ownership of the imported memory buffer, it shall
+	 * commit CPU writes for the whole of all pages that enclose the imported
+	 * region, otherwise the initial content of memory would be wrong.
+	 */
 
 	/* The user buffer could already have been previously pinned before
 	 * entering this function, and hence there could potentially be CPU
@@ -4689,21 +4703,21 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 	kbase_mem_shrink_cpu_mapping(kctx, reg, 0, pinned_pages);
 
 	for (i = 0; i < pinned_pages; i++) {
 		dma_addr_t dma_addr;
-		unsigned long min;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+		dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+		dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
+					      DMA_ATTR_SKIP_CPU_SYNC);
+#endif
 
-		min = MIN(PAGE_SIZE - offset, local_size);
-		dma_addr = dma_map_page(dev, pages[i],
-				offset, min,
-				DMA_BIDIRECTIONAL);
 		if (dma_mapping_error(dev, dma_addr))
 			goto unwind;
 
 		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
 		pa[i] = as_tagged(page_to_phys(pages[i]));
 
-		local_size -= min;
-		offset = 0;
 	}
 
 #ifdef CONFIG_MALI_CINSTR_GWT
@@ -4721,10 +4735,26 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 	/* fall down */
 unwind:
 	alloc->nents = 0;
+
+       /* Run the unmap loop in the same order as map loop, and perform again
+        * CPU cache synchronization to re-write the content of dirty CPU caches
+        * to memory. This is a precautionary measure in case a GPU job has taken
+        * advantage of a partially GPU-mapped range to write and corrupt the
+        * content of memory, either inside or outside the imported region.
+        *
+        * Notice that this error recovery path doesn't try to be optimal and just
+        * flushes the entire page range.
+        */
+
 	while (i--) {
-		dma_unmap_page(kctx->kbdev->dev,
-				alloc->imported.user_buf.dma_addrs[i],
-				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+                dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+                #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+                        dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+                #else
+                        dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+                                    DMA_ATTR_SKIP_CPU_SYNC);
+                #endif
 	}
 
 	while (++i < pinned_pages) {
@@ -4744,6 +4774,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
 {
 	long i;
 	struct page **pages;
+        unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
 	unsigned long size = alloc->imported.user_buf.size;
 	lockdep_assert_held(&kctx->reg_lock);
 
@@ -4757,12 +4788,94 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
 #endif
 
 	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
-		unsigned long local_size;
+                unsigned long imported_size = MIN(size, PAGE_SIZE - offset_within_page);
+               /* Notice: this is a temporary variable that is used for DMA sync
+                * operations, and that could be incremented by an offset if the
+                * current page contains both imported and non-imported memory
+                * sub-regions.
+                *
+                * It is valid to add an offset to this value, because the offset
+                * is always kept within the physically contiguous dma-mapped range
+                * and there's no need to translate to physical address to offset it.
+                *
+                * This variable is not going to be used for the actual DMA unmap
+                * operation, that shall always use the original DMA address of the
+                * whole memory page.
+                */
 		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
 
-		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
-		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+               /* Manual CPU cache synchronization.
+                *
+                * When the GPU returns ownership of the buffer to the CPU, the driver
+                * needs to treat imported and non-imported memory differently.
+                *
+                * The first case to consider is non-imported sub-regions at the
+                * beginning of the first page and at the end of last page. For these
+                * sub-regions: CPU cache shall be committed with a clean+invalidate,
+                * in order to keep the last CPU write.
+                *
+                * Imported region prefers the opposite treatment: this memory has been
+                * legitimately mapped and used by the GPU, hence GPU writes shall be
+                * committed to memory, while CPU cache shall be invalidated to make
+                * sure that CPU reads the correct memory content.
+                *
+                * The following diagram shows the expected value of the variables
+                * used in this loop in the corner case of an imported region enclosed
+                * by a single memory page:
+                *
+                * page boundary ->|-----------| <- dma_addr (initial value)
+                *                 |           |
+                *                 | - - - - - | <- offset_within_page
+                *                 |XXXXXXXXXXX|\
+                *                 |XXXXXXXXXXX| \
+                *                 |XXXXXXXXXXX|  }- imported_size
+                *                 |XXXXXXXXXXX| /
+                *                 |XXXXXXXXXXX|/
+                *                 | - - - - - | <- offset_within_page + imported_size
+                *                 |           |\
+                *                 |           | }- PAGE_SIZE - imported_size - offset_within_page
+                *                 |           |/
+                * page boundary ->|-----------|
+                *
+                * If the imported region is enclosed by more than one page, then
+                * offset_within_page = 0 for any page after the first.
+                */
+               /* Only for first page: handle non-imported range at the beginning. */
+               if (offset_within_page > 0) {
+                       dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+                                                  DMA_BIDIRECTIONAL);
+                       dma_addr += offset_within_page;
+               }
+
+               /* For every page: handle imported range. */
+               if (imported_size > 0)
+                       dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+                                               DMA_BIDIRECTIONAL);
+
+               /* Only for last page (that may coincide with first page):
+                * handle non-imported range at the end.
+                */
+               if ((imported_size + offset_within_page) < PAGE_SIZE) {
+                       dma_addr += imported_size;
+                       dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+                                                  PAGE_SIZE - imported_size - offset_within_page,
+                                                  DMA_BIDIRECTIONAL);
+               }
+
+               /* Notice: use the original DMA address to unmap the whole memory page. */
+                #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+                        dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
 				DMA_BIDIRECTIONAL);
+                #else
+                        dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
+                                    PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+                #endif
+
 		if (writeable)
 			set_page_dirty_lock(pages[i]);
 #if !MALI_USE_CSF
@@ -4770,7 +4883,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
 		pages[i] = NULL;
 #endif
 
-		size -= local_size;
+		size -= imported_size;
 	}
 #if !MALI_USE_CSF
 	alloc->nents = 0;
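
The unmap loop above splits every page of the imported user buffer into up to three sub-ranges (non-imported head, imported body, non-imported tail) and issues separate cache maintenance for each. The following standalone sketch shows the same split in isolation; it assumes a page that was dma-mapped with DMA_ATTR_SKIP_CPU_SYNC, and the helper name is illustrative only, not part of the patch:

    /* Sketch only: per-page CPU cache maintenance for a page that mixes
     * imported and non-imported sub-regions. Assumes <linux/dma-mapping.h>.
     */
    static void example_sync_partial_page(struct device *dev, dma_addr_t dma_addr,
                                          unsigned long offset_within_page,
                                          unsigned long imported_size)
    {
            /* Non-imported head (first page only): write back CPU data. */
            if (offset_within_page > 0)
                    dma_sync_single_for_device(dev, dma_addr, offset_within_page,
                                               DMA_BIDIRECTIONAL);

            /* Imported body: return ownership to the CPU (invalidate). */
            if (imported_size > 0)
                    dma_sync_single_for_cpu(dev, dma_addr + offset_within_page,
                                            imported_size, DMA_BIDIRECTIONAL);

            /* Non-imported tail (last page only): write back CPU data. */
            if (offset_within_page + imported_size < PAGE_SIZE)
                    dma_sync_single_for_device(dev,
                                               dma_addr + offset_within_page + imported_size,
                                               PAGE_SIZE - offset_within_page - imported_size,
                                               DMA_BIDIRECTIONAL);
    }
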
diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.h b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.h
index 8fcea14..a208550 100644
--- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.h
+++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -889,7 +889,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
  * @pages:    Pointer to array where the physical address of the allocated
  *            pages will be stored.
  * @partial_allowed: If fewer pages allocated is allowed
- *
+ * @page_owner: Task that created the Kbase context for which the pages are
+ *              being allocated, or NULL if not associated with any Kbase context.
+ *
  * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
  *
  * Return:
@@ -905,7 +907,7 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
  * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
  */
 int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
-		struct tagged_addr *pages, bool partial_allowed);
+		struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner);
 
 /**
  * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
@@ -1017,13 +1019,15 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
  * kbase_mem_pool_grow - Grow the pool
  * @pool:       Memory pool to grow
  * @nr_to_grow: Number of pages to add to the pool
- *
+ * @page_owner: Task that created the Kbase context for which the pool is being
+ *              grown, or NULL if the pages are not tied to any Kbase context.
+ *
  * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
  * become larger than the maximum size specified.
  *
  * Returns: 0 on success, -ENOMEM if unable to allocate sufficent pages
  */
-int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow, struct task_struct *page_owner);
 
 /**
  * kbase_mem_pool_trim - Grow or shrink the pool to a new size
@@ -2120,12 +2124,26 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
 	return &kctx->reg_zone[KBASE_REG_ZONE_IDX(zone_bits)];
 }
 
+/*
+ * kbase_mem_mmgrab - Wrapper function to take a reference on the mm_struct of the current process
+ */
+static inline void kbase_mem_mmgrab(void)
+{
+	/* This merely takes a reference on the memory descriptor structure,
+	 * i.e. the mm_struct of the current process, and not on its address
+	 * space, so it won't block the freeing of the address space on exit.
+	 */
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+	atomic_inc(&current->mm->mm_count);
+#else
+	mmgrab(current->mm);
+#endif
+}
 /**
  * kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed
  * @kctx: Pointer to kbase context
  *
- * Don't allow the allocation of GPU memory until user space has set up the
- * tracking page (which sets kctx->process_mm) or if the ioctl has been issued
+ * Don't allow the allocation of GPU memory if the ioctl has been issued
  * from the forked child process using the mali device file fd inherited from
  * the parent process.
  *
@@ -2133,12 +2151,6 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
  */
 static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
 {
-	bool allow_alloc = true;
-
-	rcu_read_lock();
-	allow_alloc = (rcu_dereference(kctx->process_mm) == current->mm);
-	rcu_read_unlock();
-
-	return allow_alloc;
+        return (kctx->process_mm == current->mm);
 }
 #endif				/* _KBASE_MEM_H_ */
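
With the tracking-page machinery gone, the ownership model is simpler: kbase_mem_mmgrab() pins current->mm when the context is created, kbase_mem_allow_alloc() only has to compare pointers, and the reference is dropped with mmdrop() at context termination. A minimal sketch of how an allocation path might use the check is shown below; the caller name is hypothetical and not part of the patch:

    /* Sketch only: rejecting GPU memory allocations issued through a device
     * fd that was inherited across fork().
     */
    static int kbase_example_alloc_ioctl(struct kbase_context *kctx)
    {
            /* A forked child shares the fd but has its own mm_struct, so the
             * pointer comparison inside kbase_mem_allow_alloc() fails and the
             * request is refused instead of being accounted to the wrong
             * process.
             */
            if (!kbase_mem_allow_alloc(kctx))
                    return -EINVAL;

            /* ... proceed with the allocation for the owning process ... */
            return 0;
    }
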
diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.c
index f28c556..d82bc01 100644
--- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_linux.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -39,6 +39,7 @@
 #include <linux/cache.h>
 #include <linux/memory_group_manager.h>
 
+#include <linux/version.h>
 #include <mali_kbase.h>
 #include <mali_kbase_mem_linux.h>
 #include <tl/mali_kbase_tracepoints.h>
@@ -1527,6 +1528,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
 	struct kbase_alloc_import_user_buf *user_buf;
 	struct page **pages = NULL;
+        struct tagged_addr *pa;
+        struct device *dev;
 	int write;
 
 	/* Flag supported only for dma-buf imported memory */
@@ -1667,31 +1670,48 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
 	reg->gpu_alloc->nents = 0;
 	reg->extension = 0;
 
+        pa = kbase_get_gpu_phy_pages(reg);
+        dev = kctx->kbdev->dev;
+
 	if (pages) {
-		struct device *dev = kctx->kbdev->dev;
-		unsigned long local_size = user_buf->size;
-		unsigned long offset = user_buf->address & ~PAGE_MASK;
-		struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
 
 		/* Top bit signifies that this was pinned on import */
 		user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
 
+               /* Manual CPU cache synchronization.
+                *
+                * The driver disables automatic CPU cache synchronization because the
+                * memory pages that enclose the imported region may also contain
+                * sub-regions which are not imported and that are allocated and used
+                * by the user process. This may be the case of memory at the beginning
+                * of the first page and at the end of the last page. Automatic CPU cache
+                * synchronization would force some operations on those memory allocations,
+                * unbeknown to the user process: in particular, a CPU cache invalidate
+                * upon unmapping would destroy the content of dirty CPU caches and cause
+                * the user process to lose CPU writes to the non-imported sub-regions.
+                *
+                * When the GPU claims ownership of the imported memory buffer, it shall
+                * commit CPU writes for the whole of all pages that enclose the imported
+                * region, otherwise the initial content of memory would be wrong.
+                */
+
 		for (i = 0; i < faulted_pages; i++) {
 			dma_addr_t dma_addr;
-			unsigned long min;
+                        #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+                                dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+                        #else
+                                dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+                                                     DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+                        #endif
 
-			min = MIN(PAGE_SIZE - offset, local_size);
-			dma_addr = dma_map_page(dev, pages[i],
-					offset, min,
-					DMA_BIDIRECTIONAL);
 			if (dma_mapping_error(dev, dma_addr))
 				goto unwind_dma_map;
 
 			user_buf->dma_addrs[i] = dma_addr;
 			pa[i] = as_tagged(page_to_phys(pages[i]));
 
-			local_size -= min;
-			offset = 0;
+			dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
 		}
 
 		reg->gpu_alloc->nents = faulted_pages;
@@ -1700,10 +1720,22 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
 	return reg;
 
 unwind_dma_map:
+
+       /* Run the unmap loop in the same order as map loop, and perform again
+        * CPU cache synchronization to re-write the content of dirty CPU caches
+        * to memory. This precautionary measure is kept here to keep this code
+        * aligned with kbase_jd_user_buf_map() to allow for a potential refactor
+        * in the future.
+        */
 	while (i--) {
-		dma_unmap_page(kctx->kbdev->dev,
-				user_buf->dma_addrs[i],
-				PAGE_SIZE, DMA_BIDIRECTIONAL);
+        dma_addr_t dma_addr = user_buf->dma_addrs[i];
+               dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+        #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+               dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+        #else
+               dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+                                    DMA_ATTR_SKIP_CPU_SYNC);
+        #endif
 	}
 fault_mismatch:
 	if (pages) {
@@ -1723,7 +1755,6 @@ KERNEL_VERSION(4, 5, 0) > LINUX_VERSION_CODE
 no_region:
 bad_size:
 	return NULL;
-
 }
 
 
@@ -1999,7 +2030,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
 		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
 		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
 	}
-
+	if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) {
+		dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached");
+		goto bad_flags;
+	}
 	if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
 		dev_warn(kctx->kbdev->dev,
 				"padding is only supported for UMM");
@@ -3147,79 +3181,27 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
 
 void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
 {
-	struct mm_struct *mm;
-
-	rcu_read_lock();
-	mm = rcu_dereference(kctx->process_mm);
-	if (mm) {
-		atomic_add(pages, &kctx->nonmapped_pages);
+        struct mm_struct *mm = kctx->process_mm;
+        if (unlikely(!mm))
+                return;
+        atomic_add(pages, &kctx->nonmapped_pages);
 #ifdef SPLIT_RSS_COUNTING
-		kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-#else
-		spin_lock(&mm->page_table_lock);
-		kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-		spin_unlock(&mm->page_table_lock);
-#endif
-	}
-	rcu_read_unlock();
-}
-
-static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
-{
-	int pages;
-	struct mm_struct *mm;
-
-	spin_lock(&kctx->mm_update_lock);
-	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
-	if (!mm) {
-		spin_unlock(&kctx->mm_update_lock);
-		return;
-	}
-
-	rcu_assign_pointer(kctx->process_mm, NULL);
-	spin_unlock(&kctx->mm_update_lock);
-	synchronize_rcu();
-
-	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
-#ifdef SPLIT_RSS_COUNTING
-	kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+	kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
 #else
 	spin_lock(&mm->page_table_lock);
-	kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+	kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
 	spin_unlock(&mm->page_table_lock);
 #endif
 }
 
-static void kbase_special_vm_close(struct vm_area_struct *vma)
-{
-	struct kbase_context *kctx;
-
-	kctx = vma->vm_private_data;
-	kbasep_os_process_page_usage_drain(kctx);
-}
-
-static const struct vm_operations_struct kbase_vm_special_ops = {
-	.close = kbase_special_vm_close,
-};
-
 static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
 {
-	/* check that this is the only tracking page */
-	spin_lock(&kctx->mm_update_lock);
-	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
-		spin_unlock(&kctx->mm_update_lock);
-		return -EFAULT;
-	}
-
-	rcu_assign_pointer(kctx->process_mm, current->mm);
-
-	spin_unlock(&kctx->mm_update_lock);
+        if (vma_pages(vma) != 1)
+                return -EINVAL;
 
 	/* no real access */
 	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
-	vma->vm_ops = &kbase_vm_special_ops;
-	vma->vm_private_data = kctx;
 
 	return 0;
 }
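
The import path above maps each pinned page with DMA_ATTR_SKIP_CPU_SYNC (plain dma_map_page() on kernels before 4.10) and then commits CPU writes for the whole page explicitly, so that sub-regions the user process still owns are never invalidated behind its back. A condensed sketch of that per-page pattern follows, with a hypothetical helper name and assuming <linux/dma-mapping.h> and <linux/version.h>:

    /* Sketch only: map one pinned user page without automatic cache
     * maintenance, then write back the whole page before the GPU uses it.
     */
    static int example_map_user_page(struct device *dev, struct page *page,
                                     dma_addr_t *dma_addr)
    {
    #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
            *dma_addr = dma_map_page(dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
    #else
            *dma_addr = dma_map_page_attrs(dev, page, 0, PAGE_SIZE,
                                           DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
    #endif
            if (dma_mapping_error(dev, *dma_addr))
                    return -ENOMEM;

            /* Commit CPU writes for the full page, including any non-imported
             * sub-regions that happen to share it with the imported range.
             */
            dma_sync_single_for_device(dev, *dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
            return 0;
    }
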
diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_pool.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_pool.c
index a11da82..1889e20 100644
--- a/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_mem_pool.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,7 +27,11 @@
 #include <linux/shrinker.h>
 #include <linux/atomic.h>
 #include <linux/version.h>
-
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/signal.h>
+#else
+#include <linux/signal.h>
+#endif
 #define pool_dbg(pool, format, ...) \
 	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
 		(pool->next_pool) ? "kctx" : "kbdev",	\
@@ -37,6 +41,46 @@
 
 #define NOT_DIRTY false
 #define NOT_RECLAIMED false
+/**
+ * can_alloc_page() - Check if the current thread can allocate a physical page
+ *
+ * @pool:                Pointer to the memory pool.
+ * @page_owner:          Pointer to the task/process that created the Kbase context
+ *                       for which a page needs to be allocated. It can be NULL if
+ *                       the page won't be associated with any Kbase context.
+ * @alloc_from_kthread:  Flag indicating that the current thread is a kernel thread.
+ *
+ * This function checks if the current thread is a kernel thread and can make a
+ * request to the kernel to allocate a physical page. If the kernel thread is
+ * allocating a page for the Kbase context and the process that created the context
+ * is exiting or is being killed, then there is no point in doing a page allocation.
+ *
+ * The check done by the function is particularly helpful when the system is running
+ * low on memory. When a page is allocated from the context of a kernel thread, the
+ * OoM killer doesn't consider the kernel thread for killing and the kernel keeps
+ * retrying to allocate the page as long as the OoM killer is able to kill processes.
+ * The check allows the kernel thread to quickly exit the page allocation loop once
+ * the OoM killer has initiated the killing of @page_owner, thereby unblocking the
+ * context termination for @page_owner and the freeing of GPU memory allocated by it.
+ * This helps prevent a kernel panic and also limits the number of innocent processes
+ * that get killed.
+ *
+ * Return: true if the page can be allocated, otherwise false.
+ */
+static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner,
+                                 const bool alloc_from_kthread)
+{
+       if (likely(!alloc_from_kthread || !page_owner))
+               return true;
+
+       if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) {
+               dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", __func__, page_owner->comm,
+                        task_pid_nr(page_owner));
+               return false;
+       }
+
+       return true;
+}
 
 static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
 {
@@ -232,11 +276,11 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
 }
 
 int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
-		size_t nr_to_grow)
+		size_t nr_to_grow, struct task_struct *page_owner)
 {
 	struct page *p;
 	size_t i;
-
+        const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
 	kbase_mem_pool_lock(pool);
 
 	pool->dont_reclaim = true;
@@ -249,6 +293,8 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
 			return -ENOMEM;
 		}
 		kbase_mem_pool_unlock(pool);
+                if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+                        return -ENOMEM;
 
 		p = kbase_mem_alloc_page(pool);
 		if (!p) {
@@ -281,7 +327,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
 	if (new_size < cur_size)
 		kbase_mem_pool_shrink(pool, cur_size - new_size);
 	else if (new_size > cur_size)
-		err = kbase_mem_pool_grow(pool, new_size - cur_size);
+		err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL);
 
 	if (err) {
 		size_t grown_size = kbase_mem_pool_size(pool);
@@ -527,13 +573,14 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
 }
 
 int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
-		struct tagged_addr *pages, bool partial_allowed)
+		struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner)
 {
 	struct page *p;
 	size_t nr_from_pool;
 	size_t i = 0;
 	int err = -ENOMEM;
 	size_t nr_pages_internal;
+        const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
 
 	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
 
@@ -565,7 +612,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
 	if (i != nr_4k_pages && pool->next_pool) {
 		/* Allocate via next pool */
 		err = kbase_mem_pool_alloc_pages(pool->next_pool,
-				nr_4k_pages - i, pages + i, partial_allowed);
+				nr_4k_pages - i, pages + i, partial_allowed, page_owner);
 
 		if (err < 0)
 			goto err_rollback;
@@ -574,6 +621,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
 	} else {
 		/* Get any remaining pages from kernel */
 		while (i != nr_4k_pages) {
+                       if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+                               goto err_rollback;
+
 			p = kbase_mem_alloc_page(pool);
 			if (!p) {
 				if (partial_allowed)
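
can_alloc_page() gives kernel-thread allocations an early exit once the process that owns the pages is exiting or has been targeted by the OoM killer, instead of retrying indefinitely. Below is a sketch of how a refill loop might use it; the function is illustrative only and the pool hand-off is elided:

    /* Sketch only: a pool refill loop that stops as soon as the owning
     * process is dying.
     */
    static int example_refill(struct kbase_mem_pool *pool,
                              struct task_struct *page_owner, size_t nr_pages)
    {
            const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
            size_t i;

            for (i = 0; i < nr_pages; i++) {
                    struct page *p;

                    /* Kernel threads are never picked by the OoM killer, so
                     * bail out once @page_owner is exiting or fatally signalled
                     * rather than keep the allocation loop (and the context
                     * termination) blocked.
                     */
                    if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
                            return -ENOMEM;

                    p = kbase_mem_alloc_page(pool);
                    if (!p)
                            return -ENOMEM;

                    /* ... hand p over to the pool (omitted in this sketch) ... */
            }

            return 0;
    }
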
diff --git a/drivers/gpu/arm/bv_r32p1/mali_kbase_softjobs.c b/drivers/gpu/arm/bv_r32p1/mali_kbase_softjobs.c
index 3ca5a70..be23c8f 100644
--- a/drivers/gpu/arm/bv_r32p1/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/bv_r32p1/mali_kbase_softjobs.c
@@ -981,6 +981,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
 			jit_info_copy_size_for_jit_version[kctx->jit_version];
 	WARN_ON(jit_info_user_copy_size > sizeof(*info));
 
+	if (!kbase_mem_allow_alloc(kctx)) {
+		dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
+			current->comm, current->pid, kctx->tgid, kctx->id);
+		ret = -EINVAL;
+		goto fail;
+	}
+
 	/* For backwards compatibility, and to prevent reading more than 1 jit
 	 * info struct on jit version 1
 	 */
diff --git a/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_csf.c
index 05253ae..067b5dc 100644
--- a/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_csf.c
+++ b/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_csf.c
@@ -148,17 +148,18 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
 					"true" : "false";
 	int as_no = as->number;
 	unsigned long flags;
+        const uintptr_t fault_addr = fault->addr;
 
 	/* terminal fault, print info about the fault */
 	dev_err(kbdev->dev,
-		"GPU bus fault in AS%d at VA 0x%016llX\n"
+                "GPU bus fault in AS%d at VA %pK\n"
 		"VA_VALID: %s\n"
 		"raw fault status: 0x%X\n"
 		"exception type 0x%X: %s\n"
 		"access type 0x%X: %s\n"
 		"source id 0x%X\n"
 		"pid: %d\n",
-		as_no, fault->addr,
+                as_no, (void *)fault_addr,
 		addr_valid,
 		status,
 		exception_type, kbase_gpu_exception_name(exception_type),
diff --git a/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_jm.c
index 6d2c6e2..ebe35bc 100644
--- a/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_jm.c
+++ b/drivers/gpu/arm/bv_r32p1/mmu/backend/mali_kbase_mmu_jm.c
@@ -63,15 +63,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
 	u32 const exception_data = (status >> 8) & 0xFFFFFF;
 	int const as_no = as->number;
 	unsigned long flags;
+	const uintptr_t fault_addr = fault->addr;
 
 	/* terminal fault, print info about the fault */
 	dev_err(kbdev->dev,
-		"GPU bus fault in AS%d at PA 0x%016llX\n"
+		"GPU bus fault in AS%d at PA %pK\n"
 		"raw fault status: 0x%X\n"
 		"exception type 0x%X: %s\n"
 		"exception data 0x%X\n"
 		"pid: %d\n",
-		as_no, fault->addr,
+		as_no, (void *)fault_addr,
 		status,
 		exception_type, kbase_gpu_exception_name(exception_type),
 		exception_data,
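
Both bus-fault reports now print the fault address through %pK rather than a raw 0x%016llX, so the value is censored according to the kptr_restrict setting instead of always exposing the raw address in the kernel log. A minimal sketch of the pattern, with a hypothetical function name:

    /* Sketch only: logging an address via %pK so it honours kptr_restrict. */
    static void example_report_fault(struct device *dev, u64 addr)
    {
            const uintptr_t fault_addr = (uintptr_t)addr;

            dev_err(dev, "GPU bus fault at %pK\n", (void *)fault_addr);
    }
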
diff --git a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.c
index b3a7fcc..68eda00 100644
--- a/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/bv_r32p1/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -1315,6 +1315,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
 		kbase_gpu_vm_unlock(kctx);
 	} else {
 		int ret = -ENOMEM;
+		const u8 group_id = region->gpu_alloc->group_id;
 
 		kbase_gpu_vm_unlock(kctx);
 
@@ -1326,23 +1327,21 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
 			if (grow_2mb_pool) {
 				/* Round page requirement up to nearest 2 MB */
 				struct kbase_mem_pool *const lp_mem_pool =
-					&kctx->mem_pools.large[
-					region->gpu_alloc->group_id];
+					&kctx->mem_pools.large[group_id];
 
 				pages_to_grow = (pages_to_grow +
 					((1 << lp_mem_pool->order) - 1))
 						>> lp_mem_pool->order;
 
 				ret = kbase_mem_pool_grow(lp_mem_pool,
-					pages_to_grow);
+					pages_to_grow, kctx->task);
 			} else {
 #endif
 				struct kbase_mem_pool *const mem_pool =
-					&kctx->mem_pools.small[
-					region->gpu_alloc->group_id];
+					&kctx->mem_pools.small[group_id];
 
 				ret = kbase_mem_pool_grow(mem_pool,
-					pages_to_grow);
+					pages_to_grow, kctx->task);
 #ifdef CONFIG_MALI_2MB_ALLOC
 			}
 #endif
@@ -1756,7 +1755,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 				&kbdev->mem_pools.small[
 #endif
 					kctx->mmu.group_id],
-				MIDGARD_MMU_BOTTOMLEVEL);
+				MIDGARD_MMU_BOTTOMLEVEL, kctx->task);
 			mutex_lock(&kctx->mmu.mmu_lock);
 		} while (!err);
 		if (err) {
@@ -1950,7 +1949,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev,
 #else
 				&kbdev->mem_pools.small[mmut->group_id],
 #endif
-				cur_level);
+				cur_level, mmut->kctx ? mmut->kctx->task : NULL);
 			mutex_lock(&mmut->mmu_lock);
 		} while (!err);
 
@@ -2690,7 +2689,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
 #else
 			&kbdev->mem_pools.small[mmut->group_id],
 #endif
-			MIDGARD_MMU_BOTTOMLEVEL);
+			MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
 		if (err) {
 			kbase_mmu_term(kbdev, mmut);
 			return -ENOMEM;
diff --git a/drivers/gpu/arm/bv_r38p1/context/mali_kbase_context.c b/drivers/gpu/arm/bv_r38p1/context/mali_kbase_context.c
index 5fc1636..f67dddd 100644
--- a/drivers/gpu/arm/bv_r38p1/context/mali_kbase_context.c
+++ b/drivers/gpu/arm/bv_r38p1/context/mali_kbase_context.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2019-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,12 @@
 /*
  * Base kernel context APIs
  */
+#include <linux/version.h>
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/task.h>
+#else
+#include <linux/sched.h>
+#endif
 
 #include <mali_kbase.h>
 #include <gpu/mali_kbase_gpu_regmap.h>
@@ -176,13 +182,50 @@ int kbase_context_common_init(struct kbase_context *kctx)
 	/* creating a context is considered a disjoint event */
 	kbase_disjoint_event(kctx->kbdev);
 
-	spin_lock_init(&kctx->mm_update_lock);
 	kctx->process_mm = NULL;
+	kctx->task = NULL;
 	atomic_set(&kctx->nonmapped_pages, 0);
 	atomic_set(&kctx->permanent_mapped_pages, 0);
 	kctx->tgid = current->tgid;
 	kctx->pid = current->pid;
 
+	/* Check if this is a Userspace created context */
+	if (likely(kctx->filp)) {
+		struct pid *pid_struct;
+
+		rcu_read_lock();
+		pid_struct = find_get_pid(kctx->tgid);
+		if (likely(pid_struct)) {
+			struct task_struct *task = pid_task(pid_struct, PIDTYPE_PID);
+
+			if (likely(task)) {
+				/* Take a reference on the task to avoid slow lookup
+				 * later on from the page allocation loop.
+				 */
+				get_task_struct(task);
+				kctx->task = task;
+			} else {
+				dev_err(kctx->kbdev->dev,
+					"Failed to get task pointer for %s/%d",
+					current->comm, current->pid);
+				err = -ESRCH;
+			}
+
+			put_pid(pid_struct);
+		} else {
+			dev_err(kctx->kbdev->dev,
+				"Failed to get pid pointer for %s/%d",
+				current->comm, current->pid);
+			err = -ESRCH;
+		}
+		rcu_read_unlock();
+
+		if (unlikely(err))
+			return err;
+                kbase_mem_mmgrab();
+                kctx->process_mm = current->mm;
+	}
+
 	atomic_set(&kctx->used_pages, 0);
 
 	mutex_init(&kctx->reg_lock);
@@ -213,13 +256,16 @@ int kbase_context_common_init(struct kbase_context *kctx)
 	kctx->id = atomic_add_return(1, &(kctx->kbdev->ctx_num)) - 1;
 
 	mutex_lock(&kctx->kbdev->kctx_list_lock);
-
 	err = kbase_insert_kctx_to_process(kctx);
-	if (err)
-		dev_err(kctx->kbdev->dev,
-		"(err:%d) failed to insert kctx to kbase_process\n", err);
-
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
+	if (err) {
+		dev_err(kctx->kbdev->dev,
+			"(err:%d) failed to insert kctx to kbase_process", err);
+		if (likely(kctx->filp)) {
+                        mmdrop(kctx->process_mm);
+                        put_task_struct(kctx->task);
+                }
+        }
 
 	return err;
 }
@@ -307,6 +353,11 @@ void kbase_context_common_term(struct kbase_context *kctx)
 	kbase_remove_kctx_from_process(kctx);
 	mutex_unlock(&kctx->kbdev->kctx_list_lock);
 
+	if (likely(kctx->filp)) {
+                mmdrop(kctx->process_mm);
+                put_task_struct(kctx->task);
+        }
+
 	KBASE_KTRACE_ADD(kctx->kbdev, CORE_CTX_DESTROY, kctx, 0u);
 }
 
diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf.c
index 0a70ae7..5f63cbf9 100644
--- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf.c
+++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf.c
@@ -345,7 +345,7 @@ int kbase_csf_alloc_command_stream_user_pages(struct kbase_context *kctx,
 
 	ret = kbase_mem_pool_alloc_pages(
 				&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_IO],
-				num_pages, queue->phys, false);
+				num_pages, queue->phys, false, kctx->task);
 
 	if (ret != num_pages)
 		goto phys_alloc_failed;
@@ -1126,7 +1126,7 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
 	/* Get physical page for a normal suspend buffer */
 	err = kbase_mem_pool_alloc_pages(
 			&kctx->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-			nr_pages, &s_buf->phy[0], false);
+			nr_pages, &s_buf->phy[0], false, kctx->task);
 
 	if (err < 0)
 		goto phy_pages_alloc_failed;
@@ -3025,7 +3025,7 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
 
 	ret = kbase_mem_pool_alloc_pages(
 		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-		1, &phys, false);
+		1, &phys, false, NULL);
 
 	if (ret <= 0) {
 		fput(filp);
@@ -3061,7 +3061,7 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev)
 
 	ret = kbase_mem_pool_alloc_pages(
 		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], 1, &phys,
-		false);
+		false, NULL);
 
 	if (ret <= 0)
 		return ret;
diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware.c
index 74df40c..e840d3b 100644
--- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware.c
+++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -627,7 +627,7 @@ static int parse_memory_setup_entry(struct kbase_device *kbdev,
 			ret = kbase_mem_pool_alloc_pages(
 				kbase_mem_pool_group_select(
 					kbdev, KBASE_MEM_GROUP_CSF_FW, is_small_page),
-				num_pages_aligned, phys, false);
+				num_pages_aligned, phys, false, NULL);
 		}
 	}
 
@@ -2653,7 +2653,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
 
 	ret = kbase_mem_pool_alloc_pages(
 		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-		num_pages, phys, false);
+		num_pages, phys, false, NULL);
 	if (ret <= 0)
 		goto phys_mem_pool_alloc_error;
 
diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware_no_mali.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware_no_mali.c
index c716122..353339a 100644
--- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2018-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -1485,7 +1485,7 @@ int kbase_csf_firmware_mcu_shared_mapping_init(
 
 	ret = kbase_mem_pool_alloc_pages(
 		&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
-		num_pages, phys, false);
+		num_pages, phys, false, NULL);
 	if (ret <= 0)
 		goto phys_mem_pool_alloc_error;
 
diff --git a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_kcpu.c b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_kcpu.c
index 2e0c26d..9115a69 100644
--- a/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_kcpu.c
+++ b/drivers/gpu/arm/bv_r38p1/csf/mali_kbase_csf_kcpu.c
@@ -356,6 +356,14 @@ static int kbase_kcpu_jit_allocate_prepare(
 
 	lockdep_assert_held(&kctx->csf.kcpu_queues.lock);
 
+	if (!kbase_mem_allow_alloc(kctx)) {
+		dev_dbg(kctx->kbdev->dev,
+			"Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
+			current->comm, current->pid, kctx->tgid, kctx->id);
+		ret = -EINVAL;
+		goto out;
+	}
+
 	if (!data || count > kcpu_queue->kctx->jit_max_allocations ||
 			count > ARRAY_SIZE(kctx->jit_alloc)) {
 		ret = -EINVAL;
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_defs.h b/drivers/gpu/arm/bv_r38p1/mali_kbase_defs.h
index b62ead4..852e5e4 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_defs.h
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_defs.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2011-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -1599,11 +1599,13 @@ struct kbase_sub_alloc {
  *                        is scheduled in and an atom is pulled from the context's per
  *                        slot runnable tree in JM GPU or GPU command queue
  *                        group is programmed on CSG slot in CSF GPU.
- * @mm_update_lock:       lock used for handling of special tracking page.
  * @process_mm:           Pointer to the memory descriptor of the process which
  *                        created the context. Used for accounting the physical
  *                        pages used for GPU allocations, done for the context,
- *                        to the memory consumed by the process.
+ *                        to the memory consumed by the process. A reference is taken
+ *                        on this descriptor for the Userspace created contexts so that
+ *                        Kbase can safely access it to update the memory usage counters.
+ *                        The reference is dropped on context termination.
  * @gpu_va_end:           End address of the GPU va space (in 4KB page units)
  * @running_total_tiler_heap_nr_chunks: Running total of number of chunks in all
  *                        tiler heaps of the kbase context.
@@ -1731,7 +1733,10 @@ struct kbase_sub_alloc {
  * @limited_core_mask:    The mask that is applied to the affinity in case of atoms
  *                        marked with BASE_JD_REQ_LIMITED_CORE_MASK.
  * @platform_data:        Pointer to platform specific per-context data.
- *
+ * @task:                 Pointer to the task structure of the main thread of the process
+ *                        that created the Kbase context. It is set only for Userspace-created
+ *                        contexts, not for contexts created internally by Kbase.
+ *
  * A kernel base context is an entity among which the GPU is scheduled.
  * Each context has its own GPU address space.
  * Up to one context can be created for each client that opens the device file
@@ -1824,8 +1829,7 @@ struct kbase_context {
 
 	atomic_t refcount;
 
-	spinlock_t         mm_update_lock;
-	struct mm_struct __rcu *process_mm;
+        struct mm_struct *process_mm;
 	u64 gpu_va_end;
 #if MALI_USE_CSF
 	u32 running_total_tiler_heap_nr_chunks;
@@ -1889,6 +1893,8 @@ struct kbase_context {
 #if !MALI_USE_CSF
 	void *platform_data;
 #endif
+
+       struct task_struct *task;
 };
 
 #ifdef CONFIG_MALI_CINSTR_GWT
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.c
index b02f1d8..2ed908e 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.c
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -2050,6 +2050,7 @@ void kbase_sync_single(struct kbase_context *kctx,
 			src = ((unsigned char *)kmap(gpu_page)) + offset;
 			dst = ((unsigned char *)kmap(cpu_page)) + offset;
 		}
+
 		memcpy(dst, src, size);
 		kunmap(gpu_page);
 		kunmap(cpu_page);
@@ -2487,7 +2488,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 			&kctx->mem_pools.large[alloc->group_id],
 			 nr_lp * (SZ_2M / SZ_4K),
 			 tp,
-			 true);
+			 true, kctx->task);
 
 		if (res > 0) {
 			nr_left -= res;
@@ -2541,7 +2542,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 
 				err = kbase_mem_pool_grow(
 					&kctx->mem_pools.large[alloc->group_id],
-					1);
+					1, kctx->task);
 				if (err)
 					break;
 			} while (1);
@@ -2588,7 +2589,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc,
 	if (nr_left) {
 		res = kbase_mem_pool_alloc_pages(
 			&kctx->mem_pools.small[alloc->group_id],
-			nr_left, tp, false);
+			nr_left, tp, false, kctx->task);
 		if (res <= 0)
 			goto alloc_failed;
 	}
@@ -4077,7 +4078,7 @@ static int kbase_jit_grow(struct kbase_context *kctx,
 		spin_unlock(&kctx->mem_partials_lock);
 
 		kbase_gpu_vm_unlock(kctx);
-		ret = kbase_mem_pool_grow(pool, pool_delta);
+		ret = kbase_mem_pool_grow(pool, pool_delta, kctx->task);
 		kbase_gpu_vm_lock(kctx);
 
 		if (ret)
@@ -4885,10 +4886,7 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 	struct page **pages;
 	struct tagged_addr *pa;
 	long i;
-	unsigned long address;
 	struct device *dev;
-	unsigned long offset;
-	unsigned long local_size;
 	unsigned long gwt_mask = ~0;
 
 	/* Calls to this function are inherently asynchronous, with respect to
@@ -4905,21 +4903,37 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 
 	alloc = reg->gpu_alloc;
 	pa = kbase_get_gpu_phy_pages(reg);
-	address = alloc->imported.user_buf.address;
 	pinned_pages = alloc->nents;
 	pages = alloc->imported.user_buf.pages;
 	dev = kctx->kbdev->dev;
-	offset = address & ~PAGE_MASK;
-	local_size = alloc->imported.user_buf.size;
+
+       /* Manual CPU cache synchronization.
+        *
+        * The driver disables automatic CPU cache synchronization because the
+        * memory pages that enclose the imported region may also contain
+        * sub-regions which are not imported and that are allocated and used
+        * by the user process. This may be the case of memory at the beginning
+        * of the first page and at the end of the last page. Automatic CPU cache
+        * synchronization would force some operations on those memory allocations,
+        * unbeknown to the user process: in particular, a CPU cache invalidate
+        * upon unmapping would destroy the content of dirty CPU caches and cause
+        * the user process to lose CPU writes to the non-imported sub-regions.
+        *
+        * When the GPU claims ownership of the imported memory buffer, it shall
+        * commit CPU writes for the whole of all pages that enclose the imported
+        * region, otherwise the initial content of memory would be wrong.
+        */
+
 
 	for (i = 0; i < pinned_pages; i++) {
 		dma_addr_t dma_addr;
-		unsigned long min;
+                #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+                        dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+                #else
+                        dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
+                                             DMA_ATTR_SKIP_CPU_SYNC);
+                #endif
 
-		min = MIN(PAGE_SIZE - offset, local_size);
-		dma_addr = dma_map_page(dev, pages[i],
-				offset, min,
-				DMA_BIDIRECTIONAL);
 		err = dma_mapping_error(dev, dma_addr);
 		if (err)
 			goto unwind;
@@ -4927,8 +4941,6 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 		alloc->imported.user_buf.dma_addrs[i] = dma_addr;
 		pa[i] = as_tagged(page_to_phys(pages[i]));
 
-		local_size -= min;
-		offset = 0;
 	}
 
 #ifdef CONFIG_MALI_CINSTR_GWT
@@ -4946,10 +4958,26 @@ static int kbase_jd_user_buf_map(struct kbase_context *kctx,
 	/* fall down */
 unwind:
 	alloc->nents = 0;
+
+       /* Run the unmap loop in the same order as map loop, and perform again
+        * CPU cache synchronization to re-write the content of dirty CPU caches
+        * to memory. This is a precautionary measure in case a GPU job has taken
+        * advantage of a partially GPU-mapped range to write and corrupt the
+        * content of memory, either inside or outside the imported region.
+        *
+        * Notice that this error recovery path doesn't try to be optimal and just
+        * flushes the entire page range.
+        */
+
 	while (i--) {
-		dma_unmap_page(kctx->kbdev->dev,
-				alloc->imported.user_buf.dma_addrs[i],
-				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
+                dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+                #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+                        dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+                #else
+                        dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+                                    DMA_ATTR_SKIP_CPU_SYNC);
+                #endif
 	}
 
 	/* The user buffer could already have been previously pinned before
@@ -4975,6 +5003,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
 {
 	long i;
 	struct page **pages;
+        unsigned long offset_within_page = alloc->imported.user_buf.address & ~PAGE_MASK;
 	unsigned long size = alloc->imported.user_buf.size;
 	lockdep_assert_held(&kctx->reg_lock);
 
@@ -4988,12 +5017,94 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
 #endif
 
 	for (i = 0; i < alloc->imported.user_buf.nr_pages; i++) {
-		unsigned long local_size;
+                unsigned long imported_size = MIN(size, PAGE_SIZE - offset_within_page);
+               /* Notice: this is a temporary variable that is used for DMA sync
+                * operations, and that could be incremented by an offset if the
+                * current page contains both imported and non-imported memory
+                * sub-regions.
+                *
+                * It is valid to add an offset to this value, because the offset
+                * is always kept within the physically contiguous dma-mapped range
+                * and there's no need to translate to physical address to offset it.
+                *
+                * This variable is not going to be used for the actual DMA unmap
+                * operation, that shall always use the original DMA address of the
+                * whole memory page.
+                */
 		dma_addr_t dma_addr = alloc->imported.user_buf.dma_addrs[i];
 
-		local_size = MIN(size, PAGE_SIZE - (dma_addr & ~PAGE_MASK));
-		dma_unmap_page(kctx->kbdev->dev, dma_addr, local_size,
+               /* Manual CPU cache synchronization.
+                *
+                * When the GPU returns ownership of the buffer to the CPU, the driver
+                * needs to treat imported and non-imported memory differently.
+                *
+                * The first case to consider is non-imported sub-regions at the
+                * beginning of the first page and at the end of last page. For these
+                * sub-regions: CPU cache shall be committed with a clean+invalidate,
+                * in order to keep the last CPU write.
+                *
+                * Imported region prefers the opposite treatment: this memory has been
+                * legitimately mapped and used by the GPU, hence GPU writes shall be
+                * committed to memory, while CPU cache shall be invalidated to make
+                * sure that CPU reads the correct memory content.
+                *
+                * The following diagram shows the expected value of the variables
+                * used in this loop in the corner case of an imported region enclosed
+                * by a single memory page:
+                *
+                * page boundary ->|-----------| <- dma_addr (initial value)
+                *                 |           |
+                *                 | - - - - - | <- offset_within_page
+                *                 |XXXXXXXXXXX|\
+                *                 |XXXXXXXXXXX| \
+                *                 |XXXXXXXXXXX|  }- imported_size
+                *                 |XXXXXXXXXXX| /
+                *                 |XXXXXXXXXXX|/
+                *                 | - - - - - | <- offset_within_page + imported_size
+                *                 |           |\
+                *                 |           | }- PAGE_SIZE - imported_size - offset_within_page
+                *                 |           |/
+                * page boundary ->|-----------|
+                *
+                * If the imported region is enclosed by more than one page, then
+                * offset_within_page = 0 for any page after the first.
+                */
+               /* Only for first page: handle non-imported range at the beginning. */
+               if (offset_within_page > 0) {
+                       dma_sync_single_for_device(kctx->kbdev->dev, dma_addr, offset_within_page,
+                                                  DMA_BIDIRECTIONAL);
+                       dma_addr += offset_within_page;
+               }
+
+               /* For every page: handle imported range. */
+               if (imported_size > 0)
+                       dma_sync_single_for_cpu(kctx->kbdev->dev, dma_addr, imported_size,
+                                               DMA_BIDIRECTIONAL);
+
+               /* Only for last page (that may coincide with first page):
+                * handle non-imported range at the end.
+                */
+               if ((imported_size + offset_within_page) < PAGE_SIZE) {
+                       dma_addr += imported_size;
+                       dma_sync_single_for_device(kctx->kbdev->dev, dma_addr,
+                                                  PAGE_SIZE - imported_size - offset_within_page,
+                                                  DMA_BIDIRECTIONAL);
+               }
+
+               /* Notice: use the original DMA address to unmap the whole memory page. */
+                #if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+                        dma_unmap_page(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i], PAGE_SIZE,
 				DMA_BIDIRECTIONAL);
+                #else
+                        dma_unmap_page_attrs(kctx->kbdev->dev, alloc->imported.user_buf.dma_addrs[i],
+                                    PAGE_SIZE, DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+                #endif
+
 		if (writeable)
 			set_page_dirty_lock(pages[i]);
 #if !MALI_USE_CSF
@@ -5001,7 +5112,7 @@ static void kbase_jd_user_buf_unmap(struct kbase_context *kctx, struct kbase_mem
 		pages[i] = NULL;
 #endif
 
-		size -= local_size;
+		size -= imported_size;
 	}
 #if !MALI_USE_CSF
 	alloc->nents = 0;
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.h b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.h
index 8a95154..a9a0d7e 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.h
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 /*
  *
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -947,7 +947,9 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
  * @pages:    Pointer to array where the physical address of the allocated
  *            pages will be stored.
  * @partial_allowed: If fewer pages allocated is allowed
- *
+ * @page_owner: Task that created the Kbase context for which the pages are
+ *              being allocated, or NULL if not associated with any Kbase context.
+ *
  * Like kbase_mem_pool_alloc() but optimized for allocating many pages.
  *
  * Return:
@@ -963,7 +965,7 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
  * this lock, it should use kbase_mem_pool_alloc_pages_locked() instead.
  */
 int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
-		struct tagged_addr *pages, bool partial_allowed);
+		struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner);
 
 /**
  * kbase_mem_pool_alloc_pages_locked - Allocate pages from memory pool
@@ -1075,13 +1077,15 @@ void kbase_mem_pool_set_max_size(struct kbase_mem_pool *pool, size_t max_size);
  * kbase_mem_pool_grow - Grow the pool
  * @pool:       Memory pool to grow
  * @nr_to_grow: Number of pages to add to the pool
- *
+ * @page_owner: Task that created the Kbase context for which the pool is being
+ *              grown, or NULL if the pages are not tied to any Kbase context.
+ *
  * Adds @nr_to_grow pages to the pool. Note that this may cause the pool to
  * become larger than the maximum size specified.
  *
  * Return: 0 on success, -ENOMEM if unable to allocate sufficent pages
  */
-int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow);
+int kbase_mem_pool_grow(struct kbase_mem_pool *pool, size_t nr_to_grow, struct task_struct *page_owner);
 
 /**
  * kbase_mem_pool_trim - Grow or shrink the pool to a new size
@@ -2203,8 +2207,7 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
  * kbase_mem_allow_alloc - Check if allocation of GPU memory is allowed
  * @kctx: Pointer to kbase context
  *
- * Don't allow the allocation of GPU memory until user space has set up the
- * tracking page (which sets kctx->process_mm) or if the ioctl has been issued
+ * Don't allow the allocation of GPU memory if the ioctl has been issued
  * from the forked child process using the mali device file fd inherited from
  * the parent process.
  *
@@ -2212,13 +2215,7 @@ kbase_ctx_reg_zone_get(struct kbase_context *kctx, unsigned long zone_bits)
  */
 static inline bool kbase_mem_allow_alloc(struct kbase_context *kctx)
 {
-	bool allow_alloc = true;
-
-	rcu_read_lock();
-	allow_alloc = (rcu_dereference(kctx->process_mm) == current->mm);
-	rcu_read_unlock();
-
-	return allow_alloc;
+        return (kctx->process_mm == current->mm);
 }
 
 /**
@@ -2237,6 +2234,22 @@ static inline int kbase_mem_group_id_get(base_mem_alloc_flags flags)
 }
 
 /**
+ * kbase_mem_mmgrab - Wrapper function to take a reference on the mm_struct of the current process
+ */
+static inline void kbase_mem_mmgrab(void)
+{
+	/* This merely takes a reference on the memory descriptor structure,
+	 * i.e. the mm_struct of the current process, and not on its address
+	 * space, so it won't block the freeing of the address space on exit.
+	 */
+#if KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE
+        atomic_inc(&current->mm->mm_count);
+#else
+        mmgrab(current->mm);
+#endif
+}
+
+/**
  * kbase_mem_group_id_set - Set group ID into base_mem_alloc_flags
  * @id: group ID(0~15) you want to encode
  *
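kbase_mem_mmgrab() above pins only mm_count (the lifetime of the mm_struct descriptor), not mm_users (the address space), so the owning process can still exit and release its mappings while the driver keeps a pointer that stays valid until the matching mmdrop(). A short sketch of that pairing, illustrative only (the struct and helper names below are hypothetical):

#include <linux/sched/mm.h>	/* mmdrop(); mmgrab() on v4.11+ kernels */

struct example_mm_ref {
	struct mm_struct *mm;
};

/* Record the current process' mm, taking a descriptor-only reference. */
static void example_mm_ref_get(struct example_mm_ref *ref)
{
	kbase_mem_mmgrab();
	ref->mm = current->mm;
}

/* Balance the reference once the recorded mm is no longer needed. */
static void example_mm_ref_put(struct example_mm_ref *ref)
{
	mmdrop(ref->mm);
	ref->mm = NULL;
}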
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_linux.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_linux.c
index 45345b7..f46e2b5 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_linux.c
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_linux.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -36,6 +36,7 @@
 #include <linux/cache.h>
 #include <linux/memory_group_manager.h>
 
+#include <linux/version.h>
 #include <mali_kbase.h>
 #include <mali_kbase_mem_linux.h>
 #include <tl/mali_kbase_tracepoints.h>
@@ -1556,6 +1557,8 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	u32 cache_line_alignment = kbase_get_cache_line_alignment(kctx->kbdev);
 	struct kbase_alloc_import_user_buf *user_buf;
 	struct page **pages = NULL;
+	struct tagged_addr *pa;
+	struct device *dev;
 	int write;
 
 	/* Flag supported only for dma-buf imported memory */
@@ -1697,31 +1700,48 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	reg->gpu_alloc->nents = 0;
 	reg->extension = 0;
 
+	pa = kbase_get_gpu_phy_pages(reg);
+	dev = kctx->kbdev->dev;
+
 	if (pages) {
-		struct device *dev = kctx->kbdev->dev;
-		unsigned long local_size = user_buf->size;
-		unsigned long offset = user_buf->address & ~PAGE_MASK;
-		struct tagged_addr *pa = kbase_get_gpu_phy_pages(reg);
 
 		/* Top bit signifies that this was pinned on import */
 		user_buf->current_mapping_usage_count |= PINNED_ON_IMPORT;
 
+		/* Manual CPU cache synchronization.
+		 *
+		 * The driver disables automatic CPU cache synchronization because the
+		 * memory pages that enclose the imported region may also contain
+		 * sub-regions which are not imported and that are allocated and used
+		 * by the user process. This may be the case of memory at the beginning
+		 * of the first page and at the end of the last page. Automatic CPU cache
+		 * synchronization would force some operations on those memory allocations,
+		 * unbeknown to the user process: in particular, a CPU cache invalidate
+		 * upon unmapping would destroy the content of dirty CPU caches and cause
+		 * the user process to lose CPU writes to the non-imported sub-regions.
+		 *
+		 * When the GPU claims ownership of the imported memory buffer, it shall
+		 * commit CPU writes for the whole of all pages that enclose the imported
+		 * region, otherwise the initial content of memory would be wrong.
+		 */
+
 		for (i = 0; i < faulted_pages; i++) {
 			dma_addr_t dma_addr;
-			unsigned long min;
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+			dma_addr = dma_map_page(dev, pages[i], 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+			dma_addr = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+						      DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+#endif
 
-			min = MIN(PAGE_SIZE - offset, local_size);
-			dma_addr = dma_map_page(dev, pages[i],
-					offset, min,
-					DMA_BIDIRECTIONAL);
 			if (dma_mapping_error(dev, dma_addr))
 				goto unwind_dma_map;
 
 			user_buf->dma_addrs[i] = dma_addr;
 			pa[i] = as_tagged(page_to_phys(pages[i]));
 
-			local_size -= min;
-			offset = 0;
+			dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
 		}
 
 		reg->gpu_alloc->nents = faulted_pages;
@@ -1730,10 +1750,22 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 	return reg;
 
 unwind_dma_map:
+
+	/* Run the unmap loop in the same order as the map loop, and perform
+	 * CPU cache synchronization again to re-write the content of dirty
+	 * CPU caches to memory. This precautionary measure is kept here to
+	 * keep this code aligned with kbase_jd_user_buf_map() to allow for a
+	 * potential refactor in the future.
+	 */
 	while (i--) {
-		dma_unmap_page(kctx->kbdev->dev,
-				user_buf->dma_addrs[i],
-				PAGE_SIZE, DMA_BIDIRECTIONAL);
+		dma_addr_t dma_addr = user_buf->dma_addrs[i];
+		dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#if (KERNEL_VERSION(4, 10, 0) > LINUX_VERSION_CODE)
+		dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+#else
+		dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
+				     DMA_ATTR_SKIP_CPU_SYNC);
+#endif
 	}
 fault_mismatch:
 	if (pages) {
@@ -1749,7 +1781,6 @@ static struct kbase_va_region *kbase_mem_from_user_buffer(
 no_region:
 bad_size:
 	return NULL;
-
 }
 
 
@@ -2029,7 +2060,10 @@ int kbase_mem_import(struct kbase_context *kctx, enum base_mem_import_type type,
 		/* Remove COHERENT_SYSTEM flag if coherent mem is unavailable */
 		*flags &= ~BASE_MEM_COHERENT_SYSTEM;
 	}
-
+	if (((*flags & BASE_MEM_CACHED_CPU) == 0) && (type == BASE_MEM_IMPORT_TYPE_USER_BUFFER)) {
+		dev_warn(kctx->kbdev->dev, "USER_BUFFER must be CPU cached");
+		goto bad_flags;
+	}
 	if ((padding != 0) && (type != BASE_MEM_IMPORT_TYPE_UMM)) {
 		dev_warn(kctx->kbdev->dev,
 				"padding is only supported for UMM");
@@ -3190,79 +3224,27 @@ static void kbasep_add_mm_counter(struct mm_struct *mm, int member, long value)
 
 void kbasep_os_process_page_usage_update(struct kbase_context *kctx, int pages)
 {
-	struct mm_struct *mm;
-
-	rcu_read_lock();
-	mm = rcu_dereference(kctx->process_mm);
-	if (mm) {
-		atomic_add(pages, &kctx->nonmapped_pages);
+	struct mm_struct *mm = kctx->process_mm;
+	if (unlikely(!mm))
+		return;
+	atomic_add(pages, &kctx->nonmapped_pages);
 #ifdef SPLIT_RSS_COUNTING
-		kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-#else
-		spin_lock(&mm->page_table_lock);
-		kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
-		spin_unlock(&mm->page_table_lock);
-#endif
-	}
-	rcu_read_unlock();
-}
-
-static void kbasep_os_process_page_usage_drain(struct kbase_context *kctx)
-{
-	int pages;
-	struct mm_struct *mm;
-
-	spin_lock(&kctx->mm_update_lock);
-	mm = rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock));
-	if (!mm) {
-		spin_unlock(&kctx->mm_update_lock);
-		return;
-	}
-
-	rcu_assign_pointer(kctx->process_mm, NULL);
-	spin_unlock(&kctx->mm_update_lock);
-	synchronize_rcu();
-
-	pages = atomic_xchg(&kctx->nonmapped_pages, 0);
-#ifdef SPLIT_RSS_COUNTING
-	kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+	kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
 #else
 	spin_lock(&mm->page_table_lock);
-	kbasep_add_mm_counter(mm, MM_FILEPAGES, -pages);
+	kbasep_add_mm_counter(mm, MM_FILEPAGES, pages);
 	spin_unlock(&mm->page_table_lock);
 #endif
 }
 
-static void kbase_special_vm_close(struct vm_area_struct *vma)
-{
-	struct kbase_context *kctx;
-
-	kctx = vma->vm_private_data;
-	kbasep_os_process_page_usage_drain(kctx);
-}
-
-static const struct vm_operations_struct kbase_vm_special_ops = {
-	.close = kbase_special_vm_close,
-};
-
 static int kbase_tracking_page_setup(struct kbase_context *kctx, struct vm_area_struct *vma)
 {
-	/* check that this is the only tracking page */
-	spin_lock(&kctx->mm_update_lock);
-	if (rcu_dereference_protected(kctx->process_mm, lockdep_is_held(&kctx->mm_update_lock))) {
-		spin_unlock(&kctx->mm_update_lock);
-		return -EFAULT;
-	}
-
-	rcu_assign_pointer(kctx->process_mm, current->mm);
-
-	spin_unlock(&kctx->mm_update_lock);
+	if (vma_pages(vma) != 1)
+		return -EINVAL;
 
 	/* no real access */
 	vma->vm_flags &= ~(VM_READ | VM_MAYREAD | VM_WRITE | VM_MAYWRITE | VM_EXEC | VM_MAYEXEC);
 	vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
-	vma->vm_ops = &kbase_vm_special_ops;
-	vma->vm_private_data = kctx;
 
 	return 0;
 }
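The import path above maps user pages with DMA_ATTR_SKIP_CPU_SYNC and performs a single explicit cache clean per page instead of the automatic maintenance. A standalone sketch of that pattern, illustrative only and assuming a v4.10+ kernel (the helper names are hypothetical):

#include <linux/dma-mapping.h>

static dma_addr_t example_map_imported_page(struct device *dev, struct page *p)
{
	dma_addr_t dma_addr;

	/* Suppress automatic CPU cache maintenance at map time. */
	dma_addr = dma_map_page_attrs(dev, p, 0, PAGE_SIZE, DMA_BIDIRECTIONAL,
				      DMA_ATTR_SKIP_CPU_SYNC);
	if (dma_mapping_error(dev, dma_addr))
		return dma_addr;

	/* One explicit clean of the whole page so the GPU sees current data. */
	dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
	return dma_addr;
}

static void example_unmap_imported_page(struct device *dev, dma_addr_t dma_addr)
{
	/* Skip the CPU sync on unmap too: an invalidate here could discard
	 * CPU writes to non-imported data sharing the same page.
	 */
	dma_unmap_page_attrs(dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL,
			     DMA_ATTR_SKIP_CPU_SYNC);
}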
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_pool.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_pool.c
index 4103bd1..1889e20 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_pool.c
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_mem_pool.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2015-2021 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -27,7 +27,11 @@
 #include <linux/shrinker.h>
 #include <linux/atomic.h>
 #include <linux/version.h>
-
+#if KERNEL_VERSION(4, 11, 0) <= LINUX_VERSION_CODE
+#include <linux/sched/signal.h>
+#else
+#include <linux/sched.h>
+#endif
 #define pool_dbg(pool, format, ...) \
 	dev_dbg(pool->kbdev->dev, "%s-pool [%zu/%zu]: " format,	\
 		(pool->next_pool) ? "kctx" : "kbdev",	\
@@ -37,6 +41,46 @@
 
 #define NOT_DIRTY false
 #define NOT_RECLAIMED false
+/**
+ * can_alloc_page() - Check if the current thread can allocate a physical page
+ *
+ * @pool:                Pointer to the memory pool.
+ * @page_owner:          Pointer to the task/process that created the Kbase context
+ *                       for which a page needs to be allocated. It can be NULL if
+ *                       the page won't be associated with any Kbase context.
+ * @alloc_from_kthread:  Flag indicating that the current thread is a kernel thread.
+ *
+ * This function checks if the current thread is a kernel thread and can make a
+ * request to the kernel to allocate a physical page. If a kernel thread is allocating
+ * a page for a Kbase context and the process that created the context is exiting
+ * or is being killed, then there is no point in doing the page allocation.
+ *
+ * The check done by the function is particularly helpful when the system is running
+ * low on memory. When a page is allocated from the context of a kernel thread, the OoM
+ * killer doesn't consider the kernel thread for killing and the kernel keeps retrying
+ * to allocate the page as long as the OoM killer is able to kill processes.
+ * The check allows the kernel thread to quickly exit the page allocation loop once the
+ * OoM killer has initiated the killing of @page_owner, thereby unblocking context
+ * termination for @page_owner and the freeing of GPU memory it allocated. This helps
+ * to prevent a kernel panic and also limits the number of innocent processes that
+ * get killed.
+ *
+ * Return: true if the page can be allocated, false otherwise.
+ */
+static inline bool can_alloc_page(struct kbase_mem_pool *pool, struct task_struct *page_owner,
+				  const bool alloc_from_kthread)
+{
+	if (likely(!alloc_from_kthread || !page_owner))
+		return true;
+
+	if ((page_owner->flags & PF_EXITING) || fatal_signal_pending(page_owner)) {
+		dev_info(pool->kbdev->dev, "%s : Process %s/%d exiting", __func__,
+			 page_owner->comm, task_pid_nr(page_owner));
+		return false;
+	}
+
+	return true;
+}
 
 static size_t kbase_mem_pool_capacity(struct kbase_mem_pool *pool)
 {
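For reference, a minimal sketch of the allocation loop can_alloc_page() is meant to cut short, illustrative only (the function below is hypothetical and its error handling is elided); kbase_mem_alloc_page() is the pool's existing low-level allocator used elsewhere in this file:

static int example_alloc_pages_for_owner(struct kbase_mem_pool *pool,
					 struct task_struct *page_owner,
					 struct page **pages, size_t nr_pages)
{
	const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
	size_t i;

	for (i = 0; i < nr_pages; i++) {
		/* Re-check before each allocation: once the OoM killer has
		 * targeted @page_owner there is no point in persisting, and
		 * bailing out lets the context be torn down promptly.
		 */
		if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
			return -ENOMEM;

		pages[i] = kbase_mem_alloc_page(pool);
		if (!pages[i])
			return -ENOMEM;	/* caller frees pages[0..i-1] */
	}

	return 0;
}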
@@ -126,7 +170,6 @@ static void kbase_mem_pool_sync_page(struct kbase_mem_pool *pool,
 		struct page *p)
 {
 	struct device *dev = pool->kbdev->dev;
-
 	dma_sync_single_for_device(dev, kbase_dma_addr(p),
 			(PAGE_SIZE << pool->order), DMA_BIDIRECTIONAL);
 }
@@ -233,11 +276,11 @@ static size_t kbase_mem_pool_shrink(struct kbase_mem_pool *pool,
 }
 
 int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
-		size_t nr_to_grow)
+		size_t nr_to_grow, struct task_struct *page_owner)
 {
 	struct page *p;
 	size_t i;
-
+	const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
 	kbase_mem_pool_lock(pool);
 
 	pool->dont_reclaim = true;
@@ -250,6 +293,8 @@ int kbase_mem_pool_grow(struct kbase_mem_pool *pool,
 			return -ENOMEM;
 		}
 		kbase_mem_pool_unlock(pool);
+		if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+			return -ENOMEM;
 
 		p = kbase_mem_alloc_page(pool);
 		if (!p) {
@@ -282,7 +327,7 @@ void kbase_mem_pool_trim(struct kbase_mem_pool *pool, size_t new_size)
 	if (new_size < cur_size)
 		kbase_mem_pool_shrink(pool, cur_size - new_size);
 	else if (new_size > cur_size)
-		err = kbase_mem_pool_grow(pool, new_size - cur_size);
+		err = kbase_mem_pool_grow(pool, new_size - cur_size, NULL);
 
 	if (err) {
 		size_t grown_size = kbase_mem_pool_size(pool);
@@ -528,13 +573,14 @@ void kbase_mem_pool_free_locked(struct kbase_mem_pool *pool, struct page *p,
 }
 
 int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
-		struct tagged_addr *pages, bool partial_allowed)
+		struct tagged_addr *pages, bool partial_allowed, struct task_struct *page_owner)
 {
 	struct page *p;
 	size_t nr_from_pool;
 	size_t i = 0;
 	int err = -ENOMEM;
 	size_t nr_pages_internal;
+	const bool alloc_from_kthread = !!(current->flags & PF_KTHREAD);
 
 	nr_pages_internal = nr_4k_pages / (1u << (pool->order));
 
@@ -549,7 +595,6 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
 	nr_from_pool = min(nr_pages_internal, kbase_mem_pool_size(pool));
 	while (nr_from_pool--) {
 		int j;
-
 		p = kbase_mem_pool_remove_locked(pool);
 		if (pool->order) {
 			pages[i++] = as_tagged_tag(page_to_phys(p),
@@ -567,7 +612,7 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
 	if (i != nr_4k_pages && pool->next_pool) {
 		/* Allocate via next pool */
 		err = kbase_mem_pool_alloc_pages(pool->next_pool,
-				nr_4k_pages - i, pages + i, partial_allowed);
+				nr_4k_pages - i, pages + i, partial_allowed, page_owner);
 
 		if (err < 0)
 			goto err_rollback;
@@ -576,6 +621,9 @@ int kbase_mem_pool_alloc_pages(struct kbase_mem_pool *pool, size_t nr_4k_pages,
 	} else {
 		/* Get any remaining pages from kernel */
 		while (i != nr_4k_pages) {
+			if (unlikely(!can_alloc_page(pool, page_owner, alloc_from_kthread)))
+				goto err_rollback;
+
 			p = kbase_mem_alloc_page(pool);
 			if (!p) {
 				if (partial_allowed)
diff --git a/drivers/gpu/arm/bv_r38p1/mali_kbase_softjobs.c b/drivers/gpu/arm/bv_r38p1/mali_kbase_softjobs.c
index d58ed36..066a871 100644
--- a/drivers/gpu/arm/bv_r38p1/mali_kbase_softjobs.c
+++ b/drivers/gpu/arm/bv_r38p1/mali_kbase_softjobs.c
@@ -973,6 +973,13 @@ static int kbase_jit_allocate_prepare(struct kbase_jd_atom *katom)
 			jit_info_copy_size_for_jit_version[kctx->jit_version];
 	WARN_ON(jit_info_user_copy_size > sizeof(*info));
 
+	if (!kbase_mem_allow_alloc(kctx)) {
+		dev_dbg(kbdev->dev, "Invalid attempt to allocate JIT memory by %s/%d for ctx %d_%d",
+			current->comm, current->pid, kctx->tgid, kctx->id);
+		ret = -EINVAL;
+		goto fail;
+	}
+
 	/* For backwards compatibility, and to prevent reading more than 1 jit
 	 * info struct on jit version 1
 	 */
diff --git a/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_csf.c b/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_csf.c
index 04f5cdf..6c52f0c 100644
--- a/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_csf.c
+++ b/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_csf.c
@@ -149,17 +149,18 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
 					"true" : "false";
 	int as_no = as->number;
 	unsigned long flags;
+	const uintptr_t fault_addr = fault->addr;
 
 	/* terminal fault, print info about the fault */
 	dev_err(kbdev->dev,
-		"GPU bus fault in AS%d at PA 0x%016llX\n"
+		"GPU bus fault in AS%d at PA %pK\n"
 		"PA_VALID: %s\n"
 		"raw fault status: 0x%X\n"
 		"exception type 0x%X: %s\n"
 		"access type 0x%X: %s\n"
 		"source id 0x%X\n"
 		"pid: %d\n",
-		as_no, fault->addr,
+		as_no, (void *)fault_addr,
 		addr_valid,
 		status,
 		exception_type, kbase_gpu_exception_name(exception_type),
diff --git a/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_jm.c b/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_jm.c
index 3130b33..2442149 100644
--- a/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_jm.c
+++ b/drivers/gpu/arm/bv_r38p1/mmu/backend/mali_kbase_mmu_jm.c
@@ -63,15 +63,16 @@ void kbase_gpu_report_bus_fault_and_kill(struct kbase_context *kctx,
 	u32 const exception_data = (status >> 8) & 0xFFFFFF;
 	int const as_no = as->number;
 	unsigned long flags;
+	const uintptr_t fault_addr = fault->addr;
 
 	/* terminal fault, print info about the fault */
 	dev_err(kbdev->dev,
-		"GPU bus fault in AS%d at PA 0x%016llX\n"
+		"GPU bus fault in AS%d at PA %pK\n"
 		"raw fault status: 0x%X\n"
 		"exception type 0x%X: %s\n"
 		"exception data 0x%X\n"
 		"pid: %d\n",
-		as_no, fault->addr,
+		as_no, (void *)fault_addr,
 		status,
 		exception_type, kbase_gpu_exception_name(exception_type),
 		exception_data,
diff --git a/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu.c b/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu.c
index cee88c8..f82f77d 100644
--- a/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu.c
+++ b/drivers/gpu/arm/bv_r38p1/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
 /*
  *
- * (C) COPYRIGHT 2010-2022 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
  *
  * This program is free software and is provided to you under the terms of the
  * GNU General Public License version 2 as published by the Free Software
@@ -1358,6 +1358,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
 		kbase_gpu_vm_unlock(kctx);
 	} else {
 		int ret = -ENOMEM;
+		const u8 group_id = region->gpu_alloc->group_id;
 
 		kbase_gpu_vm_unlock(kctx);
 
@@ -1369,23 +1370,21 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
 			if (grow_2mb_pool) {
 				/* Round page requirement up to nearest 2 MB */
 				struct kbase_mem_pool *const lp_mem_pool =
-					&kctx->mem_pools.large[
-					region->gpu_alloc->group_id];
+					&kctx->mem_pools.large[group_id];
 
 				pages_to_grow = (pages_to_grow +
 					((1 << lp_mem_pool->order) - 1))
 						>> lp_mem_pool->order;
 
 				ret = kbase_mem_pool_grow(lp_mem_pool,
-					pages_to_grow);
+					pages_to_grow, kctx->task);
 			} else {
 #endif
 				struct kbase_mem_pool *const mem_pool =
-					&kctx->mem_pools.small[
-					region->gpu_alloc->group_id];
+					&kctx->mem_pools.small[group_id];
 
 				ret = kbase_mem_pool_grow(mem_pool,
-					pages_to_grow);
+					pages_to_grow, kctx->task);
 #ifdef CONFIG_MALI_2MB_ALLOC
 			}
 #endif
@@ -1795,7 +1794,7 @@ int kbase_mmu_insert_single_page(struct kbase_context *kctx, u64 vpfn,
 			err = kbase_mem_pool_grow(
 				&kbdev->mem_pools.small[
 					kctx->mmu.group_id],
-				MIDGARD_MMU_BOTTOMLEVEL);
+				MIDGARD_MMU_BOTTOMLEVEL, kctx->task);
 			mutex_lock(&kctx->mmu.mmu_lock);
 		} while (!err);
 		if (err) {
@@ -1956,7 +1955,7 @@ int kbase_mmu_insert_pages_no_flush(struct kbase_device *kbdev, struct kbase_mmu
 			mutex_unlock(&mmut->mmu_lock);
 			err = kbase_mem_pool_grow(
 				&kbdev->mem_pools.small[mmut->group_id],
-				cur_level);
+				cur_level, mmut->kctx ? mmut->kctx->task : NULL);
 			mutex_lock(&mmut->mmu_lock);
 		} while (!err);
 
@@ -2733,7 +2732,7 @@ int kbase_mmu_init(struct kbase_device *const kbdev,
 
 		err = kbase_mem_pool_grow(
 			&kbdev->mem_pools.small[mmut->group_id],
-			MIDGARD_MMU_BOTTOMLEVEL);
+			MIDGARD_MMU_BOTTOMLEVEL, kctx ? kctx->task : NULL);
 		if (err) {
 			kbase_mmu_term(kbdev, mmut);
 			return -ENOMEM;