author     Vamsidhar reddy Gaddam <gvamsi@google.com>  2024-04-10 17:14:26 +0000
committer  Vamsidhar reddy Gaddam <gvamsi@google.com>  2024-04-10 17:14:26 +0000
commit     157548df5743bfca8bba1bbd148453a7b7fabac1 (patch)
tree       a3efc8e2e4be8806e340670bb67af927c137c51f
parent     656ed97ecba51a1656d1f1deb5b0659ebf073a59 (diff)
parent     a999cd8fd398aed7390c8e5d99795e9b735d6ba7 (diff)
download   gpu-android-gs-raviole-5.10-android15-beta.tar.gz
Bring in R48P0 KMD update.

Bug: 323316466
Test: go/pixel-gpu-kmd-r48p0
Change-Id: I32484fe3e881ec9ec60cfeac27befc84d15b7904
Signed-off-by: Vamsidhar reddy Gaddam <gvamsi@google.com>
-rw-r--r--  common/include/linux/mali_hw_access.h | 38
-rw-r--r--  common/include/linux/memory_group_manager.h | 4
-rw-r--r--  common/include/linux/version_compat_defs.h | 41
-rw-r--r--  common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h | 2
-rw-r--r--  common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h | 45
-rw-r--r--  common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h | 33
-rw-r--r--  common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h | 2
-rw-r--r--  common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h | 24
-rw-r--r--  common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h | 6
-rw-r--r--  common/include/uapi/gpu/arm/midgard/mali_base_kernel.h | 7
-rw-r--r--  mali_kbase/Kbuild | 2
-rw-r--r--  mali_kbase/Kconfig | 11
-rw-r--r--  mali_kbase/Makefile | 5
-rw-r--r--  mali_kbase/Mconfig | 40
-rw-r--r--  mali_kbase/backend/gpu/Kbuild | 9
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_devfreq.c | 9
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_irq_internal.h | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_jm_rb.c | 2
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_dummy.c | 56
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_model_linux.h | 14
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_backend.c | 58
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_pm_driver.c | 3
-rw-r--r--  mali_kbase/backend/gpu/mali_kbase_time.c | 14
-rw-r--r--  mali_kbase/build.bp | 12
-rw-r--r--  mali_kbase/context/backend/mali_kbase_context_csf.c | 6
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.c | 261
-rw-r--r--  mali_kbase/csf/mali_kbase_csf.h | 34
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_defs.h | 110
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.c | 18
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware.h | 3
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_cfg.c | 4
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c | 13
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.c | 182
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_kcpu.h | 31
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_registers.h | 24
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_reset_gpu.c | 14
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.c | 368
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_scheduler.h | 45
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c | 34
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h | 4
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_tl_reader.c | 37
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_trace_buffer.c | 8
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_trace_buffer.h | 9
-rw-r--r--  mali_kbase/csf/mali_kbase_csf_util.c | 2
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_csf.c | 4
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_hw_csf.c | 5
-rw-r--r--  mali_kbase/device/backend/mali_kbase_device_jm.c | 4
-rw-r--r--  mali_kbase/device/mali_kbase_device.h | 3
-rw-r--r--  mali_kbase/device/mali_kbase_device_hw.c | 4
-rw-r--r--  mali_kbase/hw_access/backend/mali_kbase_hw_access_real_hw.c | 21
-rw-r--r--  mali_kbase/hw_access/mali_kbase_hw_access_regmap.h | 10
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c | 101
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h | 26
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h | 4
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c | 14
-rw-r--r--  mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c | 16
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c | 19
-rw-r--r--  mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h | 28
-rw-r--r--  mali_kbase/mali_base_hwconfig_features.h | 2
-rw-r--r--  mali_kbase/mali_base_hwconfig_issues.h | 26
-rw-r--r--  mali_kbase/mali_kbase_caps.h | 22
-rw-r--r--  mali_kbase/mali_kbase_config_defaults.h | 15
-rw-r--r--  mali_kbase/mali_kbase_core_linux.c | 279
-rw-r--r--  mali_kbase/mali_kbase_defs.h | 11
-rw-r--r--  mali_kbase/mali_kbase_fence.h | 31
-rw-r--r--  mali_kbase/mali_kbase_gpuprops.c | 18
-rw-r--r--  mali_kbase/mali_kbase_gwt.c | 13
-rw-r--r--  mali_kbase/mali_kbase_hw.c | 6
-rw-r--r--  mali_kbase/mali_kbase_hwaccess_pm.h | 10
-rw-r--r--  mali_kbase/mali_kbase_linux.h | 7
-rw-r--r--  mali_kbase/mali_kbase_mem.c | 41
-rw-r--r--  mali_kbase/mali_kbase_mem.h | 13
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.c | 64
-rw-r--r--  mali_kbase/mali_kbase_mem_linux.h | 2
-rw-r--r--  mali_kbase/mali_kbase_mem_migrate.c | 30
-rw-r--r--  mali_kbase/mali_kbase_mem_migrate.h | 2
-rw-r--r--  mali_kbase/mali_kbase_mem_pool.c | 31
-rw-r--r--  mali_kbase/mali_kbase_native_mgm.c | 43
-rw-r--r--  mali_kbase/mali_kbase_pbha.c | 6
-rw-r--r--  mali_kbase/mali_kbase_pbha_debugfs.c | 6
-rw-r--r--  mali_kbase/mali_kbase_pm.c | 11
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu.c | 92
-rw-r--r--  mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c | 2
-rw-r--r--  mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c | 2
-rw-r--r--  mali_kbase/tests/include/kutf/kutf_kprobe.h | 4
-rw-r--r--  mali_kbase/thirdparty/mali_kbase_mmap.c | 191
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.c | 2
-rw-r--r--  mali_kbase/tl/mali_kbase_tracepoints.h | 2
88 files changed, 1902 insertions, 987 deletions
diff --git a/common/include/linux/mali_hw_access.h b/common/include/linux/mali_hw_access.h
new file mode 100644
index 0000000..ca73036
--- /dev/null
+++ b/common/include/linux/mali_hw_access.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ *
+ * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ *
+ * This program is free software and is provided to you under the terms of the
+ * GNU General Public License version 2 as published by the Free Software
+ * Foundation, and any use by you of this program is subject to the terms
+ * of such GNU license.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, you can access it online at
+ * http://www.gnu.org/licenses/gpl-2.0.html.
+ *
+ */
+
+#ifndef _MALI_HW_ACCESS_H_
+#define _MALI_HW_ACCESS_H_
+
+#include <asm/arch_timer.h>
+#include <linux/io.h>
+
+
+#define mali_readl(addr) readl(addr)
+#define mali_readq(addr) readq(addr)
+#define mali_writel(val, addr) writel(val, addr)
+#define mali_writeq(val, addr) writeq(val, addr)
+#define mali_ioremap(addr, size) ioremap(addr, size)
+#define mali_iounmap(addr) iounmap(addr)
+#define mali_arch_timer_get_cntfrq() arch_timer_get_cntfrq()
+
+
+#endif /* _MALI_HW_ACCESS_H_ */
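The new header simply aliases the standard Linux MMIO and arch-timer helpers behind mali_* names. A minimal sketch (not part of the patch) of how a caller could use the wrappers; the register offset, mapping size and function name are hypothetical:

#include <linux/mali_hw_access.h>

#define EXAMPLE_GPU_IRQ_STATUS 0x02C /* hypothetical register offset */

static u32 example_read_irq_status(phys_addr_t gpu_reg_base)
{
        /* Map the (hypothetical) register page, read one 32-bit register, unmap. */
        void __iomem *regs = mali_ioremap(gpu_reg_base, 0x1000);
        u32 status;

        if (!regs)
                return 0;

        status = mali_readl(regs + EXAMPLE_GPU_IRQ_STATUS);
        mali_iounmap(regs);

        return status;
}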
diff --git a/common/include/linux/memory_group_manager.h b/common/include/linux/memory_group_manager.h
index e72ba40..56fc18a 100644
--- a/common/include/linux/memory_group_manager.h
+++ b/common/include/linux/memory_group_manager.h
@@ -32,6 +32,10 @@ typedef int vm_fault_t;
#define MEMORY_GROUP_MANAGER_NR_GROUPS (4)
+#define PTE_PBHA_SHIFT (59)
+#define PTE_PBHA_MASK ((uint64_t)0xf << PTE_PBHA_SHIFT)
+#define PTE_RES_BIT_MULTI_AS_SHIFT (63)
+
struct memory_group_manager_device;
struct memory_group_manager_import_data;
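PTE_PBHA_SHIFT and PTE_PBHA_MASK describe the 4-bit Page-Based Hardware Attributes field in bits 62:59 of a page table entry, and PTE_RES_BIT_MULTI_AS_SHIFT names bit 63. A minimal sketch (not part of the patch) of folding a PBHA index into a PTE with these definitions; the helper name is hypothetical:

#include <linux/memory_group_manager.h>

/* Hypothetical helper: place a 4-bit PBHA value into a PTE. */
static inline u64 example_pte_set_pbha(u64 pte, unsigned int pbha)
{
        pte &= ~PTE_PBHA_MASK;
        pte |= ((u64)pbha << PTE_PBHA_SHIFT) & PTE_PBHA_MASK;
        return pte;
}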
diff --git a/common/include/linux/version_compat_defs.h b/common/include/linux/version_compat_defs.h
index 366b50c..f8594a6 100644
--- a/common/include/linux/version_compat_defs.h
+++ b/common/include/linux/version_compat_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -348,4 +348,43 @@ static inline long kbase_pin_user_pages_remote(struct task_struct *tsk, struct m
#endif /* (KERNEL_VERSION(4, 11, 0) > LINUX_VERSION_CODE) */
+#if (KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE)
+/* Null definition */
+#define ALLOW_ERROR_INJECTION(fname, err_type)
+#endif /* (KERNEL_VERSION(4, 16, 0) > LINUX_VERSION_CODE) */
+
+#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
+#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) register_shrinker(reclaim)
+
+#elif ((KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE) && \
+ !(defined(__ANDROID_COMMON_KERNEL__) && (KERNEL_VERSION(6, 6, 0) == LINUX_VERSION_CODE)))
+#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) register_shrinker(reclaim, name)
+
+#else
+#define KBASE_REGISTER_SHRINKER(reclaim, name, priv_data) \
+ do { \
+ reclaim->private_data = priv_data; \
+ shrinker_register(reclaim); \
+ } while (0)
+
+#endif /* KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE */
+
+#if ((KERNEL_VERSION(6, 7, 0) > LINUX_VERSION_CODE) && \
+ !(defined(__ANDROID_COMMON_KERNEL__) && (KERNEL_VERSION(6, 6, 0) == LINUX_VERSION_CODE)))
+#define KBASE_UNREGISTER_SHRINKER(reclaim) unregister_shrinker(&reclaim)
+#define KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, type, var) container_of(s, type, var)
+#define DEFINE_KBASE_SHRINKER struct shrinker
+#define KBASE_INIT_RECLAIM(var, attr, name) (&((var)->attr))
+#define KBASE_SET_RECLAIM(var, attr, reclaim) ((var)->attr = (*reclaim))
+
+#else
+#define KBASE_UNREGISTER_SHRINKER(reclaim) shrinker_free(reclaim)
+#define KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, type, var) s->private_data
+#define DEFINE_KBASE_SHRINKER struct shrinker *
+#define KBASE_SHRINKER_ALLOC(name) shrinker_alloc(0, name)
+#define KBASE_INIT_RECLAIM(var, attr, name) (KBASE_SHRINKER_ALLOC(name))
+#define KBASE_SET_RECLAIM(var, attr, reclaim) ((var)->attr = reclaim)
+
+#endif
+
#endif /* _VERSION_COMPAT_DEFS_H_ */
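The new shrinker block hides the 6.7 API change (shrinker_alloc()/shrinker_register()/shrinker_free() replacing register_shrinker()/unregister_shrinker()), including the Android common-kernel 6.6 case that already carries the new API. A sketch, with an invented owner structure and callbacks, of how one source file could register a reclaim callback on either side of the split:

#include <linux/errno.h>
#include <linux/shrinker.h>
#include <linux/version_compat_defs.h>

/* Hypothetical owner object embedding the shrinker member. */
struct example_cache {
        DEFINE_KBASE_SHRINKER reclaim; /* struct shrinker or struct shrinker *, per kernel */
        unsigned long nr_items;
};

static unsigned long example_count_objects(struct shrinker *s, struct shrink_control *sc)
{
        struct example_cache *cache =
                KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct example_cache, reclaim);

        return cache->nr_items;
}

static unsigned long example_scan_objects(struct shrinker *s, struct shrink_control *sc)
{
        /* A real implementation would release items and return the number freed. */
        return SHRINK_STOP;
}

static int example_register_reclaim(struct example_cache *cache)
{
        struct shrinker *reclaim = KBASE_INIT_RECLAIM(cache, reclaim, "example-cache");

        if (!reclaim)
                return -ENOMEM;
        KBASE_SET_RECLAIM(cache, reclaim, reclaim);

        reclaim->count_objects = example_count_objects;
        reclaim->scan_objects = example_scan_objects;
        reclaim->seeks = DEFAULT_SEEKS;

        KBASE_REGISTER_SHRINKER(reclaim, "example-cache", cache);
        return 0;
}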
diff --git a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
index 564f477..b80817f 100644
--- a/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
+++ b/common/include/uapi/gpu/arm/midgard/backend/gpu/mali_kbase_model_dummy.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
index 0fb8242..3fff8b2 100644
--- a/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_base_csf_kernel.h
@@ -46,7 +46,11 @@
*/
#define BASE_MEM_CSF_EVENT ((base_mem_alloc_flags)1 << 19)
-#define BASE_MEM_RESERVED_BIT_20 ((base_mem_alloc_flags)1 << 20)
+/* Unused bit for CSF, only used in JM for BASE_MEM_TILER_ALIGN_TOP */
+#define BASE_MEM_UNUSED_BIT_20 ((base_mem_alloc_flags)1 << 20)
+
+/* Unused bit for CSF, only used in JM for BASE_MEM_FLAG_MAP_FIXED */
+#define BASE_MEM_UNUSED_BIT_27 ((base_mem_alloc_flags)1 << 27)
/* Must be FIXABLE memory: its GPU VA will be determined at a later point,
* at which time it will be at a fixed GPU VA.
@@ -62,9 +66,15 @@
*/
#define BASEP_MEM_FLAGS_KERNEL_ONLY (BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE)
-/* A mask of all currently reserved flags
- */
-#define BASE_MEM_FLAGS_RESERVED BASE_MEM_RESERVED_BIT_20
+/* A mask of all flags that should not be queried */
+#define BASE_MEM_DONT_QUERY \
+ (BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED | BASE_MEM_SAME_VA)
+
+/* A mask of all currently reserved flags */
+#define BASE_MEM_FLAGS_RESERVED ((base_mem_alloc_flags)0)
+
+/* A mask of all bits that are not used by a flag on CSF */
+#define BASE_MEM_FLAGS_UNUSED (BASE_MEM_UNUSED_BIT_20 | BASE_MEM_UNUSED_BIT_27)
/* Special base mem handles specific to CSF.
*/
@@ -474,7 +484,26 @@ struct base_gpu_queue_error_fatal_payload {
};
/**
- * enum base_gpu_queue_group_error_type - GPU Fatal error type.
+ * struct base_gpu_queue_error_fault_payload - Recoverable fault
+ * error information related to GPU command queue.
+ *
+ * @sideband: Additional information about this recoverable fault.
+ * @status: Recoverable fault information.
+ * This consists of exception type (least significant byte) and
+ * data (remaining bytes). One example of exception type is
+ * INSTR_INVALID_PC (0x50).
+ * @csi_index: Index of the CSF interface the queue is bound to.
+ * @padding: Padding to make multiple of 64bits
+ */
+struct base_gpu_queue_error_fault_payload {
+ __u64 sideband;
+ __u32 status;
+ __u8 csi_index;
+ __u8 padding[3];
+};
+
+/**
+ * enum base_gpu_queue_group_error_type - GPU error type.
*
* @BASE_GPU_QUEUE_GROUP_ERROR_FATAL: Fatal error associated with GPU
* command queue group.
@@ -484,7 +513,9 @@ struct base_gpu_queue_error_fatal_payload {
* progress timeout.
* @BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM: Fatal error due to running out
* of tiler heap memory.
- * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of fatal error types
+ * @BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT: Fault error reported for GPU
+ * command queue.
+ * @BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT: The number of GPU error types
*
* This type is used for &struct_base_gpu_queue_group_error.error_type.
*/
@@ -493,6 +524,7 @@ enum base_gpu_queue_group_error_type {
BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL,
BASE_GPU_QUEUE_GROUP_ERROR_TIMEOUT,
BASE_GPU_QUEUE_GROUP_ERROR_TILER_HEAP_OOM,
+ BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT,
BASE_GPU_QUEUE_GROUP_ERROR_FATAL_COUNT
};
@@ -512,6 +544,7 @@ struct base_gpu_queue_group_error {
union {
struct base_gpu_queue_group_error_fatal_payload fatal_group;
struct base_gpu_queue_error_fatal_payload fatal_queue;
+ struct base_gpu_queue_error_fault_payload fault_queue;
} payload;
};
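With the new BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT type, recoverable CS faults are delivered through the same base_gpu_queue_group_error record as fatal errors, with the details in payload.fault_queue. A sketch of how userspace might decode such a record; how the record is obtained and the print helper are assumptions, not part of the UAPI:

#include <stdio.h>
/* plus the UAPI header above, which defines struct base_gpu_queue_group_error */

static void example_decode_group_error(const struct base_gpu_queue_group_error *err)
{
        switch (err->error_type) {
        case BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FAULT:
                /* Recoverable: the exception type sits in the least significant byte of status */
                printf("CS fault: csi=%u exception=0x%x status=0x%x sideband=0x%llx\n",
                       err->payload.fault_queue.csi_index,
                       err->payload.fault_queue.status & 0xff,
                       err->payload.fault_queue.status,
                       (unsigned long long)err->payload.fault_queue.sideband);
                break;
        case BASE_GPU_QUEUE_GROUP_QUEUE_ERROR_FATAL:
                printf("CS fatal: csi=%u status=0x%x\n",
                       err->payload.fatal_queue.csi_index, err->payload.fatal_queue.status);
                break;
        default:
                printf("group error type %u\n", (unsigned int)err->error_type);
                break;
        }
}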
diff --git a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
index 537c90d..c56e071 100644
--- a/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/csf/mali_kbase_csf_ioctl.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -95,15 +95,23 @@
* 1.22:
* - Add comp_pri_threshold and comp_pri_ratio attributes to
* kbase_ioctl_cs_queue_group_create.
+ * - Made the BASE_MEM_DONT_NEED memory flag queryable.
* 1.23:
* - Disallows changing the sharability on the GPU of imported dma-bufs to
* BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE.
* 1.24:
* - Implement full block state support for hardware counters.
+ * 1.25:
+ * - Add support for CS_FAULT reporting to userspace
+ * 1.26:
+ * - Made the BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP and BASE_MEM_KERNEL_SYNC memory
+ * flags queryable.
+ * 1.27:
+ * - Implement support for HWC block state availability.
*/
#define BASE_UK_VERSION_MAJOR 1
-#define BASE_UK_VERSION_MINOR 24
+#define BASE_UK_VERSION_MINOR 27
/**
* struct kbase_ioctl_version_check - Check version compatibility between
@@ -340,6 +348,8 @@ union kbase_ioctl_cs_queue_group_create_1_18 {
* @in.csi_handlers: Flags to signal that the application intends to use CSI
* exception handlers in some linear buffers to deal with
* the given exception types.
+ * @in.cs_fault_report_enable: Flag to indicate reporting of CS_FAULTs
+ * to userspace.
* @in.padding: Currently unused, must be zero
* @out: Output parameters
* @out.group_handle: Handle of a newly created queue group.
@@ -360,7 +370,8 @@ union kbase_ioctl_cs_queue_group_create {
/**
* @in.reserved: Reserved, currently unused, must be zero.
*/
- __u16 reserved;
+ __u8 reserved;
+ __u8 cs_fault_report_enable;
/**
* @in.dvs_buf: buffer for deferred vertex shader
*/
@@ -637,6 +648,22 @@ union kbase_ioctl_read_user_page {
#define KBASE_IOCTL_READ_USER_PAGE _IOWR(KBASE_IOCTL_TYPE, 60, union kbase_ioctl_read_user_page)
+/**
+ * struct kbase_ioctl_queue_group_clear_faults - Re-enable CS FAULT reporting for the GPU queues
+ *
+ * @addr: CPU VA to an array of GPU VAs of the buffers backing the queues
+ * @nr_queues: Number of queues in the array
+ * @padding: Padding to round up to a multiple of 8 bytes, must be zero
+ */
+struct kbase_ioctl_queue_group_clear_faults {
+ __u64 addr;
+ __u32 nr_queues;
+ __u8 padding[4];
+};
+
+#define KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS \
+ _IOW(KBASE_IOCTL_TYPE, 61, struct kbase_ioctl_queue_group_clear_faults)
+
/***************
* test ioctls *
***************/
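A sketch of driving the new ioctl from userspace; the kbase file descriptor and the array of queue buffer GPU VAs are assumed to come from the existing queue registration flow, and the UAPI header above is assumed to be on the include path:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
/* plus the UAPI header above, which defines KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS */

static int example_clear_queue_faults(int kbase_fd, const uint64_t *queue_buf_gpu_vas,
                                      uint32_t nr_queues)
{
        struct kbase_ioctl_queue_group_clear_faults args;

        memset(&args, 0, sizeof(args)); /* padding must be zero */
        args.addr = (uintptr_t)queue_buf_gpu_vas;
        args.nr_queues = nr_queues;

        return ioctl(kbase_fd, KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS, &args);
}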
diff --git a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h
index d347854..d4d12ae 100644
--- a/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h
+++ b/common/include/uapi/gpu/arm/midgard/gpu/mali_kbase_gpu_id.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
index 9478334..25563e4 100644
--- a/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
+++ b/common/include/uapi/gpu/arm/midgard/jm/mali_base_jm_kernel.h
@@ -30,15 +30,11 @@
* See base_mem_alloc_flags.
*/
-/* Used as BASE_MEM_FIXED in other backends */
-#define BASE_MEM_RESERVED_BIT_8 ((base_mem_alloc_flags)1 << 8)
+/* Unused bit for JM, only used in CSF for BASE_MEM_FIXED */
+#define BASE_MEM_UNUSED_BIT_8 ((base_mem_alloc_flags)1 << 8)
-/**
- * BASE_MEM_RESERVED_BIT_19 - Bit 19 is reserved.
- *
- * Do not remove, use the next unreserved bit for new flags
- */
-#define BASE_MEM_RESERVED_BIT_19 ((base_mem_alloc_flags)1 << 19)
+/* Unused bit for JM, only used in CSF for BASE_CSF_EVENT */
+#define BASE_MEM_UNUSED_BIT_19 ((base_mem_alloc_flags)1 << 19)
/**
* BASE_MEM_TILER_ALIGN_TOP - Memory starting from the end of the initial commit is aligned
@@ -64,9 +60,15 @@
(BASEP_MEM_PERMANENT_KERNEL_MAPPING | BASEP_MEM_NO_USER_FREE | BASE_MEM_FLAG_MAP_FIXED | \
BASEP_MEM_PERFORM_JIT_TRIM)
-/* A mask of all currently reserved flags
- */
-#define BASE_MEM_FLAGS_RESERVED (BASE_MEM_RESERVED_BIT_8 | BASE_MEM_RESERVED_BIT_19)
+/* A mask of all flags that should not be queried */
+#define BASE_MEM_DONT_QUERY \
+ (BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED | BASE_MEM_SAME_VA)
+
+/* A mask of all currently reserved flags */
+#define BASE_MEM_FLAGS_RESERVED ((base_mem_alloc_flags)0)
+
+/* A mask of all bits that are not used by a flag on JM */
+#define BASE_MEM_FLAGS_UNUSED (BASE_MEM_UNUSED_BIT_8 | BASE_MEM_UNUSED_BIT_19)
/* Similar to BASE_MEM_TILER_ALIGN_TOP, memory starting from the end of the
* initial commit is aligned to 'extension' pages, where 'extension' must be a power
diff --git a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
index 2a7a06a..43e35a7 100644
--- a/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
+++ b/common/include/uapi/gpu/arm/midgard/jm/mali_kbase_jm_ioctl.h
@@ -149,15 +149,19 @@
* from the parent process.
* 11.40:
* - Remove KBASE_IOCTL_HWCNT_READER_SETUP and KBASE_HWCNT_READER_* ioctls.
+ * - Made the BASE_MEM_DONT_NEED memory flag queryable.
* 11.41:
* - Disallows changing the sharability on the GPU of imported dma-bufs to
* BASE_MEM_COHERENT_SYSTEM using KBASE_IOCTL_MEM_FLAGS_CHANGE.
* 11.42:
* - Implement full block state support for hardware counters.
+ * 11.43:
+ * - Made the BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP and BASE_MEM_KERNEL_SYNC memory
+ * flags queryable.
*/
#define BASE_UK_VERSION_MAJOR 11
-#define BASE_UK_VERSION_MINOR 42
+#define BASE_UK_VERSION_MINOR 43
/**
* struct kbase_ioctl_version_check - Check version compatibility between
diff --git a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
index 75223bf..3b89778 100644
--- a/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
+++ b/common/include/uapi/gpu/arm/midgard/mali_base_kernel.h
@@ -93,10 +93,9 @@ typedef __u32 base_mem_alloc_flags;
/* A mask of all the flags that can be returned via the base_mem_get_flags()
* interface.
*/
-#define BASE_MEM_FLAGS_QUERYABLE \
- (BASE_MEM_FLAGS_INPUT_MASK & \
- ~(BASE_MEM_SAME_VA | BASE_MEM_COHERENT_SYSTEM_REQUIRED | BASE_MEM_IMPORT_SHARED | \
- BASE_MEM_FLAGS_RESERVED | BASEP_MEM_FLAGS_KERNEL_ONLY))
+#define BASE_MEM_FLAGS_QUERYABLE \
+ (BASE_MEM_FLAGS_INPUT_MASK & ~(BASE_MEM_DONT_QUERY | BASE_MEM_FLAGS_RESERVED | \
+ BASE_MEM_FLAGS_UNUSED | BASEP_MEM_FLAGS_KERNEL_ONLY))
/**
* enum base_mem_import_type - Memory types supported by @a base_mem_import
diff --git a/mali_kbase/Kbuild b/mali_kbase/Kbuild
index 2775ef8..92b4976 100644
--- a/mali_kbase/Kbuild
+++ b/mali_kbase/Kbuild
@@ -72,7 +72,7 @@ endif
CONFIG_MALI_PLATFORM_NAME="pixel"
# Driver version string which is returned to userspace via an ioctl
-MALI_RELEASE_NAME ?= '"r47p0-01eac0"'
+MALI_RELEASE_NAME ?= '"r48p0-01eac0"'
# Set up defaults if not defined by build system
ifeq ($(CONFIG_MALI_DEBUG), y)
MALI_UNIT_TEST = 1
diff --git a/mali_kbase/Kconfig b/mali_kbase/Kconfig
index c49c49b..57e8e32 100644
--- a/mali_kbase/Kconfig
+++ b/mali_kbase/Kconfig
@@ -72,7 +72,6 @@ config MALI_NO_MALI_DEFAULT_GPU
help
This option sets the default GPU to identify as for No Mali builds.
-
endchoice
menu "Platform specific options"
@@ -217,16 +216,6 @@ config MALI_CORESTACK
If unsure, say N.
-comment "Platform options"
- depends on MALI_MIDGARD && MALI_EXPERT
-
-config MALI_ERROR_INJECT
- bool "Enable No Mali error injection"
- depends on MALI_MIDGARD && MALI_EXPERT && MALI_NO_MALI
- default n
- help
- Enables insertion of errors to test module failure and recovery mechanisms.
-
comment "Debug options"
depends on MALI_MIDGARD && MALI_EXPERT
diff --git a/mali_kbase/Makefile b/mali_kbase/Makefile
index 9259b89..1062c07 100644
--- a/mali_kbase/Makefile
+++ b/mali_kbase/Makefile
@@ -97,7 +97,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
else
# Prevent misuse when CONFIG_MALI_NO_MALI=n
CONFIG_MALI_REAL_HW = y
- CONFIG_MALI_ERROR_INJECT = n
endif
@@ -128,7 +127,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
CONFIG_MALI_JOB_DUMP = n
CONFIG_MALI_NO_MALI = n
CONFIG_MALI_REAL_HW = y
- CONFIG_MALI_ERROR_INJECT = n
CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED = n
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE = n
CONFIG_MALI_PRFCNT_SET_SELECT_VIA_DEBUG_FS = n
@@ -189,7 +187,6 @@ ifeq ($(MALI_KCONFIG_EXT_PREFIX),)
CONFIG_MALI_PWRSOFT_765 \
CONFIG_MALI_JOB_DUMP \
CONFIG_MALI_NO_MALI \
- CONFIG_MALI_ERROR_INJECT \
CONFIG_MALI_HW_ERRATA_1485982_NOT_AFFECTED \
CONFIG_MALI_HW_ERRATA_1485982_USE_CLOCK_ALTERNATIVE \
CONFIG_MALI_PRFCNT_SET_PRIMARY \
@@ -296,6 +293,8 @@ CFLAGS_MODULE += -Wmissing-field-initializers
CFLAGS_MODULE += -Wno-type-limits
CFLAGS_MODULE += $(call cc-option, -Wmaybe-uninitialized)
CFLAGS_MODULE += $(call cc-option, -Wunused-macros)
+# The following ensures the stack frame does not get larger than a page
+CFLAGS_MODULE += -Wframe-larger-than=4096
KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN2
diff --git a/mali_kbase/Mconfig b/mali_kbase/Mconfig
index b3c5323..07bb50e 100644
--- a/mali_kbase/Mconfig
+++ b/mali_kbase/Mconfig
@@ -64,7 +64,6 @@ config MALI_NO_MALI
All calls to the simulated hardware will complete immediately as if the hardware
completed the task.
-
endchoice
@@ -206,45 +205,6 @@ config LARGE_PAGE_SUPPORT
If in doubt, say Y.
-choice
- prompt "Error injection level"
- depends on MALI_MIDGARD && MALI_EXPERT
- default MALI_ERROR_INJECT_NONE
- help
- Enables insertion of errors to test module failure and recovery mechanisms.
-
-config MALI_ERROR_INJECT_NONE
- bool "disabled"
- depends on MALI_MIDGARD && MALI_EXPERT
- help
- Error injection is disabled.
-
-config MALI_ERROR_INJECT_TRACK_LIST
- bool "error track list"
- depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI
- help
- Errors to inject are pre-configured by the user.
-
-config MALI_ERROR_INJECT_RANDOM
- bool "random error injection"
- depends on MALI_MIDGARD && MALI_EXPERT && NO_MALI
- help
- Injected errors are random, rather than user-driven.
-
-endchoice
-
-config MALI_ERROR_INJECT_ON
- string
- depends on MALI_MIDGARD && MALI_EXPERT
- default "0" if MALI_ERROR_INJECT_NONE
- default "1" if MALI_ERROR_INJECT_TRACK_LIST
- default "2" if MALI_ERROR_INJECT_RANDOM
-
-config MALI_ERROR_INJECT
- bool
- depends on MALI_MIDGARD && MALI_EXPERT
- default y if !MALI_ERROR_INJECT_NONE
-
config MALI_DEBUG
bool "Enable debug build"
depends on MALI_MIDGARD && MALI_EXPERT
diff --git a/mali_kbase/backend/gpu/Kbuild b/mali_kbase/backend/gpu/Kbuild
index c37cc59..42b7edc 100644
--- a/mali_kbase/backend/gpu/Kbuild
+++ b/mali_kbase/backend/gpu/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
-# (C) COPYRIGHT 2014-2022 ARM Limited. All rights reserved.
+# (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
#
# This program is free software and is provided to you under the terms of the
# GNU General Public License version 2 as published by the Free Software
@@ -49,12 +49,7 @@ endif
mali_kbase-$(CONFIG_MALI_DEVFREQ) += \
backend/gpu/mali_kbase_devfreq.o
-ifneq ($(CONFIG_MALI_REAL_HW),y)
- mali_kbase-y += backend/gpu/mali_kbase_model_linux.o
-endif
+mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_linux.o
# NO_MALI Dummy model interface
mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_dummy.o
-# HW error simulation
-mali_kbase-$(CONFIG_MALI_NO_MALI) += backend/gpu/mali_kbase_model_error_generator.o
-
diff --git a/mali_kbase/backend/gpu/mali_kbase_devfreq.c b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
index 905d188..2c1feed 100644
--- a/mali_kbase/backend/gpu/mali_kbase_devfreq.c
+++ b/mali_kbase/backend/gpu/mali_kbase_devfreq.c
@@ -431,7 +431,7 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
err = of_property_read_u64(node, "opp-hz-real", real_freqs);
#endif
if (err < 0) {
- dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d\n",
+ dev_warn(kbdev->dev, "Failed to read opp-hz-real property with error %d",
err);
continue;
}
@@ -439,8 +439,8 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
err = of_property_read_u32_array(node, "opp-microvolt", opp_volts,
kbdev->nr_regulators);
if (err < 0) {
- dev_warn(kbdev->dev,
- "Failed to read opp-microvolt property with error %d\n", err);
+ dev_warn(kbdev->dev, "Failed to read opp-microvolt property with error %d",
+ err);
continue;
}
#endif
@@ -450,11 +450,12 @@ static int kbase_devfreq_init_core_mask_table(struct kbase_device *kbdev)
if (core_mask != shader_present && corestack_driver_control) {
dev_warn(
kbdev->dev,
- "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU\n",
+ "Ignoring OPP %llu - Dynamic Core Scaling not supported on this GPU",
opp_freq);
continue;
}
+
core_count_p = of_get_property(node, "opp-core-count", NULL);
if (core_count_p) {
u64 remaining_core_mask = kbdev->gpu_props.shader_present;
diff --git a/mali_kbase/backend/gpu/mali_kbase_irq_internal.h b/mali_kbase/backend/gpu/mali_kbase_irq_internal.h
index 4374793..4798df9 100644
--- a/mali_kbase/backend/gpu/mali_kbase_irq_internal.h
+++ b/mali_kbase/backend/gpu/mali_kbase_irq_internal.h
@@ -74,7 +74,7 @@ void kbase_synchronize_irqs(struct kbase_device *kbdev);
* Return: 0 on success. Error code (negative) on failure.
*/
int kbase_validate_interrupts(struct kbase_device *const kbdev);
-#endif /* CONFIG_MALI_REAL_HW */
+#endif /* IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* CONFIG_MALI_DEBUG */
/**
diff --git a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
index d938079..aba84d5 100644
--- a/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
+++ b/mali_kbase/backend/gpu/mali_kbase_jm_rb.c
@@ -1437,7 +1437,7 @@ void kbase_backend_reset(struct kbase_device *kbdev, ktime_t *end_timestamp)
* then leave it in the RB and next time we're kicked
* it will be processed again from the starting state.
*/
- if (keep_in_jm_rb) {
+ if (!kbase_is_gpu_removed(kbdev) && keep_in_jm_rb) {
katom->protected_state.exit = KBASE_ATOM_EXIT_PROTECTED_CHECK;
/* As the atom was not removed, increment the
* index so that we read the correct atom in the
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
index c340760..b034ffe 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
+++ b/mali_kbase/backend/gpu/mali_kbase_model_dummy.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -25,42 +25,8 @@
* insmod'ing mali_kbase.ko with no arguments after a build with "scons
* gpu=tXYZ" will yield the expected GPU ID for tXYZ. This can always be
* overridden by passing the 'no_mali_gpu' argument to insmod.
- *
- * - if CONFIG_MALI_ERROR_INJECT is defined the error injection system is
- * activated.
*/
-/* Implementation of failure injection system:
- *
- * Error conditions are generated by gpu_generate_error().
- * According to CONFIG_MALI_ERROR_INJECT definition gpu_generate_error() either
- * generates an error HW condition randomly (CONFIG_MALI_ERROR_INJECT_RANDOM) or
- * checks if there is (in error_track_list) an error configuration to be set for
- * the current job chain (CONFIG_MALI_ERROR_INJECT_RANDOM not defined).
- * Each error condition will trigger a specific "state" for a certain set of
- * registers as per Midgard Architecture Specifications doc.
- *
- * According to Midgard Architecture Specifications doc the following registers
- * are always affected by error conditions:
- *
- * JOB Exception:
- * JOB_IRQ_RAWSTAT
- * JOB<n> STATUS AREA
- *
- * MMU Exception:
- * MMU_IRQ_RAWSTAT
- * AS<n>_FAULTSTATUS
- * AS<n>_FAULTADDRESS
- *
- * GPU Exception:
- * GPU_IRQ_RAWSTAT
- * GPU_FAULTSTATUS
- * GPU_FAULTADDRESS
- *
- * For further clarification on the model behaviour upon specific error
- * conditions the user may refer to the Midgard Architecture Specification
- * document
- */
#include <mali_kbase.h>
#include <device/mali_kbase_device.h>
#include <hw_access/mali_kbase_hw_access_regmap.h>
@@ -126,7 +92,7 @@ struct error_status_t hw_error_status;
*/
struct control_reg_values_t {
const char *name;
- u32 gpu_id;
+ u64 gpu_id;
u32 as_present;
u32 thread_max_threads;
u32 thread_max_workgroup_size;
@@ -524,7 +490,7 @@ MODULE_PARM_DESC(no_mali_gpu, "GPU to identify as");
static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cnt_idx,
bool is_low_word)
{
- u64 *counters_data;
+ u64 *counters_data = NULL;
u32 core_count = 0;
u32 event_index;
u64 value = 0;
@@ -580,6 +546,9 @@ static u32 gpu_model_get_prfcnt_value(enum kbase_ipa_core_type core_type, u32 cn
break;
}
+ if (unlikely(counters_data == NULL))
+ return 0;
+
for (core = 0; core < core_count; core++) {
value += counters_data[event_index];
event_index += KBASE_DUMMY_MODEL_COUNTER_PER_CORE;
@@ -1172,9 +1141,6 @@ static void midgard_model_update(void *h)
/*this job is done assert IRQ lines */
signal_int(dummy, i);
-#ifdef CONFIG_MALI_ERROR_INJECT
- midgard_set_error(i);
-#endif /* CONFIG_MALI_ERROR_INJECT */
update_register_statuses(dummy, i);
/*if this job slot returned failures we cannot use it */
if (hw_error_status.job_irq_rawstat & (1u << (i + 16))) {
@@ -1564,6 +1530,7 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value)
case L2_PWROFF_HI:
case PWR_KEY:
case PWR_OVERRIDE0:
+ case PWR_OVERRIDE1:
#if MALI_USE_CSF
case SHADER_PWRFEATURES:
case CSF_CONFIG:
@@ -1607,8 +1574,7 @@ void midgard_model_read_reg(void *h, u32 addr, u32 *const value)
#else /* !MALI_USE_CSF */
if (addr == GPU_CONTROL_REG(GPU_ID)) {
#endif /* !MALI_USE_CSF */
-
- *value = dummy->control_reg_values->gpu_id;
+ *value = dummy->control_reg_values->gpu_id & U32_MAX;
} else if (addr == JOB_CONTROL_REG(JOB_IRQ_RAWSTAT)) {
*value = hw_error_status.job_irq_rawstat;
pr_debug("%s", "JS_IRQ_RAWSTAT being read");
@@ -2166,9 +2132,3 @@ int gpu_model_control(void *model, struct kbase_model_control_params *params)
return 0;
}
-
-u64 midgard_model_arch_timer_get_cntfrq(void *h)
-{
- CSTD_UNUSED(h);
- return arch_timer_get_cntfrq();
-}
diff --git a/mali_kbase/backend/gpu/mali_kbase_model_linux.h b/mali_kbase/backend/gpu/mali_kbase_model_linux.h
index 65eb620..d38bb88 100644
--- a/mali_kbase/backend/gpu/mali_kbase_model_linux.h
+++ b/mali_kbase/backend/gpu/mali_kbase_model_linux.h
@@ -48,12 +48,8 @@
/*
* Include Model definitions
*/
-
-#if IS_ENABLED(CONFIG_MALI_NO_MALI)
#include <backend/gpu/mali_kbase_model_dummy.h>
-#endif /* IS_ENABLED(CONFIG_MALI_NO_MALI) */
-#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
/**
* kbase_gpu_device_create() - Generic create function.
*
@@ -117,15 +113,6 @@ void midgard_model_write_reg(void *h, u32 addr, u32 value);
void midgard_model_read_reg(void *h, u32 addr, u32 *const value);
/**
- * midgard_model_arch_timer_get_cntfrq - Get Model specific System Timer Frequency
- *
- * @h: Model handle.
- *
- * Return: Frequency in Hz
- */
-u64 midgard_model_arch_timer_get_cntfrq(void *h);
-
-/**
* gpu_device_raise_irq() - Private IRQ raise function.
*
* @model: Model handle.
@@ -155,6 +142,5 @@ void gpu_device_set_data(void *model, void *data);
* Return: Pointer to the data carried by model.
*/
void *gpu_device_get_data(void *model);
-#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
#endif /* _KBASE_MODEL_LINUX_H_ */
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
index d8d55f3..06efeaf 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_backend.c
@@ -36,6 +36,9 @@
#include <linux/version_compat_defs.h>
#include <linux/pm_runtime.h>
#include <mali_kbase_reset_gpu.h>
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+#include <csf/mali_kbase_csf_scheduler.h>
+#endif /* !CONFIG_MALI_ARBITER_SUPPORT */
#endif /* !MALI_USE_CSF */
#include <hwcnt/mali_kbase_hwcnt_context.h>
#include <backend/gpu/mali_kbase_pm_internal.h>
@@ -879,9 +882,11 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask)
}
KBASE_EXPORT_TEST_API(kbase_pm_set_debug_core_mask);
#else
-void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0,
- u64 new_core_mask_js1, u64 new_core_mask_js2)
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 *new_core_mask,
+ size_t new_core_mask_size)
{
+ size_t i;
+
lockdep_assert_held(&kbdev->hwaccess_lock);
lockdep_assert_held(&kbdev->pm.lock);
@@ -889,13 +894,14 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_
dev_warn_once(
kbdev->dev,
"Change of core mask not supported for slot 0 as dummy job WA is enabled");
- new_core_mask_js0 = kbdev->pm.debug_core_mask[0];
+ new_core_mask[0] = kbdev->pm.debug_core_mask[0];
}
- kbdev->pm.debug_core_mask[0] = new_core_mask_js0;
- kbdev->pm.debug_core_mask[1] = new_core_mask_js1;
- kbdev->pm.debug_core_mask[2] = new_core_mask_js2;
- kbdev->pm.debug_core_mask_all = new_core_mask_js0 | new_core_mask_js1 | new_core_mask_js2;
+ kbdev->pm.debug_core_mask_all = 0;
+ for (i = 0; i < new_core_mask_size; i++) {
+ kbdev->pm.debug_core_mask[i] = new_core_mask[i];
+ kbdev->pm.debug_core_mask_all |= new_core_mask[i];
+ }
kbase_pm_update_dynamic_cores_onoff(kbdev);
}
@@ -981,7 +987,9 @@ void kbase_hwaccess_pm_resume(struct kbase_device *kbdev)
void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
{
unsigned long flags;
-#if !MALI_USE_CSF
+#if MALI_USE_CSF
+ unsigned long flags_sched;
+#else
ktime_t end_timestamp = ktime_get_raw();
#endif
struct kbase_arbiter_vm_state *arb_vm_state = kbdev->pm.arb_vm_state;
@@ -1000,24 +1008,41 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
*/
WARN(!kbase_is_gpu_removed(kbdev), "GPU is still available after GPU lost event\n");
- /* Full GPU reset will have been done by hypervisor, so
- * cancel
- */
+#if MALI_USE_CSF
+ /* Full GPU reset will have been done by hypervisor, so cancel */
+ kbase_reset_gpu_prevent_and_wait(kbdev);
+
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_csf_scheduler_spin_lock(kbdev, &flags_sched);
+ atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING);
+ kbase_csf_scheduler_spin_unlock(kbdev, flags_sched);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+
+ kbase_synchronize_irqs(kbdev);
+
+ /* Scheduler reset happens outside of spinlock due to the mutex it acquires */
+ kbase_csf_scheduler_reset(kbdev);
+
+ /* Update kbase status */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbdev->protected_mode = false;
+ kbase_pm_update_state(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#else
+ /* Full GPU reset will have been done by hypervisor, so cancel */
atomic_set(&kbdev->hwaccess.backend.reset_gpu, KBASE_RESET_GPU_NOT_PENDING);
hrtimer_cancel(&kbdev->hwaccess.backend.reset_timer);
+
kbase_synchronize_irqs(kbdev);
/* Clear all jobs running on the GPU */
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->protected_mode = false;
-#if !MALI_USE_CSF
kbase_backend_reset(kbdev, &end_timestamp);
kbase_pm_metrics_update(kbdev, NULL);
-#endif
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
-#if !MALI_USE_CSF
/* Cancel any pending HWC dumps */
spin_lock_irqsave(&kbdev->hwcnt.lock, flags);
if (kbdev->hwcnt.backend.state == KBASE_INSTR_STATE_DUMPING ||
@@ -1027,12 +1052,11 @@ void kbase_pm_handle_gpu_lost(struct kbase_device *kbdev)
wake_up(&kbdev->hwcnt.backend.wait);
}
spin_unlock_irqrestore(&kbdev->hwcnt.lock, flags);
-#endif
+#endif /* MALI_USE_CSF */
}
mutex_unlock(&arb_vm_state->vm_state_lock);
rt_mutex_unlock(&kbdev->pm.lock);
}
-
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
#if MALI_USE_CSF && defined(KBASE_PM_RUNTIME)
@@ -1088,7 +1112,7 @@ static int pm_handle_mcu_sleep_on_runtime_suspend(struct kbase_device *kbdev)
*/
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
if (kbdev->pm.backend.gpu_sleep_mode_active && kbdev->pm.backend.exit_gpu_sleep_mode &&
- !work_pending(&kbdev->csf.scheduler.gpu_idle_work)) {
+ !atomic_read(&kbdev->csf.scheduler.pending_gpu_idle_work)) {
u32 glb_req =
kbase_csf_firmware_global_input_read(&kbdev->csf.global_iface, GLB_REQ);
u32 glb_ack = kbase_csf_firmware_global_output(&kbdev->csf.global_iface, GLB_ACK);
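kbase_pm_set_debug_core_mask() now takes an array of per-slot masks plus a count instead of three fixed arguments. A sketch (not from the patch) of an adapted JM caller; the mask values are illustrative, while the locks held mirror the lockdep assertions inside the function:

static void example_set_debug_core_masks(struct kbase_device *kbdev, u64 mask)
{
        u64 new_core_mask[3] = { mask, mask, mask }; /* one mask per job slot */
        unsigned long flags;

        rt_mutex_lock(&kbdev->pm.lock);
        spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
        kbase_pm_set_debug_core_mask(kbdev, new_core_mask, ARRAY_SIZE(new_core_mask));
        spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
        rt_mutex_unlock(&kbdev->pm.lock);
}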
diff --git a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
index 4637861..435892a 100644
--- a/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
+++ b/mali_kbase/backend/gpu/mali_kbase_pm_driver.c
@@ -3277,6 +3277,7 @@ static int kbase_set_tiler_quirks(struct kbase_device *kbdev)
return 0;
}
+
static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
{
struct device_node *np = kbdev->dev->of_node;
@@ -3329,6 +3330,7 @@ static int kbase_pm_hw_issues_detect(struct kbase_device *kbdev)
error = kbase_set_mmu_quirks(kbdev);
}
+
return error;
}
@@ -3348,6 +3350,7 @@ static void kbase_pm_hw_issues_apply(struct kbase_device *kbdev)
#else
kbase_reg_write32(kbdev, GPU_CONTROL_ENUM(JM_CONFIG), kbdev->hw_quirks_gpu);
#endif
+
}
void kbase_pm_cache_snoop_enable(struct kbase_device *kbdev)
diff --git a/mali_kbase/backend/gpu/mali_kbase_time.c b/mali_kbase/backend/gpu/mali_kbase_time.c
index dfdf469..c403161 100644
--- a/mali_kbase/backend/gpu/mali_kbase_time.c
+++ b/mali_kbase/backend/gpu/mali_kbase_time.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,10 +30,7 @@
#include <mali_kbase_config_defaults.h>
#include <linux/version_compat_defs.h>
#include <asm/arch_timer.h>
-
-#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
-#include <backend/gpu/mali_kbase_model_linux.h>
-#endif
+#include <linux/mali_hw_access.h>
struct kbase_timeout_info {
char *selector_str;
@@ -47,6 +44,7 @@ static struct kbase_timeout_info timeout_info[KBASE_TIMEOUT_SELECTOR_COUNT] = {
[CSF_PM_TIMEOUT] = { "CSF_PM_TIMEOUT", CSF_PM_TIMEOUT_CYCLES },
[CSF_GPU_RESET_TIMEOUT] = { "CSF_GPU_RESET_TIMEOUT", CSF_GPU_RESET_TIMEOUT_CYCLES },
[CSF_CSG_SUSPEND_TIMEOUT] = { "CSF_CSG_SUSPEND_TIMEOUT", CSF_CSG_SUSPEND_TIMEOUT_CYCLES },
+ [CSF_CSG_TERM_TIMEOUT] = { "CSF_CSG_TERM_TIMEOUT", CSF_CSG_TERM_TIMEOUT_CYCLES },
[CSF_FIRMWARE_BOOT_TIMEOUT] = { "CSF_FIRMWARE_BOOT_TIMEOUT",
CSF_FIRMWARE_BOOT_TIMEOUT_CYCLES },
[CSF_FIRMWARE_PING_TIMEOUT] = { "CSF_FIRMWARE_PING_TIMEOUT",
@@ -307,11 +305,7 @@ static void get_cpu_gpu_time(struct kbase_device *kbdev, u64 *cpu_ts, u64 *gpu_t
u64 kbase_arch_timer_get_cntfrq(struct kbase_device *kbdev)
{
- u64 freq = arch_timer_get_cntfrq();
-
-#if !IS_ENABLED(CONFIG_MALI_REAL_HW)
- freq = midgard_model_arch_timer_get_cntfrq(kbdev->model);
-#endif
+ u64 freq = mali_arch_timer_get_cntfrq();
dev_dbg(kbdev->dev, "System Timer Freq = %lluHz", freq);
diff --git a/mali_kbase/build.bp b/mali_kbase/build.bp
index 77e193a..a0570c2 100644
--- a/mali_kbase/build.bp
+++ b/mali_kbase/build.bp
@@ -71,18 +71,6 @@ bob_defaults {
mali_real_hw: {
kbuild_options: ["CONFIG_MALI_REAL_HW=y"],
},
- mali_error_inject_none: {
- kbuild_options: ["CONFIG_MALI_ERROR_INJECT_NONE=y"],
- },
- mali_error_inject_track_list: {
- kbuild_options: ["CONFIG_MALI_ERROR_INJECT_TRACK_LIST=y"],
- },
- mali_error_inject_random: {
- kbuild_options: ["CONFIG_MALI_ERROR_INJECT_RANDOM=y"],
- },
- mali_error_inject: {
- kbuild_options: ["CONFIG_MALI_ERROR_INJECT=y"],
- },
mali_debug: {
kbuild_options: [
"CONFIG_MALI_DEBUG=y",
diff --git a/mali_kbase/context/backend/mali_kbase_context_csf.c b/mali_kbase/context/backend/mali_kbase_context_csf.c
index 17caa77..11a66c3 100644
--- a/mali_kbase/context/backend/mali_kbase_context_csf.c
+++ b/mali_kbase/context/backend/mali_kbase_context_csf.c
@@ -189,11 +189,17 @@ void kbase_destroy_context(struct kbase_context *kctx)
* Customer side that a hang could occur if context termination is
* not blocked until the resume of GPU device.
*/
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+ atomic_inc(&kbdev->pm.gpu_users_waiting);
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
while (kbase_pm_context_active_handle_suspend(kbdev,
KBASE_PM_SUSPEND_HANDLER_DONT_INCREASE)) {
dev_info(kbdev->dev, "Suspend in progress when destroying context");
wait_event(kbdev->pm.resume_wait, !kbase_pm_is_suspending(kbdev));
}
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+ atomic_dec(&kbdev->pm.gpu_users_waiting);
+#endif /* CONFIG_MALI_ARBITER_SUPPORT */
/* Have synchronized against the System suspend and incremented the
* pm.active_count. So any subsequent invocation of System suspend
diff --git a/mali_kbase/csf/mali_kbase_csf.c b/mali_kbase/csf/mali_kbase_csf.c
index 6ebbd1e..59c82aa 100644
--- a/mali_kbase/csf/mali_kbase_csf.c
+++ b/mali_kbase/csf/mali_kbase_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -537,6 +537,8 @@ static int csf_queue_register_internal(struct kbase_context *kctx,
queue->blocked_reason = CS_STATUS_BLOCKED_REASON_REASON_UNBLOCKED;
+ queue->clear_faults = true;
+
INIT_LIST_HEAD(&queue->link);
atomic_set(&queue->pending_kick, 0);
INIT_LIST_HEAD(&queue->pending_kick_link);
@@ -732,7 +734,7 @@ out:
}
/**
- * get_bound_queue_group - Get the group to which a queue was bound
+ * get_bound_queue_group() - Get the group to which a queue was bound
*
* @queue: Pointer to the queue for this group
*
@@ -845,6 +847,48 @@ void kbase_csf_ring_cs_kernel_doorbell(struct kbase_device *kbdev, int csi_index
kbase_csf_ring_csg_doorbell(kbdev, csg_nr);
}
+int kbase_csf_queue_group_clear_faults(struct kbase_context *kctx,
+ struct kbase_ioctl_queue_group_clear_faults *faults)
+{
+ void __user *user_bufs = u64_to_user_ptr(faults->addr);
+ u32 i;
+ int ret = 0;
+ struct kbase_device *kbdev = kctx->kbdev;
+ const u32 nr_queues = faults->nr_queues;
+
+ if (unlikely(nr_queues > kbdev->csf.global_iface.groups[0].stream_num)) {
+ dev_warn(kbdev->dev, "Invalid nr_queues %u", nr_queues);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < nr_queues; ++i) {
+ u64 buf_gpu_addr;
+ struct kbase_va_region *region;
+
+ if (copy_from_user(&buf_gpu_addr, user_bufs, sizeof(buf_gpu_addr)))
+ return -EFAULT;
+
+ kbase_gpu_vm_lock(kctx);
+ region = kbase_region_tracker_find_region_enclosing_address(kctx, buf_gpu_addr);
+ if (likely(!kbase_is_region_invalid_or_free(region))) {
+ struct kbase_queue *queue = region->user_data;
+
+ queue->clear_faults = true;
+ } else {
+ dev_warn(kbdev->dev, "GPU queue %u without a valid command buffer region",
+ i);
+ ret = -EFAULT;
+ goto out_unlock;
+ }
+ kbase_gpu_vm_unlock(kctx);
+
+ user_bufs = (void __user *)((uintptr_t)user_bufs + sizeof(buf_gpu_addr));
+ }
+out_unlock:
+ kbase_gpu_vm_unlock(kctx);
+ return ret;
+}
+
int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue_kick *kick)
{
struct kbase_device *kbdev = kctx->kbdev;
@@ -866,7 +910,7 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue
struct kbase_queue *queue = region->user_data;
if (queue && (queue->bind_state == KBASE_CSF_QUEUE_BOUND)) {
- spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
+ spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock);
if (list_empty(&queue->pending_kick_link)) {
/* Queue termination shall block until this
* kick has been handled.
@@ -874,10 +918,12 @@ int kbase_csf_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_cs_queue
atomic_inc(&queue->pending_kick);
list_add_tail(
&queue->pending_kick_link,
- &kbdev->csf.pending_gpuq_kicks[queue->group_priority]);
- complete(&kbdev->csf.scheduler.kthread_signal);
+ &kbdev->csf.pending_gpuq_kick_queues[queue->group_priority]);
+ if (atomic_cmpxchg(&kbdev->csf.pending_gpuq_kicks, false, true) ==
+ false)
+ complete(&kbdev->csf.scheduler.kthread_signal);
}
- spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+ spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock);
}
} else {
dev_dbg(kbdev->dev,
@@ -1093,12 +1139,11 @@ static int create_normal_suspend_buffer(struct kbase_context *const kctx,
}
static void timer_event_worker(struct work_struct *data);
-static void protm_event_worker(struct kthread_work *work);
static void term_normal_suspend_buffer(struct kbase_context *const kctx,
struct kbase_normal_suspend_buffer *s_buf);
/**
- * create_suspend_buffers - Setup normal and protected mode
+ * create_suspend_buffers() - Setup normal and protected mode
* suspend buffers.
*
* @kctx: Address of the kbase context within which the queue group
@@ -1204,7 +1249,8 @@ static int create_queue_group(struct kbase_context *const kctx,
INIT_LIST_HEAD(&group->link_to_schedule);
INIT_LIST_HEAD(&group->error_fatal.link);
INIT_WORK(&group->timer_event_work, timer_event_worker);
- kthread_init_work(&group->protm_event_work, protm_event_worker);
+ INIT_LIST_HEAD(&group->protm_event_work);
+ atomic_set(&group->pending_protm_event_work, 0);
bitmap_zero(group->protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP);
group->run_state = KBASE_CSF_GROUP_INACTIVE;
@@ -1377,7 +1423,7 @@ void kbase_csf_term_descheduled_queue_group(struct kbase_queue_group *group)
}
/**
- * term_queue_group - Terminate a GPU command queue group.
+ * term_queue_group() - Terminate a GPU command queue group.
*
* @group: Pointer to GPU command queue group data.
*
@@ -1405,8 +1451,8 @@ static void term_queue_group(struct kbase_queue_group *group)
}
/**
- * wait_group_deferred_deschedule_completion - Wait for refcount of the group to
- * become 0 that was taken when the group deschedule had to be deferred.
+ * wait_group_deferred_deschedule_completion() - Wait for refcount of the group
+ * to become 0 that was taken when the group deschedule had to be deferred.
*
* @group: Pointer to GPU command queue group that is being deleted.
*
@@ -1435,7 +1481,10 @@ static void wait_group_deferred_deschedule_completion(struct kbase_queue_group *
static void cancel_queue_group_events(struct kbase_queue_group *group)
{
cancel_work_sync(&group->timer_event_work);
- kthread_cancel_work_sync(&group->protm_event_work);
+
+ /* Drain a pending protected mode request if any */
+ kbase_csf_scheduler_wait_for_kthread_pending_work(group->kctx->kbdev,
+ &group->pending_protm_event_work);
}
static void remove_pending_group_fatal_error(struct kbase_queue_group *group)
@@ -1590,6 +1639,7 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
INIT_LIST_HEAD(&kctx->csf.queue_list);
INIT_LIST_HEAD(&kctx->csf.link);
+ atomic_set(&kctx->csf.pending_sync_update, 0);
kbase_csf_event_init(kctx);
@@ -1612,15 +1662,6 @@ int kbase_csf_ctx_init(struct kbase_context *kctx)
err = kbasep_ctx_user_reg_page_mapping_init(kctx);
- if (likely(!err)) {
- err = kbase_kthread_run_worker_rt(kctx->kbdev,
- &kctx->csf.protm_event_worker, "mali_protm_event");
- if (unlikely(err)) {
- dev_err(kctx->kbdev->dev, "error initializing protm event worker thread");
- kbasep_ctx_user_reg_page_mapping_term(kctx);
- }
- }
-
if (likely(!err))
kbase_csf_cpu_queue_init(kctx);
@@ -1824,7 +1865,6 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
rt_mutex_unlock(&kctx->csf.lock);
- kbase_destroy_kworker_stack(&kctx->csf.protm_event_worker);
kbasep_ctx_user_reg_page_mapping_term(kctx);
kbase_csf_tiler_heap_context_term(kctx);
kbase_csf_kcpu_queue_context_term(kctx);
@@ -1835,7 +1875,7 @@ void kbase_csf_ctx_term(struct kbase_context *kctx)
}
/**
- * handle_oom_event - Handle the OoM event generated by the firmware for the
+ * handle_oom_event() - Handle the OoM event generated by the firmware for the
* CSI.
*
* @group: Pointer to the CSG group the oom-event belongs to.
@@ -1905,7 +1945,7 @@ static int handle_oom_event(struct kbase_queue_group *const group,
}
/**
- * report_tiler_oom_error - Report a CSG error due to a tiler heap OOM event
+ * report_tiler_oom_error() - Report a CSG error due to a tiler heap OOM event
*
* @group: Pointer to the GPU command queue group that encountered the error
*/
@@ -1949,7 +1989,7 @@ static void flush_gpu_cache_on_fatal_error(struct kbase_device *kbdev)
}
/**
- * kbase_queue_oom_event - Handle tiler out-of-memory for a GPU command queue.
+ * kbase_queue_oom_event() - Handle tiler out-of-memory for a GPU command queue.
*
* @queue: Pointer to queue for which out-of-memory event was received.
*
@@ -2037,7 +2077,7 @@ unlock:
}
/**
- * oom_event_worker - Tiler out-of-memory handler called from a workqueue.
+ * oom_event_worker() - Tiler out-of-memory handler called from a workqueue.
*
* @data: Pointer to a work_struct embedded in GPU command queue data.
*
@@ -2065,7 +2105,8 @@ static void oom_event_worker(struct work_struct *data)
}
/**
- * report_group_timeout_error - Report the timeout error for the group to userspace.
+ * report_group_timeout_error() - Report the timeout error for the group to
+ * userspace.
*
* @group: Pointer to the group for which timeout error occurred
*/
@@ -2089,7 +2130,7 @@ static void report_group_timeout_error(struct kbase_queue_group *const group)
}
/**
- * timer_event_worker - Handle the progress timeout error for the group
+ * timer_event_worker() - Handle the progress timeout error for the group
*
* @data: Pointer to a work_struct embedded in GPU command queue group data.
*
@@ -2125,7 +2166,7 @@ static void timer_event_worker(struct work_struct *data)
}
/**
- * handle_progress_timer_event - Progress timer timeout event handler.
+ * handle_progress_timer_event() - Progress timer timeout event handler.
*
* @group: Pointer to GPU queue group for which the timeout event is received.
*
@@ -2216,41 +2257,7 @@ static void report_group_fatal_error(struct kbase_queue_group *const group)
}
/**
- * protm_event_worker - Protected mode switch request event handler
- * called from a kthread.
- *
- * @work: Pointer to a kthread_work struct embedded in GPU command queue group data.
- *
- * Request to switch to protected mode.
- */
-static void protm_event_worker(struct kthread_work *work)
-{
- struct kbase_queue_group *const group =
- container_of(work, struct kbase_queue_group, protm_event_work);
- struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
- int err = 0;
-
- KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u);
-
- err = alloc_grp_protected_suspend_buffer_pages(group);
- if (!err) {
- kbase_csf_scheduler_group_protm_enter(group);
- } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) {
- sbuf->alloc_retries++;
- /* try again to allocate pages */
- kthread_queue_work(&group->kctx->csf.protm_event_worker, &group->protm_event_work);
- } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) {
- dev_err(group->kctx->kbdev->dev,
- "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d",
- group->handle, group->kctx->tgid, group->kctx->id);
- report_group_fatal_error(group);
- }
-
- KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u);
-}
-
-/**
- * handle_fault_event - Handler for CS fault.
+ * handle_fault_event() - Handler for CS fault.
*
* @queue: Pointer to queue for which fault event was received.
* @cs_ack: Value of the CS_ACK register in the CS kernel input page used for
@@ -2346,7 +2353,7 @@ static void report_queue_fatal_error(struct kbase_queue *const queue, u32 cs_fat
}
/**
- * cs_error_worker - Handle the CS_FATAL/CS_FAULT error for the GPU queue
+ * cs_error_worker() - Handle the CS_FATAL/CS_FAULT error for the GPU queue
*
* @data: Pointer to a work_struct embedded in GPU command queue.
*
@@ -2425,7 +2432,7 @@ unlock:
}
/**
- * handle_fatal_event - Handler for CS fatal.
+ * handle_fatal_event() - Handler for CS fatal.
*
* @queue: Pointer to queue for which fatal event was received.
* @stream: Pointer to the structure containing info provided by the
@@ -2487,7 +2494,7 @@ static void handle_fatal_event(struct kbase_queue *const queue,
}
/**
- * process_cs_interrupts - Process interrupts for a CS.
+ * process_cs_interrupts() - Process interrupts for a CS.
*
* @group: Pointer to GPU command queue group data.
* @ginfo: The CSG interface provided by the firmware.
@@ -2601,8 +2608,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
}
if (!group->protected_suspend_buf.pma)
- kthread_queue_work(&group->kctx->csf.protm_event_worker,
- &group->protm_event_work);
+ kbase_csf_scheduler_enqueue_protm_event_work(group);
if (test_bit(group->csg_nr, scheduler->csg_slots_idle_mask)) {
clear_bit(group->csg_nr, scheduler->csg_slots_idle_mask);
@@ -2615,7 +2621,7 @@ static void process_cs_interrupts(struct kbase_queue_group *const group,
}
/**
- * process_csg_interrupts - Process interrupts for a CSG.
+ * process_csg_interrupts() - Process interrupts for a CSG.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @csg_nr: CSG number.
@@ -2735,7 +2741,7 @@ static void process_csg_interrupts(struct kbase_device *const kbdev, u32 const c
}
/**
- * process_prfcnt_interrupts - Process performance counter interrupts.
+ * process_prfcnt_interrupts() - Process performance counter interrupts.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @glb_req: Global request register value.
@@ -2807,7 +2813,7 @@ static void process_prfcnt_interrupts(struct kbase_device *kbdev, u32 glb_req, u
}
/**
- * check_protm_enter_req_complete - Check if PROTM_ENTER request completed
+ * check_protm_enter_req_complete() - Check if PROTM_ENTER request completed
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @glb_req: Global request register value.
@@ -2842,7 +2848,7 @@ static inline void check_protm_enter_req_complete(struct kbase_device *kbdev, u3
}
/**
- * process_protm_exit - Handle the protected mode exit interrupt
+ * process_protm_exit() - Handle the protected mode exit interrupt
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
* @glb_ack: Global acknowledge register value.
@@ -2932,8 +2938,7 @@ static inline void process_tracked_info_for_protm(struct kbase_device *kbdev,
if (!tock_triggered) {
dev_dbg(kbdev->dev, "Group-%d on slot-%d start protm work\n", group->handle,
group->csg_nr);
- kthread_queue_work(&group->kctx->csf.protm_event_worker,
- &group->protm_event_work);
+ kbase_csf_scheduler_enqueue_protm_event_work(group);
}
}
}
@@ -2962,6 +2967,46 @@ static void order_job_irq_clear_with_iface_mem_read(void)
dmb(osh);
}
+static const char *const glb_fatal_status_errors[GLB_FATAL_STATUS_VALUE_COUNT] = {
+ [GLB_FATAL_STATUS_VALUE_OK] = "OK",
+ [GLB_FATAL_STATUS_VALUE_ASSERT] = "Firmware assert triggered",
+ [GLB_FATAL_STATUS_VALUE_UNEXPECTED_EXCEPTION] =
+ "Hardware raised an exception firmware did not expect",
+	[GLB_FATAL_STATUS_VALUE_HANG] = "Firmware hang and watchdog timer expired",
+};
+
+/**
+ * handle_glb_fatal_event() - Handle the GLB fatal event
+ *
+ * @kbdev: Instance of GPU device.
+ * @global_iface: CSF global interface
+ */
+static void handle_glb_fatal_event(struct kbase_device *kbdev,
+ const struct kbase_csf_global_iface *const global_iface)
+{
+ const char *error_string = NULL;
+ const u32 fatal_status = kbase_csf_firmware_global_output(global_iface, GLB_FATAL_STATUS);
+
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+ kbase_csf_scheduler_spin_lock_assert_held(kbdev);
+ dev_warn(kbdev->dev, "MCU encountered unrecoverable error");
+
+ if (fatal_status < GLB_FATAL_STATUS_VALUE_COUNT)
+ error_string = glb_fatal_status_errors[fatal_status];
+ else {
+ dev_err(kbdev->dev, "Invalid GLB_FATAL_STATUS (%u)", fatal_status);
+ return;
+ }
+
+ if (fatal_status == GLB_FATAL_STATUS_VALUE_OK)
+ dev_err(kbdev->dev, "GLB_FATAL_STATUS(OK) must be set with proper reason");
+ else {
+ dev_warn(kbdev->dev, "GLB_FATAL_STATUS: %s", error_string);
+ if (kbase_prepare_to_reset_gpu_locked(kbdev, RESET_FLAGS_NONE))
+ kbase_reset_gpu_locked(kbdev);
+ }
+}
+
void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
{
bool deferred_handling_glb_idle_irq = false;
@@ -3036,6 +3081,9 @@ void kbase_csf_interrupt(struct kbase_device *kbdev, u32 val)
deferred_handling_glb_idle_irq = true;
}
+ if (glb_ack & GLB_ACK_FATAL_MASK)
+ handle_glb_fatal_event(kbdev, global_iface);
+
process_prfcnt_interrupts(kbdev, glb_req, glb_ack);
kbase_csf_scheduler_spin_unlock(kbdev, flags);
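
For reference, a minimal sketch (not part of the patch) of how the new fatal-status plumbing can be used. It relies only on the GLB_ACK_FATAL_* and GLB_FATAL_STATUS_* definitions added to mali_kbase_csf_registers.h further down; the helper name and the exact message strings are illustrative:

	/* Illustrative decode helper built on the GLB_FATAL_STATUS_* additions. */
	static const char *glb_fatal_status_to_str(u32 fatal_status)
	{
		switch (GLB_FATAL_STATUS_VALUE_GET(fatal_status)) {
		case GLB_FATAL_STATUS_VALUE_ASSERT:
			return "firmware assert";
		case GLB_FATAL_STATUS_VALUE_UNEXPECTED_EXCEPTION:
			return "unexpected exception";
		case GLB_FATAL_STATUS_VALUE_HANG:
			return "firmware hang (watchdog expired)";
		case GLB_FATAL_STATUS_VALUE_OK:
		default:
			return "no error reported";
		}
	}

	/* In the IRQ path, (glb_ack & GLB_ACK_FATAL_MASK) above is equivalent to
	 * GLB_ACK_FATAL_GET(glb_ack); either form selects bit 27 of GLB_ACK.
	 */
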
@@ -3097,6 +3145,11 @@ void kbase_csf_doorbell_mapping_term(struct kbase_device *kbdev)
if (kbdev->csf.db_filp) {
struct page *page = as_page(kbdev->csf.dummy_db_page);
+		/* This is a shared dummy sink page used to avoid a potential segmentation
+		 * fault in the user-side library when a CSI is off slot. Additionally, the
+		 * call is on the module unload path, so the page can be left uncleared
+		 * before returning it to the kbdev memory pool.
+		 */
kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
fput(kbdev->csf.db_filp);
@@ -3128,26 +3181,27 @@ int kbase_csf_doorbell_mapping_init(struct kbase_device *kbdev)
return 0;
}
-void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev)
+void kbase_csf_pending_gpuq_kick_queues_init(struct kbase_device *kbdev)
{
size_t i;
- for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i)
- INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kicks[i]);
- spin_lock_init(&kbdev->csf.pending_gpuq_kicks_lock);
+ atomic_set(&kbdev->csf.pending_gpuq_kicks, false);
+ for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kick_queues); ++i)
+ INIT_LIST_HEAD(&kbdev->csf.pending_gpuq_kick_queues[i]);
+ spin_lock_init(&kbdev->csf.pending_gpuq_kick_queues_lock);
}
-void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev)
+void kbase_csf_pending_gpuq_kick_queues_term(struct kbase_device *kbdev)
{
size_t i;
- spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
- for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kicks); ++i) {
- if (!list_empty(&kbdev->csf.pending_gpuq_kicks[i]))
+ spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock);
+ for (i = 0; i != ARRAY_SIZE(kbdev->csf.pending_gpuq_kick_queues); ++i) {
+ if (!list_empty(&kbdev->csf.pending_gpuq_kick_queues[i]))
dev_warn(kbdev->dev,
"Some GPU queue kicks for priority %zu were not handled", i);
}
- spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+ spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock);
}
void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
@@ -3155,6 +3209,11 @@ void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev)
if (kbdev->csf.user_reg.filp) {
struct page *page = as_page(kbdev->csf.user_reg.dummy_page);
+		/* This is a shared dummy page used in place of the real USER Register page
+		 * just before the GPU is powered down. Additionally, the call is on the
+		 * module unload path, so the page can be left uncleared before returning
+		 * it to the kbdev memory pool.
+		 */
kbase_mem_pool_free(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW], page, false);
fput(kbdev->csf.user_reg.filp);
}
@@ -3237,17 +3296,17 @@ void kbase_csf_process_queue_kick(struct kbase_queue *queue)
if (err == -EBUSY) {
retry_kick = true;
- spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
+ spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock);
if (list_empty(&queue->pending_kick_link)) {
/* A failed queue kick shall be pushed to the
* back of the queue to avoid potential abuse.
*/
list_add_tail(
&queue->pending_kick_link,
- &kbdev->csf.pending_gpuq_kicks[queue->group_priority]);
- spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+ &kbdev->csf.pending_gpuq_kick_queues[queue->group_priority]);
+ spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock);
} else {
- spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
+ spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock);
WARN_ON(atomic_read(&queue->pending_kick) == 0);
}
@@ -3270,3 +3329,27 @@ out_release_queue:
WARN_ON(atomic_read(&queue->pending_kick) == 0);
atomic_dec(&queue->pending_kick);
}
+
+void kbase_csf_process_protm_event_request(struct kbase_queue_group *group)
+{
+ struct kbase_protected_suspend_buffer *sbuf = &group->protected_suspend_buf;
+ int err = 0;
+
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_START, group, 0u);
+
+ err = alloc_grp_protected_suspend_buffer_pages(group);
+ if (!err) {
+ kbase_csf_scheduler_group_protm_enter(group);
+ } else if (err == -ENOMEM && sbuf->alloc_retries <= PROTM_ALLOC_MAX_RETRIES) {
+ sbuf->alloc_retries++;
+ /* try again to allocate pages */
+ kbase_csf_scheduler_enqueue_protm_event_work(group);
+ } else if (sbuf->alloc_retries >= PROTM_ALLOC_MAX_RETRIES || err != -ENOMEM) {
+ dev_err(group->kctx->kbdev->dev,
+ "Failed to allocate physical pages for Protected mode suspend buffer for the group %d of context %d_%d",
+ group->handle, group->kctx->tgid, group->kctx->id);
+ report_group_fatal_error(group);
+ }
+
+ KBASE_KTRACE_ADD_CSF_GRP(group->kctx->kbdev, PROTM_EVENT_WORKER_END, group, 0u);
+}
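
The kernel-doc added to mali_kbase_csf_defs.h below describes how these requests are handed over: a group is put on @protm_event_work_grps under @protm_event_work_grps_lock, @pending_protm_event_works is raised, and kbase_csf_scheduler_kthread() later calls kbase_csf_process_protm_event_request() for it. A rough approximation of that consumer side, for orientation only; the real loop lives in mali_kbase_csf_scheduler.c, and the lock flavour and flag handling here are assumptions:

	static void drain_protm_event_requests(struct kbase_csf_scheduler *scheduler)
	{
		struct kbase_queue_group *group;

		atomic_set(&scheduler->pending_protm_event_works, false);
		do {
			spin_lock(&scheduler->protm_event_work_grps_lock);
			group = list_first_entry_or_null(&scheduler->protm_event_work_grps,
							 struct kbase_queue_group,
							 protm_event_work);
			if (group)
				list_del_init(&group->protm_event_work);
			spin_unlock(&scheduler->protm_event_work_grps_lock);

			if (group) {
				kbase_csf_process_protm_event_request(group);
				/* Lets group termination see that the request is done. */
				atomic_set(&group->pending_protm_event_work, false);
			}
		} while (group);
	}
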
diff --git a/mali_kbase/csf/mali_kbase_csf.h b/mali_kbase/csf/mali_kbase_csf.h
index 8e067cb..74f1142 100644
--- a/mali_kbase/csf/mali_kbase_csf.h
+++ b/mali_kbase/csf/mali_kbase_csf.h
@@ -245,6 +245,19 @@ struct kbase_queue_group *kbase_csf_find_queue_group(struct kbase_context *kctx,
int kbase_csf_queue_group_handle_is_valid(struct kbase_context *kctx, u8 group_handle);
/**
+ * kbase_csf_queue_group_clear_faults - Re-enable CS Fault reporting.
+ *
+ * @kctx: Pointer to the kbase context within which the
+ *                 CS Faults for the queues have to be re-enabled.
+ * @clear_faults: Pointer to the structure which contains details of the
+ * queues for which the CS Fault reporting has to be re-enabled.
+ *
+ * Return: 0 on success, or negative on failure.
+ */
+int kbase_csf_queue_group_clear_faults(struct kbase_context *kctx,
+ struct kbase_ioctl_queue_group_clear_faults *clear_faults);
+
+/**
* kbase_csf_queue_group_create - Create a GPU command queue group.
*
* @kctx: Pointer to the kbase context within which the
@@ -380,20 +393,20 @@ int kbase_csf_setup_dummy_user_reg_page(struct kbase_device *kbdev);
void kbase_csf_free_dummy_user_reg_page(struct kbase_device *kbdev);
/**
- * kbase_csf_pending_gpuq_kicks_init - Initialize the data used for handling
- * GPU queue kicks.
+ * kbase_csf_pending_gpuq_kick_queues_init - Initialize the data used for handling
+ * GPU queue kicks.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
-void kbase_csf_pending_gpuq_kicks_init(struct kbase_device *kbdev);
+void kbase_csf_pending_gpuq_kick_queues_init(struct kbase_device *kbdev);
/**
- * kbase_csf_pending_gpuq_kicks_term - De-initialize the data used for handling
- * GPU queue kicks.
+ * kbase_csf_pending_gpuq_kick_queues_term - De-initialize the data used for handling
+ * GPU queue kicks.
*
* @kbdev: Instance of a GPU platform device that implements a CSF interface.
*/
-void kbase_csf_pending_gpuq_kicks_term(struct kbase_device *kbdev);
+void kbase_csf_pending_gpuq_kick_queues_term(struct kbase_device *kbdev);
/**
* kbase_csf_ring_csg_doorbell - ring the doorbell for a CSG interface.
@@ -547,4 +560,13 @@ static inline u64 kbase_csf_ktrace_gpu_cycle_cnt(struct kbase_device *kbdev)
*/
void kbase_csf_process_queue_kick(struct kbase_queue *queue);
+/**
+ * kbase_csf_process_protm_event_request - Handle protected mode switch request
+ *
+ * @group: The group whose protected mode entry request is to be handled
+ *
+ * Process a pending request for the group to switch to protected mode.
+ */
+void kbase_csf_process_protm_event_request(struct kbase_queue_group *group);
+
#endif /* _KBASE_CSF_H_ */
diff --git a/mali_kbase/csf/mali_kbase_csf_defs.h b/mali_kbase/csf/mali_kbase_csf_defs.h
index 018bf89..1878646 100644
--- a/mali_kbase/csf/mali_kbase_csf_defs.h
+++ b/mali_kbase/csf/mali_kbase_csf_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -268,6 +268,7 @@ enum kbase_queue_group_priority {
* Shader, L2 and MCU state.
* @CSF_GPU_RESET_TIMEOUT: Waiting timeout for GPU reset to complete.
* @CSF_CSG_SUSPEND_TIMEOUT: Timeout given for a CSG to be suspended.
+ * @CSF_CSG_TERM_TIMEOUT: Timeout given for a CSG to be terminated.
* @CSF_FIRMWARE_BOOT_TIMEOUT: Maximum time to wait for firmware to boot.
* @CSF_FIRMWARE_PING_TIMEOUT: Maximum time to wait for firmware to respond
* to a ping from KBase.
@@ -290,6 +291,7 @@ enum kbase_timeout_selector {
CSF_PM_TIMEOUT,
CSF_GPU_RESET_TIMEOUT,
CSF_CSG_SUSPEND_TIMEOUT,
+ CSF_CSG_TERM_TIMEOUT,
CSF_FIRMWARE_BOOT_TIMEOUT,
CSF_FIRMWARE_PING_TIMEOUT,
CSF_SCHED_PROTM_PROGRESS_TIMEOUT,
@@ -398,6 +400,7 @@ struct kbase_csf_notification {
* @cs_error: Records information about the CS fatal event or
* about CS fault event if dump on fault is enabled.
* @cs_error_fatal: Flag to track if the CS fault or CS fatal event occurred.
+ * @clear_faults: Flag to track whether CS fault reporting is enabled for this queue.
* @extract_ofs: The current EXTRACT offset, this is only updated when handling
* the GLB IDLE IRQ if the idle timeout value is non-0 in order
* to help detect a queue's true idle status.
@@ -441,6 +444,7 @@ struct kbase_queue {
u64 cs_error_info;
u32 cs_error;
bool cs_error_fatal;
+ bool clear_faults;
u64 extract_ofs;
u64 saved_cmd_ptr;
};
@@ -501,6 +505,8 @@ struct kbase_protected_suspend_buffer {
* @compute_max: Maximum number of compute endpoints the group is
* allowed to use.
* @csi_handlers: Requested CSI exception handler flags for the group.
+ * @cs_fault_report_enable: Indicates if reporting of CS_FAULTs to
+ * userspace is enabled.
* @tiler_mask: Mask of tiler endpoints the group is allowed to use.
* @fragment_mask: Mask of fragment endpoints the group is allowed to use.
* @compute_mask: Mask of compute endpoints the group is allowed to use.
@@ -531,8 +537,13 @@ struct kbase_protected_suspend_buffer {
* @bound_queues: Array of registered queues bound to this queue group.
* @doorbell_nr: Index of the hardware doorbell page assigned to the
* group.
- * @protm_event_work: Work item corresponding to the protected mode entry
- * event for this queue.
+ * @protm_event_work: List item corresponding to the protected mode entry
+ *                      event for this group. This would be handled by
+ * kbase_csf_scheduler_kthread().
+ * @pending_protm_event_work: Indicates that kbase_csf_scheduler_kthread() should
+ * handle PROTM request for this group. This would
+ * be set to false when the work is done. This is used
+ * mainly for synchronisation with group termination.
* @protm_pending_bitmap: Bit array to keep a track of CSs that
* have pending protected mode entry requests.
* @error_fatal: An error of type BASE_GPU_QUEUE_GROUP_ERROR_FATAL to be
@@ -569,7 +580,7 @@ struct kbase_queue_group {
u8 compute_max;
u8 csi_handlers;
-
+ __u8 cs_fault_report_enable;
u64 tiler_mask;
u64 fragment_mask;
u64 compute_mask;
@@ -588,7 +599,8 @@ struct kbase_queue_group {
struct kbase_queue *bound_queues[MAX_SUPPORTED_STREAMS_PER_GROUP];
int doorbell_nr;
- struct kthread_work protm_event_work;
+ struct list_head protm_event_work;
+ atomic_t pending_protm_event_work;
DECLARE_BITMAP(protm_pending_bitmap, MAX_SUPPORTED_STREAMS_PER_GROUP);
struct kbase_csf_notification error_fatal;
@@ -625,6 +637,9 @@ struct kbase_queue_group {
* @cmd_seq_num: The sequence number assigned to an enqueued command,
* in incrementing order (older commands shall have a
* smaller number).
+ * @csf_kcpu_worker: Dedicated worker to process KCPU commands for all queues in this
+ * context. This would be used if the context is not prioritised,
+ * otherwise it would be handled by kbase_csf_scheduler_kthread().
* @jit_lock: Lock to serialise JIT operations.
* @jit_cmds_head: A list of the just-in-time memory commands, both
* allocate & free, in submission order, protected
@@ -640,6 +655,7 @@ struct kbase_csf_kcpu_queue_context {
DECLARE_BITMAP(in_use, KBASEP_MAX_KCPU_QUEUES);
atomic64_t cmd_seq_num;
+ struct kthread_worker csf_kcpu_worker;
struct mutex jit_lock;
struct list_head jit_cmds_head;
struct list_head jit_blocked_queues;
@@ -747,13 +763,7 @@ struct kbase_csf_ctx_heap_reclaim_info {
* GPU command queues are idle and at least one of them
* is blocked on a sync wait operation.
* @num_idle_wait_grps: Length of the @idle_wait_groups list.
- * @sync_update_worker: Dedicated workqueue to process work items corresponding
- * to the sync_update events by sync_set/sync_add
- * instruction execution on CSs bound to groups
- * of @idle_wait_groups list.
- * @sync_update_work: work item to process the sync_update events by
- * sync_set / sync_add instruction execution on command
- * streams bound to groups of @idle_wait_groups list.
+ * @sync_update_work: List item to process the SYNC_UPDATE event.
* @ngrp_to_schedule: Number of groups added for the context to the
* 'groups_to_schedule' list of scheduler instance.
* @heap_info: Heap reclaim information data of the kctx. As the
@@ -766,8 +776,7 @@ struct kbase_csf_scheduler_context {
u32 num_runnable_grps;
struct list_head idle_wait_groups;
u32 num_idle_wait_grps;
- struct kthread_worker sync_update_worker;
- struct kthread_work sync_update_work;
+ struct list_head sync_update_work;
u32 ngrp_to_schedule;
struct kbase_csf_ctx_heap_reclaim_info heap_info;
};
@@ -865,10 +874,13 @@ struct kbase_csf_user_reg_context {
* @link: Link to this csf context in the 'runnable_kctxs' list of
* the scheduler instance
* @sched: Object representing the scheduler's context
- * @protm_event_worker: Worker to process requests to enter protected mode.
* @cpu_queue: CPU queue information. Only be available when DEBUG_FS
* is enabled.
* @user_reg: Collective information to support mapping to USER Register page.
+ * @pending_sync_update: Indicates that kbase_csf_scheduler_kthread() should
+ * handle SYNC_UPDATE event for this context. This would
+ * be set to false when the work is done. This is used
+ * mainly for synchronisation with context termination.
*/
struct kbase_csf_context {
struct list_head event_pages_head;
@@ -883,9 +895,9 @@ struct kbase_csf_context {
struct workqueue_struct *wq;
struct list_head link;
struct kbase_csf_scheduler_context sched;
- struct kthread_worker protm_event_worker;
struct kbase_csf_cpu_queue_context cpu_queue;
struct kbase_csf_user_reg_context user_reg;
+ atomic_t pending_sync_update;
};
/**
@@ -930,14 +942,15 @@ struct kbase_csf_csg_slot {
* struct kbase_csf_sched_heap_reclaim_mgr - Object for managing tiler heap reclaim
* kctx lists inside the CSF device's scheduler.
*
- * @heap_reclaim: Tiler heap reclaim shrinker object.
+ * @heap_reclaim: Tiler heap reclaim shrinker object (declared via DEFINE_KBASE_SHRINKER).
* @ctx_lists: Array of kctx lists, size matching CSG defined priorities. The
* lists track the kctxs attached to the reclaim manager.
* @unused_pages: Estimated number of unused pages from the @ctxlist array. The
* number is indicative for use with reclaim shrinker's count method.
*/
struct kbase_csf_sched_heap_reclaim_mgr {
- struct shrinker heap_reclaim;
+ DEFINE_KBASE_SHRINKER heap_reclaim;
+
struct list_head ctx_lists[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
atomic_t unused_pages;
};
@@ -1037,10 +1050,29 @@ struct kbase_csf_mcu_shared_regions {
* workqueue items (kernel-provided delayed_work
* items do not use hrtimer and for some reason do
* not provide sufficiently reliable periodicity).
- * @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should perform
- * a scheduling tick.
- * @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should perform
- * a scheduling tock.
+ * @pending_sync_update_works: Indicates that kbase_csf_scheduler_kthread()
+ * should handle SYNC_UPDATE events.
+ * @sync_update_work_ctxs_lock: Lock protecting the list of contexts that
+ * require handling SYNC_UPDATE events.
+ * @sync_update_work_ctxs: The list of contexts that require handling
+ * SYNC_UPDATE events.
+ * @pending_protm_event_works: Indicates that kbase_csf_scheduler_kthread()
+ * should handle PROTM requests.
+ * @protm_event_work_grps_lock: Lock protecting the list of groups that
+ * have requested protected mode.
+ * @protm_event_work_grps: The list of groups that have requested
+ * protected mode.
+ * @pending_kcpuq_works: Indicates that kbase_csf_scheduler_kthread()
+ * should process pending KCPU queue works.
+ * @kcpuq_work_queues_lock: Lock protecting the list of KCPU queues that
+ * need to be processed.
+ * @kcpuq_work_queues:          The list of KCPU queues that need to be processed.
+ * @pending_tick_work: Indicates that kbase_csf_scheduler_kthread() should
+ * perform a scheduling tick.
+ * @pending_tock_work: Indicates that kbase_csf_scheduler_kthread() should
+ * perform a scheduling tock.
+ * @pending_gpu_idle_work: Indicates that kbase_csf_scheduler_kthread() should
+ * handle the GPU IDLE event.
* @ping_work: Work item that would ping the firmware at regular
* intervals, only if there is a single active CSG
* slot, to check if firmware is alive and would
@@ -1058,10 +1090,6 @@ struct kbase_csf_mcu_shared_regions {
* This pointer being set doesn't necessarily indicates
* that GPU is in protected mode, kbdev->protected_mode
* needs to be checked for that.
- * @idle_wq: Workqueue for executing GPU idle notification
- * handler.
- * @gpu_idle_work: Work item for facilitating the scheduler to bring
- * the GPU to a low-power mode on becoming idle.
* @fast_gpu_idle_handling: Indicates whether to relax many of the checks
* normally done in the GPU idle worker. This is
* set to true when handling the GLB IDLE IRQ if the
@@ -1104,7 +1132,8 @@ struct kbase_csf_mcu_shared_regions {
* thread when a queue needs attention.
* @kthread_running: Whether the GPU queue submission thread should keep
* executing.
- * @gpuq_kthread: High-priority thread used to handle GPU queue
+ * @gpuq_kthread: Dedicated thread primarily used to handle
+ * latency-sensitive tasks such as GPU queue
* submissions.
*/
struct kbase_csf_scheduler {
@@ -1130,14 +1159,22 @@ struct kbase_csf_scheduler {
struct kthread_worker csf_worker;
atomic_t timer_enabled;
struct hrtimer tick_timer;
+ atomic_t pending_sync_update_works;
+ spinlock_t sync_update_work_ctxs_lock;
+ struct list_head sync_update_work_ctxs;
+ atomic_t pending_protm_event_works;
+ spinlock_t protm_event_work_grps_lock;
+ struct list_head protm_event_work_grps;
+ atomic_t pending_kcpuq_works;
+ spinlock_t kcpuq_work_queues_lock;
+ struct list_head kcpuq_work_queues;
atomic_t pending_tick_work;
atomic_t pending_tock_work;
+ atomic_t pending_gpu_idle_work;
struct delayed_work ping_work;
struct kbase_context *top_kctx;
struct kbase_queue_group *top_grp;
struct kbase_queue_group *active_protm_grp;
- struct work_struct gpu_idle_work;
- struct workqueue_struct *idle_wq;
bool fast_gpu_idle_handling;
atomic_t gpu_no_longer_idle;
atomic_t non_idle_offslot_grps;
@@ -1650,10 +1687,12 @@ struct kbase_csf_user_reg {
* @dof: Structure for dump on fault.
* @user_reg: Collective information to support the mapping to
* USER Register page for user processes.
- * @pending_gpuq_kicks: Lists of GPU queue that have been kicked but not
- * yet processed, categorised by queue group's priority.
- * @pending_gpuq_kicks_lock: Protect @pending_gpu_kicks and
- * kbase_queue.pending_kick_link.
+ * @pending_gpuq_kicks: Indicates that kbase_csf_scheduler_kthread()
+ * should handle GPU queue kicks.
+ * @pending_gpuq_kick_queues: Lists of GPU queues that have been kicked but not
+ * yet processed, categorised by queue group's priority.
+ * @pending_gpuq_kick_queues_lock: Protect @pending_gpuq_kick_queues and
+ * kbase_queue.pending_kick_link.
* @quirks_ext: Pointer to an allocated buffer containing the firmware
* workarounds configuration.
* @pmode_sync_sem: RW Semaphore to prevent MMU operations during P.Mode entrance.
@@ -1709,8 +1748,9 @@ struct kbase_csf_device {
struct kbase_debug_coresight_device coresight;
#endif /* IS_ENABLED(CONFIG_MALI_CORESIGHT) */
struct kbase_csf_user_reg user_reg;
- struct list_head pending_gpuq_kicks[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
- spinlock_t pending_gpuq_kicks_lock;
+ atomic_t pending_gpuq_kicks;
+ struct list_head pending_gpuq_kick_queues[KBASE_QUEUE_GROUP_PRIORITY_COUNT];
+ spinlock_t pending_gpuq_kick_queues_lock;
u32 *quirks_ext;
struct rw_semaphore pmode_sync_sem;
};
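
The same producer pattern recurs for each of the new pending-work lists documented above (SYNC_UPDATE contexts, PROTM groups, KCPU queues and GPU queue kicks): put the item on the relevant list under its spinlock, mark it pending, then wake the scheduler kthread. A minimal sketch of what an enqueue helper such as kbase_csf_scheduler_enqueue_sync_update_work() presumably looks like, modelled on the enqueue_gpu_idle_work() change in mali_kbase_csf_scheduler.c further down; the lock flavour and exact flag handling are assumptions:

	void kbase_csf_scheduler_enqueue_sync_update_work(struct kbase_context *kctx)
	{
		struct kbase_csf_scheduler *scheduler = &kctx->kbdev->csf.scheduler;

		spin_lock(&scheduler->sync_update_work_ctxs_lock);
		if (list_empty(&kctx->csf.sched.sync_update_work)) {
			list_add_tail(&kctx->csf.sched.sync_update_work,
				      &scheduler->sync_update_work_ctxs);
			atomic_set(&kctx->csf.pending_sync_update, true);
			atomic_set(&scheduler->pending_sync_update_works, true);
		}
		spin_unlock(&scheduler->sync_update_work_ctxs_lock);

		complete(&scheduler->kthread_signal);
	}
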
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.c b/mali_kbase/csf/mali_kbase_csf_firmware.c
index 2d56d03..6b1ea59 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -56,7 +56,7 @@
#include <linux/delay.h>
#include <linux/version_compat_defs.h>
-static char release_fw_name[] = "mali_csffw-r47p0.bin";
+static char release_fw_name[] = "mali_csffw-r48p0.bin";
static char default_fw_name[] = "mali_csffw.bin";
module_param_string(fw_name, release_fw_name, sizeof(release_fw_name), 0644);
MODULE_PARM_DESC(fw_name, "firmware image");
@@ -1570,7 +1570,6 @@ static bool global_request_complete(struct kbase_device *const kbdev, u32 const
unsigned long flags;
kbase_csf_scheduler_spin_lock(kbdev, &flags);
-
if ((kbase_csf_firmware_global_output(global_iface, GLB_ACK) & req_mask) ==
(kbase_csf_firmware_global_input_read(global_iface, GLB_REQ) & req_mask))
complete = true;
@@ -1908,6 +1907,7 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
{
struct kbase_device *kbdev =
container_of(work, struct kbase_device, csf.firmware_reload_work);
+ unsigned long flags;
int err;
dev_info(kbdev->dev, "reloading firmware");
@@ -1930,7 +1930,9 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
return;
/* Reboot the firmware */
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_csf_firmware_enable_mcu(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
void kbase_csf_firmware_trigger_reload(struct kbase_device *kbdev)
@@ -2301,8 +2303,9 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
init_rwsem(&kbdev->csf.pmode_sync_sem);
kbdev->csf.glb_init_request_pending = true;
+ init_rwsem(&kbdev->csf.pmode_sync_sem);
mutex_init(&kbdev->csf.reg_lock);
- kbase_csf_pending_gpuq_kicks_init(kbdev);
+ kbase_csf_pending_gpuq_kick_queues_init(kbdev);
kbdev->csf.fw = (struct kbase_csf_mcu_fw){ .data = NULL };
@@ -2311,7 +2314,7 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
{
- kbase_csf_pending_gpuq_kicks_term(kbdev);
+ kbase_csf_pending_gpuq_kick_queues_term(kbdev);
mutex_destroy(&kbdev->csf.reg_lock);
}
@@ -2841,6 +2844,7 @@ int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int
return wait_for_global_request_with_timeout(kbdev, GLB_REQ_PING_MASK, wait_timeout_ms);
}
+
int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, u64 const timeout)
{
const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface;
@@ -2944,6 +2948,7 @@ void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *iface = &kbdev->csf.global_iface;
+ lockdep_assert_held(&kbdev->hwaccess_lock);
/* Clear the HALT bit before triggering the boot of MCU firmware */
kbase_csf_firmware_global_input_mask(iface, GLB_REQ, 0, GLB_REQ_HALT_MASK);
@@ -3223,6 +3228,9 @@ void kbase_csf_firmware_mcu_shared_mapping_term(struct kbase_device *kbdev,
}
if (csf_mapping->phys) {
+ /* This is on module unload path, so the pages can be left uncleared before
+ * returning them back to kbdev memory pool.
+ */
kbase_mem_pool_free_pages(&kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
csf_mapping->num_pages, csf_mapping->phys, false, false);
}
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware.h b/mali_kbase/csf/mali_kbase_csf_firmware.h
index db8a1df..ee91637 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware.h
+++ b/mali_kbase/csf/mali_kbase_csf_firmware.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -625,6 +625,7 @@ void kbase_csf_firmware_trigger_mcu_sleep(struct kbase_device *kbdev);
bool kbase_csf_firmware_is_mcu_in_sleep(struct kbase_device *kbdev);
#endif
+
/**
* kbase_csf_firmware_trigger_reload() - Trigger the reboot of MCU firmware, for
* the cold boot case firmware image would
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
index 70c70d2..59c6be4 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_cfg.c
@@ -382,10 +382,10 @@ int kbase_csf_firmware_cfg_fw_wa_init(struct kbase_device *kbdev)
*/
entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks-ext");
- if (entry_count == -EINVAL)
+ if (entry_count < 0)
entry_count = of_property_count_u32_elems(kbdev->dev->of_node, "quirks_ext");
- if (entry_count == -EINVAL || entry_count == -ENODATA)
+ if (entry_count < 0)
return 0;
entry_bytes = (size_t)entry_count * sizeof(u32);
diff --git a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
index 8edf031..28554cd 100644
--- a/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
+++ b/mali_kbase/csf/mali_kbase_csf_firmware_no_mali.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -857,11 +857,11 @@ static void kbase_csf_firmware_reload_worker(struct work_struct *work)
container_of(work, struct kbase_device, csf.firmware_reload_work);
unsigned long flags;
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
/* Reboot the firmware */
kbase_csf_firmware_enable_mcu(kbdev);
/* Tell MCU state machine to transit to next state */
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->csf.firmware_reloaded = true;
kbase_pm_update_state(kbdev);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
@@ -934,7 +934,7 @@ static u32 convert_dur_to_idle_count(struct kbase_device *kbdev, const u32 dur_n
/* add the source flag */
reg_val_u32 = GLB_IDLE_TIMER_TIMER_SOURCE_SET(
reg_val_u32, (src_system_timestamp ? GLB_IDLE_TIMER_TIMER_SOURCE_SYSTEM_TIMESTAMP :
- GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER));
+ GLB_IDLE_TIMER_TIMER_SOURCE_GPU_COUNTER));
return reg_val_u32;
}
@@ -1121,14 +1121,14 @@ int kbase_csf_firmware_early_init(struct kbase_device *kbdev)
init_rwsem(&kbdev->csf.pmode_sync_sem);
mutex_init(&kbdev->csf.reg_lock);
- kbase_csf_pending_gpuq_kicks_init(kbdev);
+ kbase_csf_pending_gpuq_kick_queues_init(kbdev);
return 0;
}
void kbase_csf_firmware_early_term(struct kbase_device *kbdev)
{
- kbase_csf_pending_gpuq_kicks_term(kbdev);
+ kbase_csf_pending_gpuq_kick_queues_term(kbdev);
mutex_destroy(&kbdev->csf.reg_lock);
}
@@ -1310,6 +1310,7 @@ int kbase_csf_firmware_ping_wait(struct kbase_device *const kbdev, unsigned int
return wait_for_global_request(kbdev, GLB_REQ_PING_MASK);
}
+
int kbase_csf_firmware_set_timeout(struct kbase_device *const kbdev, u64 const timeout)
{
const struct kbase_csf_global_iface *const global_iface = &kbdev->csf.global_iface;
@@ -1372,6 +1373,8 @@ void kbase_csf_firmware_trigger_mcu_halt(struct kbase_device *kbdev)
void kbase_csf_firmware_enable_mcu(struct kbase_device *kbdev)
{
+ lockdep_assert_held(&kbdev->hwaccess_lock);
+
/* Trigger the boot of MCU firmware, Use the AUTO mode as
* otherwise on fast reset, to exit protected mode, MCU will
* not reboot by itself to enter normal mode.
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.c b/mali_kbase/csf/mali_kbase_csf_kcpu.c
index ca080cf..214c4da 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -39,13 +39,7 @@
static DEFINE_SPINLOCK(kbase_csf_fence_lock);
#endif
-#ifdef CONFIG_MALI_FENCE_DEBUG
#define FENCE_WAIT_TIMEOUT_MS 3000
-#endif
-
-static void kcpu_queue_process(struct kbase_kcpu_command_queue *kcpu_queue, bool drain_queue);
-
-static void kcpu_queue_process_worker(struct kthread_work *data);
static int kbase_kcpu_map_import_prepare(struct kbase_kcpu_command_queue *kcpu_queue,
struct base_kcpu_command_import_info *import_info,
@@ -445,6 +439,16 @@ static void kbase_kcpu_jit_allocate_finish(struct kbase_kcpu_command_queue *queu
kfree(cmd->info.jit_alloc.info);
}
+static void enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue)
+{
+ struct kbase_context *const kctx = queue->kctx;
+
+ if (!atomic_read(&kctx->prioritized))
+ kthread_queue_work(&kctx->csf.kcpu_queues.csf_kcpu_worker, &queue->work);
+ else
+ kbase_csf_scheduler_enqueue_kcpuq_work(queue);
+}
+
/**
* kbase_kcpu_jit_retry_pending_allocs() - Retry blocked JIT_ALLOC commands
*
@@ -464,7 +468,7 @@ static void kbase_kcpu_jit_retry_pending_allocs(struct kbase_context *kctx)
* kbase_csf_kcpu_queue_context.jit_lock .
*/
list_for_each_entry(blocked_queue, &kctx->csf.kcpu_queues.jit_blocked_queues, jit_blocked)
- kthread_queue_work(&blocked_queue->csf_kcpu_worker, &blocked_queue->work);
+ enqueue_kcpuq_work(blocked_queue);
}
static int kbase_kcpu_jit_free_process(struct kbase_kcpu_command_queue *queue,
@@ -716,7 +720,7 @@ static enum kbase_csf_event_callback_action event_cqs_callback(void *param)
{
struct kbase_kcpu_command_queue *kcpu_queue = (struct kbase_kcpu_command_queue *)param;
- kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->work);
+ enqueue_kcpuq_work(kcpu_queue);
return KBASE_CSF_EVENT_CALLBACK_KEEP;
}
@@ -1317,7 +1321,7 @@ static void kbase_csf_fence_wait_callback(struct dma_fence *fence, struct dma_fe
fence->seqno);
/* Resume kcpu command queue processing. */
- kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->work);
+ enqueue_kcpuq_work(kcpu_queue);
}
static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_queue,
@@ -1353,7 +1357,6 @@ static void kbasep_kcpu_fence_wait_cancel(struct kbase_kcpu_command_queue *kcpu_
fence_info->fence = NULL;
}
-#ifdef CONFIG_MALI_FENCE_DEBUG
/**
* fence_timeout_callback() - Timeout callback function for fence-wait
*
@@ -1392,7 +1395,7 @@ static void fence_timeout_callback(struct timer_list *timer)
kbase_sync_fence_info_get(fence, &info);
if (info.status == 1) {
- kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->work);
+ enqueue_kcpuq_work(kcpu_queue);
} else if (info.status == 0) {
dev_warn(kctx->kbdev->dev, "fence has not yet signalled in %ums",
FENCE_WAIT_TIMEOUT_MS);
@@ -1421,7 +1424,6 @@ static void fence_wait_timeout_start(struct kbase_kcpu_command_queue *cmd)
{
mod_timer(&cmd->fence_timeout, jiffies + msecs_to_jiffies(FENCE_WAIT_TIMEOUT_MS));
}
-#endif
/**
* kbase_kcpu_fence_wait_process() - Process the kcpu fence wait command
@@ -1460,9 +1462,8 @@ static int kbase_kcpu_fence_wait_process(struct kbase_kcpu_command_queue *kcpu_q
fence_status = cb_err;
if (cb_err == 0) {
kcpu_queue->fence_wait_processed = true;
-#ifdef CONFIG_MALI_FENCE_DEBUG
- fence_wait_timeout_start(kcpu_queue);
-#endif
+ if (IS_ENABLED(CONFIG_MALI_FENCE_DEBUG))
+ fence_wait_timeout_start(kcpu_queue);
} else if (cb_err == -ENOENT) {
fence_status = dma_fence_get_status(fence);
if (!fence_status) {
@@ -1683,7 +1684,7 @@ static void fence_signal_timeout_cb(struct timer_list *timer)
if (atomic_read(&kcpu_queue->fence_signal_pending_cnt) > 1)
fence_signal_timeout_start(kcpu_queue);
- kthread_queue_work(&kcpu_queue->csf_kcpu_worker, &kcpu_queue->timeout_work);
+ kthread_queue_work(&kctx->csf.kcpu_queues.csf_kcpu_worker, &kcpu_queue->timeout_work);
}
}
@@ -1964,7 +1965,7 @@ static void kcpu_queue_process_worker(struct kthread_work *data)
container_of(data, struct kbase_kcpu_command_queue, work);
mutex_lock(&queue->lock);
- kcpu_queue_process(queue, false);
+ kbase_csf_kcpu_queue_process(queue, false);
mutex_unlock(&queue->lock);
}
@@ -1997,7 +1998,7 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
/* Drain the remaining work for this queue first and go past
* all the waits.
*/
- kcpu_queue_process(queue, true);
+ kbase_csf_kcpu_queue_process(queue, true);
/* All commands should have been processed */
WARN_ON(queue->num_pending_cmds);
@@ -2012,11 +2013,20 @@ static int delete_queue(struct kbase_context *kctx, u32 id)
mutex_unlock(&queue->lock);
- kbase_destroy_kworker_stack(&queue->csf_kcpu_worker);
+ kthread_cancel_work_sync(&queue->timeout_work);
+ /*
+ * Drain a pending request to process this queue in
+ * kbase_csf_scheduler_kthread() if any. By this point the
+ * queue would be empty so this would be a no-op.
+ */
+ kbase_csf_scheduler_wait_for_kthread_pending_work(kctx->kbdev,
+ &queue->pending_kick);
+
+ kthread_cancel_work_sync(&queue->work);
mutex_destroy(&queue->lock);
- kfree(queue);
+ vfree(queue);
} else {
dev_dbg(kctx->kbdev->dev, "Attempt to delete a non-existent KCPU queue");
mutex_unlock(&kctx->csf.kcpu_queues.lock);
@@ -2069,7 +2079,7 @@ KBASE_TLSTREAM_TL_KBASE_KCPUQUEUE_EXECUTE_JIT_FREE_END(struct kbase_device *kbde
KBASE_TLSTREAM_TL_KBASE_ARRAY_END_KCPUQUEUE_EXECUTE_JIT_FREE_END(kbdev, queue);
}
-static void kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue)
+void kbase_csf_kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue)
{
struct kbase_device *kbdev = queue->kctx->kbdev;
bool process_next = true;
@@ -2632,7 +2642,7 @@ int kbase_csf_kcpu_queue_enqueue(struct kbase_context *kctx,
}
queue->num_pending_cmds += enq->nr_commands;
- kcpu_queue_process(queue, false);
+ kbase_csf_kcpu_queue_process(queue, false);
}
out:
@@ -2643,6 +2653,12 @@ out:
int kbase_csf_kcpu_queue_context_init(struct kbase_context *kctx)
{
+ int ret = kbase_kthread_run_worker_rt(kctx->kbdev, &kctx->csf.kcpu_queues.csf_kcpu_worker, "csf_kcpu_worker");
+ if (ret) {
+ dev_err(kctx->kbdev->dev, "Failed to initialize KCPU worker");
+ return ret;
+ }
+
mutex_init(&kctx->csf.kcpu_queues.lock);
return 0;
}
@@ -2659,6 +2675,7 @@ void kbase_csf_kcpu_queue_context_term(struct kbase_context *kctx)
}
mutex_destroy(&kctx->csf.kcpu_queues.lock);
+ kbase_destroy_kworker_stack(&kctx->csf.kcpu_queues.csf_kcpu_worker);
}
KBASE_EXPORT_TEST_API(kbase_csf_kcpu_queue_context_term);
@@ -2668,15 +2685,42 @@ int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx,
return delete_queue(kctx, (u32)del->id);
}
+static struct kbase_kcpu_dma_fence_meta *
+kbase_csf_kcpu_queue_metadata_new(struct kbase_context *kctx, u64 fence_context)
+{
+ int n;
+ struct kbase_kcpu_dma_fence_meta *metadata = kzalloc(sizeof(*metadata), GFP_KERNEL);
+
+ if (!metadata)
+ goto early_ret;
+
+ *metadata = (struct kbase_kcpu_dma_fence_meta){
+ .kbdev = kctx->kbdev,
+ .kctx_id = kctx->id,
+ };
+
+ /* Please update MAX_TIMELINE_NAME macro when making changes to the string. */
+ n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%u-%d_%u-%llu-kcpu",
+ kctx->kbdev->id, kctx->tgid, kctx->id, fence_context);
+ if (WARN_ON(n >= MAX_TIMELINE_NAME)) {
+ kfree(metadata);
+ metadata = NULL;
+ goto early_ret;
+ }
+
+ kbase_refcount_set(&metadata->refcount, 1);
+
+early_ret:
+ return metadata;
+}
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_csf_kcpu_queue_metadata_new, ERRNO_NULL);
+
int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu_queue_new *newq)
{
struct kbase_kcpu_command_queue *queue;
+ struct kbase_kcpu_dma_fence_meta *metadata;
int idx;
- int n;
int ret = 0;
-#if IS_ENABLED(CONFIG_SYNC_FILE)
- struct kbase_kcpu_dma_fence_meta *metadata;
-#endif
/* The queue id is of u8 type and we use the index of the kcpu_queues
* array as an id, so the number of elements in the array can't be
* more than 256.
@@ -2696,61 +2740,51 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu
goto out;
}
- queue = kzalloc(sizeof(*queue), GFP_KERNEL);
-
+ queue = vzalloc(sizeof(*queue));
if (!queue) {
ret = -ENOMEM;
goto out;
}
- ret = kbase_kthread_run_worker_rt(kctx->kbdev, &queue->csf_kcpu_worker, "csf_kcpu_%i", idx);
-
- if (ret) {
- kfree(queue);
- goto out;
- }
+ *queue = (struct kbase_kcpu_command_queue)
+ {
+ .kctx = kctx, .start_offset = 0, .num_pending_cmds = 0, .enqueue_failed = false,
+ .command_started = false, .has_error = false, .id = idx,
+#if IS_ENABLED(CONFIG_SYNC_FILE)
+ .fence_context = dma_fence_context_alloc(1), .fence_seqno = 0,
+ .fence_wait_processed = false,
+#endif /* IS_ENABLED(CONFIG_SYNC_FILE) */
+ };
mutex_init(&queue->lock);
- queue->kctx = kctx;
- queue->start_offset = 0;
- queue->num_pending_cmds = 0;
-#if IS_ENABLED(CONFIG_SYNC_FILE)
- queue->fence_context = dma_fence_context_alloc(1);
- queue->fence_seqno = 0;
- queue->fence_wait_processed = false;
-
- metadata = kzalloc(sizeof(*metadata), GFP_KERNEL);
- if (!metadata) {
- kbase_destroy_kworker_stack(&queue->csf_kcpu_worker);
- kfree(queue);
- ret = -ENOMEM;
- goto out;
- }
- metadata->kbdev = kctx->kbdev;
- metadata->kctx_id = kctx->id;
- n = snprintf(metadata->timeline_name, MAX_TIMELINE_NAME, "%u-%d_%u-%llu-kcpu",
- kctx->kbdev->id, kctx->tgid, kctx->id, queue->fence_context);
- if (WARN_ON(n >= MAX_TIMELINE_NAME)) {
- kbase_destroy_kworker_stack(&queue->csf_kcpu_worker);
- kfree(queue);
- kfree(metadata);
- ret = -EINVAL;
- goto out;
- }
- kbase_refcount_set(&metadata->refcount, 1);
- queue->metadata = metadata;
- atomic_inc(&kctx->kbdev->live_fence_metadata);
-#endif /* CONFIG_SYNC_FILE */
- queue->enqueue_failed = false;
- queue->command_started = false;
+ INIT_LIST_HEAD(&queue->high_prio_work);
+ atomic_set(&queue->pending_kick, 0);
INIT_LIST_HEAD(&queue->jit_blocked);
- queue->has_error = false;
+
kthread_init_work(&queue->work, kcpu_queue_process_worker);
kthread_init_work(&queue->timeout_work, kcpu_queue_timeout_worker);
- queue->id = idx;
+ if (IS_ENABLED(CONFIG_SYNC_FILE)) {
+ metadata = kbase_csf_kcpu_queue_metadata_new(kctx, queue->fence_context);
+ if (!metadata) {
+ vfree(queue);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ queue->metadata = metadata;
+ atomic_inc(&kctx->kbdev->live_fence_metadata);
+ atomic_set(&queue->fence_signal_pending_cnt, 0);
+ kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb);
+ }
+
+ if (IS_ENABLED(CONFIG_MALI_FENCE_DEBUG))
+ kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback);
+
+ bitmap_set(kctx->csf.kcpu_queues.in_use, (unsigned int)idx, 1);
+ kctx->csf.kcpu_queues.array[idx] = queue;
newq->id = idx;
/* Fire the tracepoint with the mutex held to enforce correct ordering
@@ -2760,16 +2794,6 @@ int kbase_csf_kcpu_queue_new(struct kbase_context *kctx, struct kbase_ioctl_kcpu
queue->num_pending_cmds);
KBASE_KTRACE_ADD_CSF_KCPU(kctx->kbdev, KCPU_QUEUE_CREATE, queue, queue->fence_context, 0);
-#ifdef CONFIG_MALI_FENCE_DEBUG
- kbase_timer_setup(&queue->fence_timeout, fence_timeout_callback);
-#endif
-
-#if IS_ENABLED(CONFIG_SYNC_FILE)
- atomic_set(&queue->fence_signal_pending_cnt, 0);
- kbase_timer_setup(&queue->fence_signal_timeout, fence_signal_timeout_cb);
-#endif
- bitmap_set(kctx->csf.kcpu_queues.in_use, idx, 1);
- kctx->csf.kcpu_queues.array[idx] = queue;
out:
mutex_unlock(&kctx->csf.kcpu_queues.lock);
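
As a worked example of the timeline name composed in kbase_csf_kcpu_queue_metadata_new() above (all values hypothetical): with kbdev->id = 0, kctx->tgid = 1234, kctx->id = 7 and fence_context = 42, the "%u-%d_%u-%llu-kcpu" format yields

	0-1234_7-42-kcpu

and the WARN_ON() only fires if the formatted name would have been truncated to fit MAX_TIMELINE_NAME.
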
diff --git a/mali_kbase/csf/mali_kbase_csf_kcpu.h b/mali_kbase/csf/mali_kbase_csf_kcpu.h
index 4afe251..85d7644 100644
--- a/mali_kbase/csf/mali_kbase_csf_kcpu.h
+++ b/mali_kbase/csf/mali_kbase_csf_kcpu.h
@@ -240,15 +240,25 @@ struct kbase_kcpu_command {
* @kctx: The context to which this command queue belongs.
* @commands: Array of commands which have been successfully
* enqueued to this command queue.
- * @csf_kcpu_worker: Dedicated worker for processing kernel CPU command
- * queues.
* @work: struct kthread_work which contains a pointer to
* the function which handles processing of kcpu
* commands enqueued into a kcpu command queue;
* part of kernel API for processing workqueues
+ * This would be used if the context is not
+ * prioritised, otherwise it would be handled by
+ * kbase_csf_scheduler_kthread().
* @timeout_work: struct kthread_work which contains a pointer to the
* function which handles post-timeout actions
* queue when a fence signal timeout occurs.
+ * @high_prio_work: A counterpart to @work, this queue would be
+ * added to a list to be processed by
+ *                      kbase_csf_scheduler_kthread() if its context
+ *                      is prioritised.
+ * @pending_kick: Indicates that kbase_csf_scheduler_kthread()
+ * should re-evaluate pending commands for this
+ * queue. This would be set to false when the work
+ * is done. This is used mainly for
+ * synchronisation with queue termination.
* @start_offset: Index of the command to be executed next
* @id: KCPU command queue ID.
* @num_pending_cmds: The number of commands enqueued but not yet
@@ -288,9 +298,10 @@ struct kbase_kcpu_command_queue {
struct mutex lock;
struct kbase_context *kctx;
struct kbase_kcpu_command commands[KBASEP_KCPU_QUEUE_SIZE];
- struct kthread_worker csf_kcpu_worker;
struct kthread_work work;
struct kthread_work timeout_work;
+ struct list_head high_prio_work;
+ atomic_t pending_kick;
u8 start_offset;
u8 id;
u16 num_pending_cmds;
@@ -302,9 +313,7 @@ struct kbase_kcpu_command_queue {
bool command_started;
struct list_head jit_blocked;
bool has_error;
-#ifdef CONFIG_MALI_FENCE_DEBUG
struct timer_list fence_timeout;
-#endif /* CONFIG_MALI_FENCE_DEBUG */
#if IS_ENABLED(CONFIG_SYNC_FILE)
struct kbase_kcpu_dma_fence_meta *metadata;
#endif /* CONFIG_SYNC_FILE */
@@ -338,6 +347,18 @@ int kbase_csf_kcpu_queue_delete(struct kbase_context *kctx,
struct kbase_ioctl_kcpu_queue_delete *del);
/**
+ * kbase_csf_kcpu_queue_process - Process pending KCPU queue commands
+ *
+ * @queue: The queue to process pending commands for
+ * @drain_queue: Whether to skip all blocking commands in the queue.
+ * This is expected to be set to true on queue
+ * termination.
+ */
+void kbase_csf_kcpu_queue_process(struct kbase_kcpu_command_queue *queue, bool drain_queue);
+
+/**
* kbase_csf_kcpu_queue_enqueue - Enqueue a KCPU command into a KCPU command
* queue.
*
diff --git a/mali_kbase/csf/mali_kbase_csf_registers.h b/mali_kbase/csf/mali_kbase_csf_registers.h
index d01f307..9a7c6e4 100644
--- a/mali_kbase/csf/mali_kbase_csf_registers.h
+++ b/mali_kbase/csf/mali_kbase_csf_registers.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -250,7 +250,7 @@
#define GLB_ACK 0x0000 /* () Global acknowledge */
#define GLB_DB_ACK 0x0008 /* () Global doorbell acknowledge */
-#define GLB_HALT_STATUS 0x0010 /* () Global halt status */
+#define GLB_FATAL_STATUS 0x0010 /* () Global fatal error status */
#define GLB_PRFCNT_STATUS 0x0014 /* () Performance counter status */
#define GLB_PRFCNT_INSERT 0x0018 /* () Performance counter buffer insert index */
#define GLB_DEBUG_FWUTF_RESULT GLB_DEBUG_ARG_OUT0 /* () Firmware debug test result */
@@ -1422,6 +1422,12 @@
#define GLB_REQ_PRFCNT_OVERFLOW_SET(reg_val, value) \
(((reg_val) & ~GLB_REQ_PRFCNT_OVERFLOW_MASK) | \
(((value) << GLB_REQ_PRFCNT_OVERFLOW_SHIFT) & GLB_REQ_PRFCNT_OVERFLOW_MASK))
+#define GLB_ACK_FATAL_SHIFT GPU_U(27)
+#define GLB_ACK_FATAL_MASK (GPU_U(0x1) << GLB_ACK_FATAL_SHIFT)
+#define GLB_ACK_FATAL_GET(reg_val) (((reg_val)&GLB_ACK_FATAL_MASK) >> GLB_ACK_FATAL_SHIFT)
+#define GLB_ACK_FATAL_SET(reg_val, value) \
+ (~(~(reg_val) | GLB_ACK_FATAL_MASK) | \
+ (((value) << GLB_ACK_FATAL_SHIFT) & GLB_ACK_FATAL_MASK))
#define GLB_REQ_DEBUG_CSF_REQ_SHIFT 30
#define GLB_REQ_DEBUG_CSF_REQ_MASK (0x1 << GLB_REQ_DEBUG_CSF_REQ_SHIFT)
#define GLB_REQ_DEBUG_CSF_REQ_GET(reg_val) \
@@ -1822,6 +1828,20 @@
(((reg_val) & ~GLB_DEBUG_REQ_RUN_MODE_MASK) | \
(((value) << GLB_DEBUG_REQ_RUN_MODE_SHIFT) & GLB_DEBUG_REQ_RUN_MODE_MASK))
+/* GLB_FATAL_STATUS register */
+#define GLB_FATAL_STATUS_VALUE_SHIFT GPU_U(0)
+#define GLB_FATAL_STATUS_VALUE_MASK (GPU_U(0xFFFFFFFF) << GLB_FATAL_STATUS_VALUE_SHIFT)
+#define GLB_FATAL_STATUS_VALUE_GET(reg_val) \
+ (((reg_val)&GLB_FATAL_STATUS_VALUE_MASK) >> GLB_FATAL_STATUS_VALUE_SHIFT)
+
+enum glb_fatal_status {
+ GLB_FATAL_STATUS_VALUE_OK,
+ GLB_FATAL_STATUS_VALUE_ASSERT,
+ GLB_FATAL_STATUS_VALUE_UNEXPECTED_EXCEPTION,
+ GLB_FATAL_STATUS_VALUE_HANG,
+ GLB_FATAL_STATUS_VALUE_COUNT
+};
+
/* GLB_DEBUG_ACK register */
#define GLB_DEBUG_ACK_DEBUG_RUN_SHIFT GPU_U(23)
#define GLB_DEBUG_ACK_DEBUG_RUN_MASK (GPU_U(0x1) << GLB_DEBUG_ACK_DEBUG_RUN_SHIFT)
diff --git a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
index c722c96..f1ddae4 100644
--- a/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
+++ b/mali_kbase/csf/mali_kbase_csf_reset_gpu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2019-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2019-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -240,6 +240,8 @@ void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
{
struct kbase_csf_global_iface *global_iface = &kbdev->csf.global_iface;
+ unsigned long flags;
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbase_io_history_dump(kbdev);
dev_err(kbdev->dev, "MCU state:");
dev_err(kbdev->dev, "Register state:");
@@ -273,6 +275,7 @@ void kbase_csf_debug_dump_registers(struct kbase_device *kbdev)
kbase_csf_firmware_global_input_read(global_iface, GLB_REQ),
kbase_csf_firmware_global_output(global_iface, GLB_ACK));
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
}
/**
@@ -418,6 +421,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_ini
*/
if (likely(firmware_inited))
kbase_csf_scheduler_reset(kbdev);
+
cancel_work_sync(&kbdev->csf.firmware_reload_work);
dev_dbg(kbdev->dev, "Disable GPU hardware counters.\n");
@@ -425,6 +429,7 @@ static int kbase_csf_reset_gpu_now(struct kbase_device *kbdev, bool firmware_ini
kbase_hwcnt_context_disable(kbdev->hwcnt_gpu_ctx);
ret = kbase_csf_reset_gpu_once(kbdev, firmware_inited, silent);
+
if (ret == SOFT_RESET_FAILED) {
dev_err(kbdev->dev, "Soft-reset failed");
goto err;
@@ -518,6 +523,13 @@ static void kbase_csf_reset_gpu_worker(struct work_struct *data)
bool kbase_prepare_to_reset_gpu(struct kbase_device *kbdev, unsigned int flags)
{
+#ifdef CONFIG_MALI_ARBITER_SUPPORT
+ if (kbase_pm_is_gpu_lost(kbdev)) {
+ /* GPU access has been removed, reset will be done by Arbiter instead */
+ return false;
+ }
+#endif
+
if (flags & RESET_FLAGS_HWC_UNRECOVERABLE_ERROR)
kbase_hwcnt_backend_csf_on_unrecoverable_error(&kbdev->hwcnt_gpu_iface);
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.c b/mali_kbase/csf/mali_kbase_csf_scheduler.c
index 22a08a6..f1ff8bf 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.c
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -39,6 +39,7 @@
#include "mali_kbase_csf_tiler_heap_reclaim.h"
#include "mali_kbase_csf_mcu_shared_reg.h"
#include <linux/version_compat_defs.h>
+#include <hwcnt/mali_kbase_hwcnt_context.h>
#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
#include <mali_kbase_gpu_metrics.h>
#include <csf/mali_kbase_csf_trace_buffer.h>
@@ -91,7 +92,8 @@ scheduler_get_protm_enter_async_group(struct kbase_device *const kbdev,
struct kbase_queue_group *const group);
static struct kbase_queue_group *get_tock_top_group(struct kbase_csf_scheduler *const scheduler);
static void scheduler_enable_tick_timer_nolock(struct kbase_device *kbdev);
-static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask);
+static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask,
+ bool reset);
static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool system_suspend);
static void schedule_in_cycle(struct kbase_queue_group *group, bool force);
static bool queue_group_scheduled_locked(struct kbase_queue_group *group);
@@ -795,7 +797,8 @@ static void update_on_slot_queues_offsets(struct kbase_device *kbdev)
static void enqueue_gpu_idle_work(struct kbase_csf_scheduler *const scheduler)
{
atomic_set(&scheduler->gpu_no_longer_idle, false);
- queue_work(scheduler->idle_wq, &scheduler->gpu_idle_work);
+ atomic_inc(&scheduler->pending_gpu_idle_work);
+ complete(&scheduler->kthread_signal);
}
bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
@@ -807,7 +810,8 @@ bool kbase_csf_scheduler_process_gpu_idle_event(struct kbase_device *kbdev)
lockdep_assert_held(&kbdev->hwaccess_lock);
lockdep_assert_held(&scheduler->interrupt_lock);
- can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev);
+ can_suspend_on_idle = kbase_pm_idle_groups_sched_suspendable(kbdev) &&
+ !kbase_pm_is_mcu_inactive(kbdev, kbdev->pm.backend.mcu_state);
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_EVENT_CAN_SUSPEND, NULL,
(((u64)can_suspend_on_idle) << 32));
@@ -2423,6 +2427,11 @@ static void cancel_tock_work(struct kbase_csf_scheduler *const scheduler)
atomic_set(&scheduler->pending_tock_work, false);
}
+static void cancel_gpu_idle_work(struct kbase_csf_scheduler *const scheduler)
+{
+ atomic_set(&scheduler->pending_gpu_idle_work, false);
+}
+
static void remove_group_from_runnable(struct kbase_csf_scheduler *const scheduler,
struct kbase_queue_group *group,
enum kbase_csf_group_state run_state)
@@ -3148,8 +3157,9 @@ static void sched_evict_group(struct kbase_queue_group *group, bool fault,
static int term_group_sync(struct kbase_queue_group *group)
{
struct kbase_device *kbdev = group->kctx->kbdev;
- const unsigned int fw_timeout_ms = kbase_get_timeout_ms(kbdev, CSF_FIRMWARE_TIMEOUT);
- long remaining = kbase_csf_timeout_in_jiffies(fw_timeout_ms);
+ const unsigned int group_term_timeout_ms =
+ kbase_get_timeout_ms(kbdev, CSF_CSG_TERM_TIMEOUT);
+ long remaining = kbase_csf_timeout_in_jiffies(group_term_timeout_ms);
int err = 0;
term_csg_slot(group);
@@ -3166,7 +3176,7 @@ static int term_group_sync(struct kbase_queue_group *group)
dev_warn(
kbdev->dev,
"[%llu] term request timeout (%d ms) for group %d of context %d_%d on slot %d",
- kbase_backend_get_cycle_cnt(kbdev), fw_timeout_ms, group->handle,
+ kbase_backend_get_cycle_cnt(kbdev), group_term_timeout_ms, group->handle,
group->kctx->tgid, group->kctx->id, group->csg_nr);
if (kbase_csf_firmware_ping_wait(kbdev, FW_PING_AFTER_ERROR_TIMEOUT_MS))
error_type = DF_PING_REQUEST_TIMEOUT;
@@ -4824,8 +4834,9 @@ static int suspend_active_groups_on_powerdown(struct kbase_device *kbdev, bool s
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
DECLARE_BITMAP(slot_mask, MAX_SUPPORTED_CSGS) = { 0 };
+ int ret;
- int ret = suspend_active_queue_groups(kbdev, slot_mask);
+ ret = suspend_active_queue_groups(kbdev, slot_mask, false);
if (unlikely(ret)) {
const int csg_nr = ffs(slot_mask[0]) - 1;
@@ -5016,14 +5027,14 @@ static bool scheduler_suspend_on_idle(struct kbase_device *kbdev)
return true;
}
-static void gpu_idle_worker(struct work_struct *work)
+static void gpu_idle_worker(struct kbase_device *kbdev)
{
- struct kbase_device *kbdev =
- container_of(work, struct kbase_device, csf.scheduler.gpu_idle_work);
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
bool scheduler_is_idle_suspendable = false;
bool all_groups_suspended = false;
+ WARN_ON_ONCE(atomic_read(&scheduler->pending_gpu_idle_work) == 0);
+
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_START, NULL, 0u);
#define __ENCODE_KTRACE_INFO(reset, idle, all_suspend) \
@@ -5033,7 +5044,7 @@ static void gpu_idle_worker(struct work_struct *work)
dev_warn(kbdev->dev, "Quit idle for failing to prevent gpu reset.\n");
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GPU_IDLE_WORKER_END, NULL,
__ENCODE_KTRACE_INFO(true, false, false));
- return;
+ goto exit;
}
kbase_debug_csf_fault_wait_completion(kbdev);
rt_mutex_lock(&scheduler->lock);
@@ -5042,7 +5053,7 @@ static void gpu_idle_worker(struct work_struct *work)
if (unlikely(scheduler->state == SCHED_BUSY)) {
rt_mutex_unlock(&scheduler->lock);
kbase_reset_gpu_allow(kbdev);
- return;
+ goto exit;
}
#endif
@@ -5067,6 +5078,9 @@ static void gpu_idle_worker(struct work_struct *work)
__ENCODE_KTRACE_INFO(false, scheduler_is_idle_suspendable,
all_groups_suspended));
#undef __ENCODE_KTRACE_INFO
+
+exit:
+ atomic_dec(&scheduler->pending_gpu_idle_work);
}
static int scheduler_prepare(struct kbase_device *kbdev)
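
The GPU-idle path above now works as a counter drained by the scheduler kthread instead of a dedicated workqueue item: enqueue_gpu_idle_work() increments @pending_gpu_idle_work and signals the kthread, and gpu_idle_worker() decrements it on every exit path. A plausible dispatch fragment for the kthread main loop, shown only to make the counter semantics concrete (the real loop in this file may differ):

	/* Inside kbase_csf_scheduler_kthread(), once kthread_signal fires: */
	while (atomic_read(&scheduler->pending_gpu_idle_work) > 0)
		gpu_idle_worker(kbdev);
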
@@ -5691,7 +5705,9 @@ exit_no_schedule_unlock:
kbase_reset_gpu_allow(kbdev);
}
-static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask)
+
+static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long *slot_mask,
+ bool reset)
{
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
u32 num_groups = kbdev->csf.global_iface.group_num;
@@ -5704,12 +5720,12 @@ static int suspend_active_queue_groups(struct kbase_device *kbdev, unsigned long
struct kbase_queue_group *group = scheduler->csg_slots[slot_num].resident_group;
if (group) {
- suspend_queue_group(group);
+ suspend_queue_group(group);
set_bit(slot_num, slot_mask);
}
}
- ret = wait_csg_slots_suspend(kbdev, slot_mask);
+ ret = wait_csg_slots_suspend(kbdev, slot_mask);
return ret;
}
@@ -5722,7 +5738,7 @@ static int suspend_active_queue_groups_on_reset(struct kbase_device *kbdev)
rt_mutex_lock(&scheduler->lock);
- ret = suspend_active_queue_groups(kbdev, slot_mask);
+ ret = suspend_active_queue_groups(kbdev, slot_mask, true);
if (ret) {
dev_warn(
@@ -5864,9 +5880,9 @@ static void scheduler_inner_reset(struct kbase_device *kbdev)
WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
/* Cancel any potential queued delayed work(s) */
- cancel_work_sync(&kbdev->csf.scheduler.gpu_idle_work);
cancel_tick_work(scheduler);
cancel_tock_work(scheduler);
+ cancel_gpu_idle_work(scheduler);
cancel_delayed_work_sync(&scheduler->ping_work);
rt_mutex_lock(&scheduler->lock);
@@ -5894,12 +5910,13 @@ static void scheduler_inner_reset(struct kbase_device *kbdev)
void kbase_csf_scheduler_reset(struct kbase_device *kbdev)
{
struct kbase_context *kctx;
-
WARN_ON(!kbase_reset_gpu_is_active(kbdev));
KBASE_KTRACE_ADD(kbdev, SCHEDULER_RESET_START, NULL, 0u);
- kbase_debug_csf_fault_wait_completion(kbdev);
+ if (kbase_reset_gpu_is_active(kbdev))
+ kbase_debug_csf_fault_wait_completion(kbdev);
+
if (scheduler_handle_reset_in_protected_mode(kbdev) &&
!suspend_active_queue_groups_on_reset(kbdev)) {
@@ -6488,8 +6505,8 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
* check_group_sync_update_worker() - Check the sync wait condition for all the
* blocked queue groups
*
- * @work: Pointer to the context-specific work item for evaluating the wait
- * condition for all the queue groups in idle_wait_groups list.
+ * @kctx: The context to evaluate the wait condition for all the queue groups
+ * in the idle_wait_groups list.
*
* This function checks the gpu queues of all the groups present in both
* idle_wait_groups list of a context and all on slot idle groups (if GPU
@@ -6499,25 +6516,14 @@ static void check_sync_update_in_sleep_mode(struct kbase_device *kbdev)
* runnable groups so that Scheduler can consider scheduling the group
* in next tick or exit protected mode.
*/
-static void check_group_sync_update_worker(struct kthread_work *work)
+static void check_group_sync_update_worker(struct kbase_context *kctx)
{
- struct kbase_context *const kctx =
- container_of(work, struct kbase_context, csf.sched.sync_update_work);
struct kbase_device *const kbdev = kctx->kbdev;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
bool sync_updated = false;
rt_mutex_lock(&scheduler->lock);
-#if IS_ENABLED(CONFIG_DEBUG_FS)
- if (unlikely(scheduler->state == SCHED_BUSY)) {
- kthread_queue_work(&kctx->csf.sched.sync_update_worker,
- &kctx->csf.sched.sync_update_work);
- rt_mutex_unlock(&scheduler->lock);
- return;
- }
-#endif
-
KBASE_KTRACE_ADD(kbdev, SCHEDULER_GROUP_SYNC_UPDATE_WORKER_START, kctx, 0u);
if (kctx->csf.sched.num_idle_wait_grps != 0) {
struct kbase_queue_group *group, *temp;
@@ -6558,8 +6564,7 @@ static enum kbase_csf_event_callback_action check_group_sync_update_cb(void *par
KBASE_KTRACE_ADD(kctx->kbdev, SCHEDULER_GROUP_SYNC_UPDATE_EVENT, kctx, 0u);
- kthread_queue_work(&kctx->csf.sched.sync_update_worker,
- &kctx->csf.sched.sync_update_work);
+ kbase_csf_scheduler_enqueue_sync_update_work(kctx);
return KBASE_CSF_EVENT_CALLBACK_KEEP;
}
@@ -6570,6 +6575,8 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
int err;
struct kbase_device *kbdev = kctx->kbdev;
+ WARN_ON_ONCE(!kbdev->csf.scheduler.kthread_running);
+
#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
err = gpu_metrics_ctx_init(kctx);
if (err)
@@ -6582,16 +6589,7 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
INIT_LIST_HEAD(&kctx->csf.sched.idle_wait_groups);
- err = kbase_kthread_run_worker_rt(kctx->kbdev, &kctx->csf.sched.sync_update_worker, "csf_sync_update");
- if (err) {
- dev_err(kctx->kbdev->dev,
- "Failed to initialize scheduler context workqueue");
- err = -ENOMEM;
- goto alloc_wq_failed;
- }
-
- kthread_init_work(&kctx->csf.sched.sync_update_work,
- check_group_sync_update_worker);
+ INIT_LIST_HEAD(&kctx->csf.sched.sync_update_work);
kbase_csf_tiler_heap_reclaim_ctx_init(kctx);
@@ -6605,8 +6603,6 @@ int kbase_csf_scheduler_context_init(struct kbase_context *kctx)
return err;
event_wait_add_failed:
- kbase_destroy_kworker_stack(&kctx->csf.sched.sync_update_worker);
-alloc_wq_failed:
kbase_ctx_sched_remove_ctx(kctx);
#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
gpu_metrics_ctx_term(kctx);
@@ -6617,8 +6613,10 @@ alloc_wq_failed:
void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
{
kbase_csf_event_wait_remove(kctx, check_group_sync_update_cb, kctx);
- kthread_cancel_work_sync(&kctx->csf.sched.sync_update_work);
- kbase_destroy_kworker_stack(&kctx->csf.sched.sync_update_worker);
+
+	/* Drain any pending SYNC_UPDATE work */
+ kbase_csf_scheduler_wait_for_kthread_pending_work(kctx->kbdev,
+ &kctx->csf.pending_sync_update);
kbase_ctx_sched_remove_ctx(kctx);
#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
@@ -6626,53 +6624,157 @@ void kbase_csf_scheduler_context_term(struct kbase_context *kctx)
#endif /* CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD */
}
+static void handle_pending_sync_update_works(struct kbase_csf_scheduler *scheduler)
+{
+ struct kbase_context *sync_update_ctx;
+
+ if (atomic_cmpxchg(&scheduler->pending_sync_update_works, true, false) == false)
+ return;
+
+ do {
+ unsigned long flags;
+
+ spin_lock_irqsave(&scheduler->sync_update_work_ctxs_lock, flags);
+ sync_update_ctx = NULL;
+ if (!list_empty(&scheduler->sync_update_work_ctxs)) {
+ sync_update_ctx = list_first_entry(&scheduler->sync_update_work_ctxs,
+ struct kbase_context,
+ csf.sched.sync_update_work);
+ list_del_init(&sync_update_ctx->csf.sched.sync_update_work);
+ }
+ spin_unlock_irqrestore(&scheduler->sync_update_work_ctxs_lock, flags);
+
+ if (sync_update_ctx != NULL) {
+ WARN_ON_ONCE(atomic_read(&sync_update_ctx->csf.pending_sync_update) == 0);
+ check_group_sync_update_worker(sync_update_ctx);
+ atomic_dec(&sync_update_ctx->csf.pending_sync_update);
+ }
+ } while (sync_update_ctx != NULL);
+}
+
+static void handle_pending_protm_requests(struct kbase_csf_scheduler *scheduler)
+{
+ struct kbase_queue_group *protm_grp;
+
+ if (atomic_cmpxchg(&scheduler->pending_protm_event_works, true, false) == false)
+ return;
+
+ do {
+ unsigned long flags;
+
+ spin_lock_irqsave(&scheduler->protm_event_work_grps_lock, flags);
+ protm_grp = NULL;
+ if (!list_empty(&scheduler->protm_event_work_grps)) {
+ protm_grp = list_first_entry(&scheduler->protm_event_work_grps,
+ struct kbase_queue_group, protm_event_work);
+ list_del_init(&protm_grp->protm_event_work);
+ }
+ spin_unlock_irqrestore(&scheduler->protm_event_work_grps_lock, flags);
+
+ if (protm_grp != NULL) {
+ WARN_ON_ONCE(atomic_read(&protm_grp->pending_protm_event_work) == 0);
+ kbase_csf_process_protm_event_request(protm_grp);
+ atomic_dec(&protm_grp->pending_protm_event_work);
+ }
+ } while (protm_grp != NULL);
+}
+
+static void handle_pending_kcpuq_commands(struct kbase_csf_scheduler *scheduler)
+{
+ struct kbase_kcpu_command_queue *kcpuq;
+
+ if (atomic_cmpxchg(&scheduler->pending_kcpuq_works, true, false) == false)
+ return;
+
+ do {
+ unsigned long flags;
+
+ spin_lock_irqsave(&scheduler->kcpuq_work_queues_lock, flags);
+ kcpuq = NULL;
+ if (!list_empty(&scheduler->kcpuq_work_queues)) {
+ kcpuq = list_first_entry(&scheduler->kcpuq_work_queues,
+ struct kbase_kcpu_command_queue, high_prio_work);
+ list_del_init(&kcpuq->high_prio_work);
+ }
+ spin_unlock_irqrestore(&scheduler->kcpuq_work_queues_lock, flags);
+
+ if (kcpuq != NULL) {
+ WARN_ON_ONCE(atomic_read(&kcpuq->pending_kick) == 0);
+
+ mutex_lock(&kcpuq->lock);
+ kbase_csf_kcpu_queue_process(kcpuq, false);
+ mutex_unlock(&kcpuq->lock);
+
+ atomic_dec(&kcpuq->pending_kick);
+ }
+ } while (kcpuq != NULL);
+}
+
+static void handle_pending_queue_kicks(struct kbase_device *kbdev)
+{
+ struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
+ struct kbase_queue *queue;
+
+ if (atomic_cmpxchg(&kbdev->csf.pending_gpuq_kicks, true, false) == false)
+ return;
+
+ do {
+ u8 prio;
+
+ spin_lock(&kbdev->csf.pending_gpuq_kick_queues_lock);
+ queue = NULL;
+ for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) {
+ if (!list_empty(&kbdev->csf.pending_gpuq_kick_queues[prio])) {
+ queue = list_first_entry(&kbdev->csf.pending_gpuq_kick_queues[prio],
+ struct kbase_queue, pending_kick_link);
+ list_del_init(&queue->pending_kick_link);
+ break;
+ }
+ }
+ spin_unlock(&kbdev->csf.pending_gpuq_kick_queues_lock);
+
+ if (queue != NULL) {
+ WARN_ONCE(
+ prio != queue->group_priority,
+ "Queue %pK has priority %u but instead its kick was handled at priority %u",
+ (void *)queue, queue->group_priority, prio);
+ WARN_ON_ONCE(atomic_read(&queue->pending_kick) == 0);
+
+ kbase_csf_process_queue_kick(queue);
+
+ /* Perform a scheduling tock for high-priority queue groups if
+ * required.
+ */
+ BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0);
+ BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1);
+ if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) &&
+ atomic_read(&scheduler->pending_tock_work))
+ schedule_on_tock(kbdev);
+ }
+ } while (queue != NULL);
+}
+
static int kbase_csf_scheduler_kthread(void *data)
{
struct kbase_device *const kbdev = data;
struct kbase_csf_scheduler *const scheduler = &kbdev->csf.scheduler;
while (scheduler->kthread_running) {
- struct kbase_queue *queue;
-
if (wait_for_completion_interruptible(&scheduler->kthread_signal) != 0)
continue;
reinit_completion(&scheduler->kthread_signal);
- /* Iterate through queues with pending kicks */
- do {
- u8 prio;
-
- spin_lock(&kbdev->csf.pending_gpuq_kicks_lock);
- queue = NULL;
- for (prio = 0; prio != KBASE_QUEUE_GROUP_PRIORITY_COUNT; ++prio) {
- if (!list_empty(&kbdev->csf.pending_gpuq_kicks[prio])) {
- queue = list_first_entry(
- &kbdev->csf.pending_gpuq_kicks[prio],
- struct kbase_queue, pending_kick_link);
- list_del_init(&queue->pending_kick_link);
- break;
- }
- }
- spin_unlock(&kbdev->csf.pending_gpuq_kicks_lock);
-
- if (queue != NULL) {
- WARN_ONCE(
- prio != queue->group_priority,
- "Queue %pK has priority %hhu but instead its kick was handled at priority %hhu",
- (void *)queue, queue->group_priority, prio);
-
- kbase_csf_process_queue_kick(queue);
+ /*
+ * The order in which these requests are handled is based on
+ * how they would influence each other's decisions. As a
+ * result, the tick & tock requests must be handled after all
+ * other requests, but before the GPU IDLE work.
+ */
- /* Perform a scheduling tock for high-priority queue groups if
- * required.
- */
- BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_REALTIME != 0);
- BUILD_BUG_ON(KBASE_QUEUE_GROUP_PRIORITY_HIGH != 1);
- if ((prio <= KBASE_QUEUE_GROUP_PRIORITY_HIGH) &&
- atomic_read(&scheduler->pending_tock_work))
- schedule_on_tock(kbdev);
- }
- } while (queue != NULL);
+ handle_pending_sync_update_works(scheduler);
+ handle_pending_protm_requests(scheduler);
+ handle_pending_kcpuq_commands(scheduler);
+ handle_pending_queue_kicks(kbdev);
/* Check if we need to perform a scheduling tick/tock. A tick
* event shall override a tock event but not vice-versa.
@@ -6684,6 +6786,10 @@ static int kbase_csf_scheduler_kthread(void *data)
schedule_on_tock(kbdev);
}
+ /* Drain pending GPU idle works */
+ while (atomic_read(&scheduler->pending_gpu_idle_work) > 0)
+ gpu_idle_worker(kbdev);
+
dev_dbg(kbdev->dev, "Waking up for event after a scheduling iteration.");
wake_up_all(&kbdev->csf.event_wait);
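For readers unfamiliar with the pattern the kthread now follows, here is a minimal user-space sketch (pthreads, illustrative names only; none of this is kbase code) of one worker draining several pending-work lists in a fixed order, woken only on the false-to-true transition of a per-type flag, as in the atomic_cmpxchg() + complete() pairs above:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct item { struct item *next; int payload; };

static struct item *sync_list, *kick_list;     /* two work types, drained in this order */
static atomic_bool pending_sync, pending_kick; /* cheap "anything queued?" flags */
static atomic_bool stop;
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t wakeup = PTHREAD_COND_INITIALIZER;

static void enqueue(struct item **list, atomic_bool *flag, struct item *it)
{
	pthread_mutex_lock(&lock);
	it->next = *list;
	*list = it;
	/* Wake the worker only on the false -> true transition of the flag. */
	if (!atomic_exchange(flag, true))
		pthread_cond_signal(&wakeup);
	pthread_mutex_unlock(&lock);
}

static void drain(struct item **list, atomic_bool *flag, const char *what)
{
	if (!atomic_exchange(flag, false))
		return;
	for (;;) {
		struct item *it;

		pthread_mutex_lock(&lock);
		it = *list;
		if (it)
			*list = it->next;
		pthread_mutex_unlock(&lock);
		if (!it)
			return;
		printf("handled %s %d\n", what, it->payload);
	}
}

static void *worker(void *arg)
{
	(void)arg;
	for (;;) {
		pthread_mutex_lock(&lock);
		while (!atomic_load(&pending_sync) && !atomic_load(&pending_kick) &&
		       !atomic_load(&stop))
			pthread_cond_wait(&wakeup, &lock);
		pthread_mutex_unlock(&lock);

		/* Fixed handling order: sync updates before queue kicks. */
		drain(&sync_list, &pending_sync, "sync update");
		drain(&kick_list, &pending_kick, "queue kick");

		if (atomic_load(&stop))
			return NULL;
	}
}

int main(void)
{
	pthread_t t;
	struct item a = { .payload = 1 }, b = { .payload = 2 };

	pthread_create(&t, NULL, worker, NULL);
	enqueue(&kick_list, &pending_kick, &a);
	enqueue(&sync_list, &pending_sync, &b);

	pthread_mutex_lock(&lock);
	atomic_store(&stop, true);
	pthread_cond_signal(&wakeup);
	pthread_mutex_unlock(&lock);
	pthread_join(t, NULL);
	return 0;
}

The single mutex here stands in for the several per-list spinlocks the driver uses; the ordering guarantee comes purely from draining the lists in sequence inside one loop body.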
@@ -6756,12 +6862,6 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
atomic_set(&scheduler->timer_enabled, true);
- scheduler->idle_wq = alloc_ordered_workqueue("csf_scheduler_gpu_idle_wq", WQ_HIGHPRI);
- if (!scheduler->idle_wq) {
- dev_err(kbdev->dev, "Failed to allocate GPU idle scheduler workqueue\n");
- return -ENOMEM;
- }
-
INIT_DEFERRABLE_WORK(&scheduler->ping_work, firmware_aliveness_monitor);
rt_mutex_init(&scheduler->lock);
@@ -6779,18 +6879,23 @@ int kbase_csf_scheduler_early_init(struct kbase_device *kbdev)
KBASE_KTRACE_ADD(kbdev, SCHED_SUSPENDED, NULL, scheduler->state);
scheduler->csg_scheduling_period_ms = CSF_SCHEDULER_TIME_TICK_MS;
scheduler_doorbell_init(kbdev);
-
- INIT_WORK(&scheduler->gpu_idle_work, gpu_idle_worker);
- scheduler->fast_gpu_idle_handling = false;
- atomic_set(&scheduler->gpu_no_longer_idle, false);
- atomic_set(&scheduler->non_idle_offslot_grps, 0);
-
hrtimer_init(&scheduler->tick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
scheduler->tick_timer.function = tick_timer_callback;
- kbase_csf_tiler_heap_reclaim_mgr_init(kbdev);
+ atomic_set(&scheduler->pending_sync_update_works, false);
+ spin_lock_init(&scheduler->sync_update_work_ctxs_lock);
+ INIT_LIST_HEAD(&scheduler->sync_update_work_ctxs);
+ atomic_set(&scheduler->pending_protm_event_works, false);
+ spin_lock_init(&scheduler->protm_event_work_grps_lock);
+ INIT_LIST_HEAD(&scheduler->protm_event_work_grps);
+ atomic_set(&scheduler->pending_kcpuq_works, false);
+ spin_lock_init(&scheduler->kcpuq_work_queues_lock);
+ INIT_LIST_HEAD(&scheduler->kcpuq_work_queues);
+ atomic_set(&scheduler->pending_tick_work, false);
+ atomic_set(&scheduler->pending_tock_work, false);
+ atomic_set(&scheduler->pending_gpu_idle_work, 0);
- return 0;
+ return kbase_csf_tiler_heap_reclaim_mgr_init(kbdev);
}
void kbase_csf_scheduler_term(struct kbase_device *kbdev)
@@ -6811,7 +6916,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
* to be active at the time of Driver unload.
*/
WARN_ON(kbase_csf_scheduler_get_nr_active_csgs(kbdev));
- flush_work(&kbdev->csf.scheduler.gpu_idle_work);
rt_mutex_lock(&kbdev->csf.scheduler.lock);
if (kbdev->csf.scheduler.state != SCHED_SUSPENDED) {
@@ -6838,9 +6942,6 @@ void kbase_csf_scheduler_term(struct kbase_device *kbdev)
void kbase_csf_scheduler_early_term(struct kbase_device *kbdev)
{
- if (kbdev->csf.scheduler.idle_wq)
- destroy_workqueue(kbdev->csf.scheduler.idle_wq);
-
kbase_csf_tiler_heap_reclaim_mgr_term(kbdev);
}
@@ -7121,6 +7222,65 @@ int kbase_csf_scheduler_handle_runtime_suspend(struct kbase_device *kbdev)
return 0;
}
+void kbase_csf_scheduler_enqueue_sync_update_work(struct kbase_context *kctx)
+{
+ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
+ unsigned long flags;
+
+ spin_lock_irqsave(&scheduler->sync_update_work_ctxs_lock, flags);
+ if (list_empty(&kctx->csf.sched.sync_update_work)) {
+ list_add_tail(&kctx->csf.sched.sync_update_work, &scheduler->sync_update_work_ctxs);
+ atomic_inc(&kctx->csf.pending_sync_update);
+ if (atomic_cmpxchg(&scheduler->pending_sync_update_works, false, true) == false)
+ complete(&scheduler->kthread_signal);
+ }
+ spin_unlock_irqrestore(&scheduler->sync_update_work_ctxs_lock, flags);
+}
+
+void kbase_csf_scheduler_enqueue_protm_event_work(struct kbase_queue_group *group)
+{
+ struct kbase_context *const kctx = group->kctx;
+ struct kbase_csf_scheduler *const scheduler = &kctx->kbdev->csf.scheduler;
+ unsigned long flags;
+
+ spin_lock_irqsave(&scheduler->protm_event_work_grps_lock, flags);
+ if (list_empty(&group->protm_event_work)) {
+ list_add_tail(&group->protm_event_work, &scheduler->protm_event_work_grps);
+ atomic_inc(&group->pending_protm_event_work);
+ if (atomic_cmpxchg(&scheduler->pending_protm_event_works, false, true) == false)
+ complete(&scheduler->kthread_signal);
+ }
+ spin_unlock_irqrestore(&scheduler->protm_event_work_grps_lock, flags);
+}
+
+void kbase_csf_scheduler_enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue)
+{
+ struct kbase_csf_scheduler *const scheduler = &queue->kctx->kbdev->csf.scheduler;
+ unsigned long flags;
+
+ spin_lock_irqsave(&scheduler->kcpuq_work_queues_lock, flags);
+ if (list_empty(&queue->high_prio_work)) {
+ list_add_tail(&queue->high_prio_work, &scheduler->kcpuq_work_queues);
+ atomic_inc(&queue->pending_kick);
+ if (atomic_cmpxchg(&scheduler->pending_kcpuq_works, false, true) == false)
+ complete(&scheduler->kthread_signal);
+ }
+ spin_unlock_irqrestore(&scheduler->kcpuq_work_queues_lock, flags);
+}
+
+void kbase_csf_scheduler_wait_for_kthread_pending_work(struct kbase_device *kbdev,
+ atomic_t *pending)
+{
+ /*
+ * Signal kbase_csf_scheduler_kthread() to allow for the
+ * eventual completion of the current iteration. Once the work is
+ * done, the event_wait wait queue shall be signalled.
+ */
+
+ complete(&kbdev->csf.scheduler.kthread_signal);
+ wait_event(kbdev->csf.event_wait, atomic_read(pending) == 0);
+}
+
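The teardown path above relies on a simple counted handshake. The following stand-alone sketch (user-space pthreads, made-up names, not driver code) shows the same idea: enqueue bumps a per-object counter, the worker decrements it after processing and broadcasts, and a waiter blocks until the counter reaches zero:

#include <pthread.h>
#include <stdatomic.h>

static pthread_mutex_t event_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t event_wait = PTHREAD_COND_INITIALIZER; /* stands in for kbdev->csf.event_wait */
static atomic_int ctx_pending;                               /* like kctx->csf.pending_sync_update */

static void enqueue_work(atomic_int *pending)
{
	atomic_fetch_add(pending, 1);
	/* ...link the object on the worker's list and wake the worker... */
}

static void worker_finished_one(atomic_int *pending)
{
	atomic_fetch_sub(pending, 1);
	pthread_mutex_lock(&event_lock);
	pthread_cond_broadcast(&event_wait); /* mirrors wake_up_all() after an iteration */
	pthread_mutex_unlock(&event_lock);
}

static void wait_for_pending_work(atomic_int *pending)
{
	pthread_mutex_lock(&event_lock);
	while (atomic_load(pending) != 0)
		pthread_cond_wait(&event_wait, &event_lock);
	pthread_mutex_unlock(&event_lock);
}

static void *worker(void *arg)
{
	worker_finished_one(arg); /* pretend one queued item was processed */
	return NULL;
}

int main(void)
{
	pthread_t t;

	enqueue_work(&ctx_pending);
	pthread_create(&t, NULL, worker, &ctx_pending);
	wait_for_pending_work(&ctx_pending); /* returns once the worker has drained it */
	pthread_join(t, NULL);
	return 0;
}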
void kbase_csf_scheduler_reval_idleness_post_sleep(struct kbase_device *kbdev)
{
u32 csg_nr;
diff --git a/mali_kbase/csf/mali_kbase_csf_scheduler.h b/mali_kbase/csf/mali_kbase_csf_scheduler.h
index b299235..a0bf8bc 100644
--- a/mali_kbase/csf/mali_kbase_csf_scheduler.h
+++ b/mali_kbase/csf/mali_kbase_csf_scheduler.h
@@ -235,7 +235,8 @@ void kbase_csf_scheduler_early_term(struct kbase_device *kbdev);
* No explicit re-initialization is done for CSG & CS interface I/O pages;
* instead, that happens implicitly on firmware reload.
*
- * Should be called only after initiating the GPU reset.
+ * Should be called either after initiating the GPU reset or when an MCU reset
+ * is expected to follow, such as in the GPU_LOST case.
*/
void kbase_csf_scheduler_reset(struct kbase_device *kbdev);
@@ -488,6 +489,48 @@ static inline bool kbase_csf_scheduler_all_csgs_idle(struct kbase_device *kbdev)
}
/**
+ * kbase_csf_scheduler_enqueue_sync_update_work() - Add a context to the list
+ * of contexts to handle
+ * SYNC_UPDATE events.
+ *
+ * @kctx: The context to handle SYNC_UPDATE events for
+ *
+ * This function wakes up kbase_csf_scheduler_kthread() to handle pending
+ * SYNC_UPDATE events for all contexts.
+ */
+void kbase_csf_scheduler_enqueue_sync_update_work(struct kbase_context *kctx);
+
+/**
+ * kbase_csf_scheduler_enqueue_protm_event_work() - Add a group to the list
+ * of groups to handle
+ * PROTM requests.
+ *
+ * @group: The group to handle protected mode request
+ *
+ * This function wakes up kbase_csf_scheduler_kthread() to handle pending
+ * protected mode requests for all groups.
+ */
+void kbase_csf_scheduler_enqueue_protm_event_work(struct kbase_queue_group *group);
+
+/**
+ * kbase_csf_scheduler_enqueue_kcpuq_work() - Wake up kbase_csf_scheduler_kthread() to process
+ * pending commands for a KCPU queue.
+ *
+ * @queue: The queue to process pending commands for
+ */
+void kbase_csf_scheduler_enqueue_kcpuq_work(struct kbase_kcpu_command_queue *queue);
+
+/**
+ * kbase_csf_scheduler_wait_for_kthread_pending_work - Wait until a pending work item
+ * has completed in kbase_csf_scheduler_kthread().
+ *
+ * @kbdev: Instance of a GPU platform device that implements a CSF interface
+ * @pending: The work to wait for
+ */
+void kbase_csf_scheduler_wait_for_kthread_pending_work(struct kbase_device *kbdev,
+ atomic_t *pending);
+
+/**
* kbase_csf_scheduler_invoke_tick() - Invoke the scheduling tick
*
* @kbdev: Pointer to the device
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c
index 00b4ebf..4a90f29 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.c
@@ -331,8 +331,8 @@ static unsigned long kbase_csf_tiler_heap_reclaim_scan_free_pages(struct kbase_d
static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker *s,
struct shrink_control *sc)
{
- struct kbase_device *kbdev =
- container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);
+ struct kbase_device *kbdev = KBASE_GET_KBASE_DATA_FROM_SHRINKER(
+ s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);
return kbase_csf_tiler_heap_reclaim_count_free_pages(kbdev, sc);
}
@@ -340,8 +340,8 @@ static unsigned long kbase_csf_tiler_heap_reclaim_count_objects(struct shrinker
static unsigned long kbase_csf_tiler_heap_reclaim_scan_objects(struct shrinker *s,
struct shrink_control *sc)
{
- struct kbase_device *kbdev =
- container_of(s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);
+ struct kbase_device *kbdev = KBASE_GET_KBASE_DATA_FROM_SHRINKER(
+ s, struct kbase_device, csf.scheduler.reclaim_mgr.heap_reclaim);
return kbase_csf_tiler_heap_reclaim_scan_free_pages(kbdev, sc);
}
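The two hunks above stop recovering the kbase_device from the shrinker via container_of(): from Linux 6.7 the shrinker is a separately allocated object whose owner travels in ->private_data, which is presumably what KBASE_GET_KBASE_DATA_FROM_SHRINKER abstracts (an assumption; its definition is not shown in this diff). The container_of() half of that trick is plain C and can be demonstrated stand-alone:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct shrinker_like { int seeks; };
struct owner { int id; struct shrinker_like reclaim; };	/* reclaim embedded in its owner */

int main(void)
{
	struct owner dev = { .id = 42 };
	struct shrinker_like *s = &dev.reclaim;
	struct owner *back = container_of(s, struct owner, reclaim);

	printf("recovered id: %d\n", back->id);	/* 42 */
	return 0;
}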
@@ -352,11 +352,17 @@ void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx)
INIT_LIST_HEAD(&kctx->csf.sched.heap_info.mgr_link);
}
-void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev)
+int kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev)
{
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
- struct shrinker *reclaim = &scheduler->reclaim_mgr.heap_reclaim;
u8 prio;
+ struct shrinker *reclaim;
+
+ reclaim =
+ KBASE_INIT_RECLAIM(&(scheduler->reclaim_mgr), heap_reclaim, "mali-csf-tiler-heap");
+ if (!reclaim)
+ return -ENOMEM;
+ KBASE_SET_RECLAIM(&(scheduler->reclaim_mgr), heap_reclaim, reclaim);
for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
prio++)
@@ -367,13 +373,10 @@ void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev)
reclaim->seeks = HEAP_SHRINKER_SEEKS;
reclaim->batch = HEAP_SHRINKER_BATCH;
-#if !defined(CONFIG_MALI_VECTOR_DUMP)
-#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
- register_shrinker(reclaim);
-#else
- register_shrinker(reclaim, "mali-csf-tiler-heap");
-#endif
-#endif
+ if (!IS_ENABLED(CONFIG_MALI_VECTOR_DUMP))
+ KBASE_REGISTER_SHRINKER(reclaim, "mali-csf-tiler-heap", kbdev);
+
+ return 0;
}
void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev)
@@ -381,9 +384,8 @@ void kbase_csf_tiler_heap_reclaim_mgr_term(struct kbase_device *kbdev)
struct kbase_csf_scheduler *scheduler = &kbdev->csf.scheduler;
u8 prio;
-#if !defined(CONFIG_MALI_VECTOR_DUMP)
- unregister_shrinker(&scheduler->reclaim_mgr.heap_reclaim);
-#endif
+ if (!IS_ENABLED(CONFIG_MALI_VECTOR_DUMP))
+ KBASE_UNREGISTER_SHRINKER(scheduler->reclaim_mgr.heap_reclaim);
for (prio = KBASE_QUEUE_GROUP_PRIORITY_REALTIME; prio < KBASE_QUEUE_GROUP_PRIORITY_COUNT;
prio++)
diff --git a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h
index 7880de0..d41b7ba 100644
--- a/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h
+++ b/mali_kbase/csf/mali_kbase_csf_tiler_heap_reclaim.h
@@ -66,8 +66,10 @@ void kbase_csf_tiler_heap_reclaim_ctx_init(struct kbase_context *kctx);
* @kbdev: Pointer to the device.
*
* This function must be called only when a kbase device is initialized.
+ *
+ * Return: 0 if the reclaim manager was initialised successfully, otherwise an error code.
*/
-void kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev);
+int kbase_csf_tiler_heap_reclaim_mgr_init(struct kbase_device *kbdev);
/**
 * kbase_csf_tiler_heap_reclaim_mgr_term - Termination call for the tiler heap reclaim manager.
diff --git a/mali_kbase/csf/mali_kbase_csf_tl_reader.c b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
index 06163e5..4ee64e1 100644
--- a/mali_kbase/csf/mali_kbase_csf_tl_reader.c
+++ b/mali_kbase/csf/mali_kbase_csf_tl_reader.c
@@ -152,13 +152,22 @@ static bool tl_reader_overflow_check(struct kbase_csf_tl_reader *self, u16 event
*
* Reset the reader to the default state, i.e. set all the
* mutable fields to zero.
+ *
+ * NOTE: this function expects the irq spinlock to be held.
*/
static void tl_reader_reset(struct kbase_csf_tl_reader *self)
{
+ lockdep_assert_held(&self->read_lock);
+
self->got_first_event = false;
self->is_active = false;
self->expected_event_id = 0;
self->tl_header.btc = 0;
+
+ /* There might be data left in the trace buffer from the previous
+ * tracing session. We don't want it to leak into this session.
+ */
+ kbase_csf_firmware_trace_buffer_discard_all(self->trace_buffer);
}
int kbase_csf_tl_reader_flush_buffer(struct kbase_csf_tl_reader *self)
@@ -325,21 +334,16 @@ static int tl_reader_update_enable_bit(struct kbase_csf_tl_reader *self, bool va
void kbase_csf_tl_reader_init(struct kbase_csf_tl_reader *self, struct kbase_tlstream *stream)
{
- self->timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT;
+ *self = (struct kbase_csf_tl_reader){
+ .timer_interval = KBASE_CSF_TL_READ_INTERVAL_DEFAULT,
+ .stream = stream,
+ .kbdev = NULL, /* This will be initialized by tl_reader_init_late() */
+ .is_active = false,
+ };
kbase_timer_setup(&self->read_timer, kbasep_csf_tl_reader_read_callback);
- self->stream = stream;
-
- /* This will be initialized by tl_reader_init_late() */
- self->kbdev = NULL;
- self->trace_buffer = NULL;
- self->tl_header.data = NULL;
- self->tl_header.size = 0;
-
spin_lock_init(&self->read_lock);
-
- tl_reader_reset(self);
}
void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self)
@@ -349,13 +353,19 @@ void kbase_csf_tl_reader_term(struct kbase_csf_tl_reader *self)
int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_device *kbdev)
{
+ unsigned long flags;
int rcode;
+ spin_lock_irqsave(&self->read_lock, flags);
+
/* If already running, early exit. */
- if (self->is_active)
+ if (self->is_active) {
+ spin_unlock_irqrestore(&self->read_lock, flags);
return 0;
+ }
if (tl_reader_init_late(self, kbdev)) {
+ spin_unlock_irqrestore(&self->read_lock, flags);
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
dev_warn(kbdev->dev, "CSFFW timeline is not available for MALI_NO_MALI builds!");
return 0;
@@ -367,6 +377,9 @@ int kbase_csf_tl_reader_start(struct kbase_csf_tl_reader *self, struct kbase_dev
tl_reader_reset(self);
self->is_active = true;
+
+ spin_unlock_irqrestore(&self->read_lock, flags);
+
/* Set bytes to copy to the header size. This is to trigger copying
* of the header to the user space.
*/
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
index f3ffa9d..dd1a1cc 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.c
@@ -525,6 +525,14 @@ void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace
}
EXPORT_SYMBOL(kbase_csf_firmware_trace_buffer_discard);
+void kbase_csf_firmware_trace_buffer_discard_all(struct firmware_trace_buffer *trace_buffer)
+{
+ if (WARN_ON(!trace_buffer))
+ return;
+
+ *(trace_buffer->cpu_va.extract_cpu_va) = *(trace_buffer->cpu_va.insert_cpu_va);
+}
+
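kbase_csf_firmware_trace_buffer_discard_all() empties the buffer by fast-forwarding the extract (read) index to the insert (write) index. A tiny stand-alone ring-buffer model (illustrative, not kbase code) makes the effect visible:

#include <stdint.h>
#include <stdio.h>

struct ring { uint32_t insert, extract; uint8_t data[64]; };

static uint32_t ring_bytes_available(const struct ring *r)
{
	return r->insert - r->extract;	/* free-running indices; wrap handled by unsigned math */
}

static void ring_discard_all(struct ring *r)
{
	r->extract = r->insert;		/* drop everything that was pending */
}

int main(void)
{
	struct ring r = { .insert = 10, .extract = 3 };

	printf("pending before: %u\n", ring_bytes_available(&r));	/* 7 */
	ring_discard_all(&r);
	printf("pending after:  %u\n", ring_bytes_available(&r));	/* 0 */
	return 0;
}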
static void update_trace_buffer_active_mask64(struct firmware_trace_buffer *tb, u64 mask)
{
unsigned int i;
diff --git a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h
index 6188dcf..6b66f2b 100644
--- a/mali_kbase/csf/mali_kbase_csf_trace_buffer.h
+++ b/mali_kbase/csf/mali_kbase_csf_trace_buffer.h
@@ -181,6 +181,15 @@ unsigned int kbase_csf_firmware_trace_buffer_read_data(struct firmware_trace_buf
void kbase_csf_firmware_trace_buffer_discard(struct firmware_trace_buffer *trace_buffer);
/**
+ * kbase_csf_firmware_trace_buffer_discard_all - Discard all data from a trace buffer
+ *
+ * @trace_buffer: Trace buffer handle
+ *
+ * Discard all the data in the trace buffer to make it empty.
+ */
+void kbase_csf_firmware_trace_buffer_discard_all(struct firmware_trace_buffer *trace_buffer);
+
+/**
* kbase_csf_firmware_trace_buffer_get_active_mask64 - Get trace buffer active mask
*
* @tb: Trace buffer handle
diff --git a/mali_kbase/csf/mali_kbase_csf_util.c b/mali_kbase/csf/mali_kbase_csf_util.c
index 7dc32a1..504379e 100644
--- a/mali_kbase/csf/mali_kbase_csf_util.c
+++ b/mali_kbase/csf/mali_kbase_csf_util.c
@@ -115,7 +115,7 @@ struct kbasep_printer *kbasep_printer_buffer_init(struct kbase_device *kbdev,
if (kbpr) {
if (kfifo_alloc(&kbpr->fifo, KBASEP_PRINTER_BUFFER_MAX_SIZE, GFP_KERNEL)) {
- kfree(kbpr);
+ vfree(kbpr);
return NULL;
}
kbpr->kbdev = kbdev;
diff --git a/mali_kbase/device/backend/mali_kbase_device_csf.c b/mali_kbase/device/backend/mali_kbase_device_csf.c
index 2333e08..77f356e 100644
--- a/mali_kbase/device/backend/mali_kbase_device_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_csf.c
@@ -279,10 +279,8 @@ static const struct kbase_device_init dev_init[] = {
{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ kbase_get_irqs, NULL, "IRQ search failed" },
-#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
-#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
{ registers_map, registers_unmap, "Register map failed" },
-#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
{ kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" },
#endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */
diff --git a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
index fc13359..73a33b5 100644
--- a/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
+++ b/mali_kbase/device/backend/mali_kbase_device_hw_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2020-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2020-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -173,6 +173,9 @@ void kbase_gpu_interrupt(struct kbase_device *kbdev, u32 val)
kbase_pm_power_changed(kbdev);
}
+ if (val & MCU_STATUS_GPU_IRQ)
+ wake_up_all(&kbdev->csf.event_wait);
+
KBASE_KTRACE_ADD(kbdev, CORE_GPU_IRQ_DONE, NULL, val);
}
KBASE_EXPORT_TEST_API(kbase_gpu_interrupt);
diff --git a/mali_kbase/device/backend/mali_kbase_device_jm.c b/mali_kbase/device/backend/mali_kbase_device_jm.c
index 01e74eb..9d36394 100644
--- a/mali_kbase/device/backend/mali_kbase_device_jm.c
+++ b/mali_kbase/device/backend/mali_kbase_device_jm.c
@@ -219,10 +219,8 @@ static const struct kbase_device_init dev_init[] = {
{ kbase_gpu_device_create, kbase_gpu_device_destroy, "Dummy model initialization failed" },
#else /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
{ kbase_get_irqs, NULL, "IRQ search failed" },
-#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
-#if !IS_ENABLED(CONFIG_MALI_NO_MALI)
{ registers_map, registers_unmap, "Register map failed" },
-#endif /* !IS_ENABLED(CONFIG_MALI_NO_MALI) */
+#endif /* !IS_ENABLED(CONFIG_MALI_REAL_HW) */
#if IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD)
{ kbase_gpu_metrics_init, kbase_gpu_metrics_term, "GPU metrics initialization failed" },
#endif /* IS_ENABLED(CONFIG_MALI_TRACE_POWER_GPU_WORK_PERIOD) */
diff --git a/mali_kbase/device/mali_kbase_device.h b/mali_kbase/device/mali_kbase_device.h
index 9cca6af..b58f0b5 100644
--- a/mali_kbase/device/mali_kbase_device.h
+++ b/mali_kbase/device/mali_kbase_device.h
@@ -58,6 +58,9 @@ void kbase_increment_device_id(void);
* When a device file is opened for the first time,
* load firmware and initialize hardware counter components.
*
+ * It is safe for this function to be called multiple times without ill
+ * effects. Only the first call has any effect.
+ *
* Return: 0 on success. An error code on failure.
*/
int kbase_device_firmware_init_once(struct kbase_device *kbdev);
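A minimal user-space model of the call-once contract documented above (assumed names, not the kbase implementation): a flag guarded by a mutex makes every call after the first a cheap no-op:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER;
static bool firmware_loaded;

static int firmware_init_once(void)
{
	int err = 0;

	pthread_mutex_lock(&init_lock);
	if (!firmware_loaded) {
		printf("loading firmware...\n");	/* the expensive one-time work */
		firmware_loaded = true;
	}
	pthread_mutex_unlock(&init_lock);
	return err;
}

int main(void)
{
	firmware_init_once();
	firmware_init_once();	/* second call returns immediately */
	return 0;
}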
diff --git a/mali_kbase/device/mali_kbase_device_hw.c b/mali_kbase/device/mali_kbase_device_hw.c
index da597af..8b20c0b 100644
--- a/mali_kbase/device/mali_kbase_device_hw.c
+++ b/mali_kbase/device/mali_kbase_device_hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -32,7 +32,7 @@ bool kbase_is_gpu_removed(struct kbase_device *kbdev)
if (!IS_ENABLED(CONFIG_MALI_ARBITER_SUPPORT))
return false;
- return (kbase_reg_read32(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0);
+ return (KBASE_REG_READ(kbdev, GPU_CONTROL_ENUM(GPU_ID)) == 0);
}
/**
diff --git a/mali_kbase/hw_access/backend/mali_kbase_hw_access_real_hw.c b/mali_kbase/hw_access/backend/mali_kbase_hw_access_real_hw.c
index f4afbf5..21a4fd1 100644
--- a/mali_kbase/hw_access/backend/mali_kbase_hw_access_real_hw.c
+++ b/mali_kbase/hw_access/backend/mali_kbase_hw_access_real_hw.c
@@ -24,12 +24,13 @@
#include <mali_kbase.h>
#include <hw_access/mali_kbase_hw_access.h>
+#include <linux/mali_hw_access.h>
u64 kbase_reg_get_gpu_id(struct kbase_device *kbdev)
{
u32 val[2] = { 0 };
- val[0] = readl(kbdev->reg);
+ val[0] = mali_readl(kbdev->reg);
return (u64)val[0] | ((u64)val[1] << 32);
@@ -45,7 +46,7 @@ u32 kbase_reg_read32(struct kbase_device *kbdev, u32 reg_enum)
KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_32_BIT)))
return 0;
- val = readl(kbdev->regmap.regs[reg_enum]);
+ val = mali_readl(kbdev->regmap.regs[reg_enum]);
#if IS_ENABLED(CONFIG_DEBUG_FS)
if (unlikely(kbdev->io_history.enabled))
@@ -69,8 +70,8 @@ u64 kbase_reg_read64(struct kbase_device *kbdev, u32 reg_enum)
KBASE_REGMAP_PERM_READ | KBASE_REGMAP_WIDTH_64_BIT)))
return 0;
- val = (u64)readl(kbdev->regmap.regs[reg_enum]) |
- ((u64)readl(kbdev->regmap.regs[reg_enum] + 4) << 32);
+ val = (u64)mali_readl(kbdev->regmap.regs[reg_enum]) |
+ ((u64)mali_readl(kbdev->regmap.regs[reg_enum] + 4) << 32);
#if IS_ENABLED(CONFIG_DEBUG_FS)
if (unlikely(kbdev->io_history.enabled)) {
@@ -101,9 +102,9 @@ u64 kbase_reg_read64_coherent(struct kbase_device *kbdev, u32 reg_enum)
return 0;
do {
- hi1 = readl(kbdev->regmap.regs[reg_enum] + 4);
- lo = readl(kbdev->regmap.regs[reg_enum]);
- hi2 = readl(kbdev->regmap.regs[reg_enum] + 4);
+ hi1 = mali_readl(kbdev->regmap.regs[reg_enum] + 4);
+ lo = mali_readl(kbdev->regmap.regs[reg_enum]);
+ hi2 = mali_readl(kbdev->regmap.regs[reg_enum] + 4);
} while (hi1 != hi2);
val = lo | (((u64)hi1) << 32);
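The loop above is the classic way to read a 64-bit counter that is only accessible as two 32-bit halves: re-read the high word until it is unchanged across the low-word read. A stand-alone model (not driver code, with atomics standing in for the mapped register pair):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint32_t reg_lo, reg_hi;	/* stands in for the two mapped 32-bit registers */

static uint64_t read64_coherent(void)
{
	uint32_t hi1, hi2, lo;

	do {
		hi1 = atomic_load(&reg_hi);
		lo = atomic_load(&reg_lo);
		hi2 = atomic_load(&reg_hi);
	} while (hi1 != hi2);	/* retry if the counter carried into the high word mid-read */

	return ((uint64_t)hi1 << 32) | lo;
}

int main(void)
{
	atomic_store(&reg_hi, 0x1);
	atomic_store(&reg_lo, 0xfffffff0u);
	printf("0x%016llx\n", (unsigned long long)read64_coherent());
	return 0;
}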
@@ -131,7 +132,7 @@ void kbase_reg_write32(struct kbase_device *kbdev, u32 reg_enum, u32 value)
KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_32_BIT)))
return;
- writel(value, kbdev->regmap.regs[reg_enum]);
+ mali_writel(value, kbdev->regmap.regs[reg_enum]);
#if IS_ENABLED(CONFIG_DEBUG_FS)
if (unlikely(kbdev->io_history.enabled))
@@ -151,8 +152,8 @@ void kbase_reg_write64(struct kbase_device *kbdev, u32 reg_enum, u64 value)
KBASE_REGMAP_PERM_WRITE | KBASE_REGMAP_WIDTH_64_BIT)))
return;
- writel(value & 0xFFFFFFFF, kbdev->regmap.regs[reg_enum]);
- writel(value >> 32, kbdev->regmap.regs[reg_enum] + 4);
+ mali_writel(value & 0xFFFFFFFF, kbdev->regmap.regs[reg_enum]);
+ mali_writel(value >> 32, kbdev->regmap.regs[reg_enum] + 4);
#if IS_ENABLED(CONFIG_DEBUG_FS)
if (unlikely(kbdev->io_history.enabled)) {
diff --git a/mali_kbase/hw_access/mali_kbase_hw_access_regmap.h b/mali_kbase/hw_access/mali_kbase_hw_access_regmap.h
index 9bd646d..1ba2598 100644
--- a/mali_kbase/hw_access/mali_kbase_hw_access_regmap.h
+++ b/mali_kbase/hw_access/mali_kbase_hw_access_regmap.h
@@ -308,6 +308,16 @@
#define TC_CLOCK_GATE_OVERRIDE (1ul << 0)
/* End TILER_CONFIG register */
+/* L2_FEATURES register */
+#define L2_FEATURES_CACHE_SIZE_SHIFT GPU_U(16)
+#define L2_FEATURES_CACHE_SIZE_MASK (GPU_U(0xFF) << L2_FEATURES_CACHE_SIZE_SHIFT)
+#define L2_FEATURES_CACHE_SIZE_GET(reg_val) \
+ (((reg_val)&L2_FEATURES_CACHE_SIZE_MASK) >> L2_FEATURES_CACHE_SIZE_SHIFT)
+#define L2_FEATURES_CACHE_SIZE_SET(reg_val, value) \
+ (~(~(reg_val) | L2_FEATURES_CACHE_SIZE_MASK) | \
+ (((value) << L2_FEATURES_CACHE_SIZE_SHIFT) & L2_FEATURES_CACHE_SIZE_MASK))
+/* End L2_FEATURES register */
+
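The new L2_FEATURES accessors follow the regmap convention of shift/mask GET and clear-then-insert SET macros. A small self-contained check (generic field names, not the real register) shows how macros of this shape behave; SET clears the field in reg_val and ORs in the shifted value, leaving all other bits untouched:

#include <stdint.h>
#include <stdio.h>

#define FIELD_SHIFT 16u
#define FIELD_MASK  (0xFFu << FIELD_SHIFT)
#define FIELD_GET(reg_val)        (((reg_val) & FIELD_MASK) >> FIELD_SHIFT)
#define FIELD_SET(reg_val, value) \
	(~(~(reg_val) | FIELD_MASK) | (((value) << FIELD_SHIFT) & FIELD_MASK))

int main(void)
{
	uint32_t reg = 0xAABBCCDDu;

	printf("field value:     0x%02x\n", (unsigned)FIELD_GET(reg));		/* 0xbb */
	printf("after SET(0x40): 0x%08x\n", (unsigned)FIELD_SET(reg, 0x40u));	/* 0xaa40ccdd */
	return 0;
}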
/* L2_CONFIG register */
#define L2_CONFIG_SIZE_SHIFT 16
#define L2_CONFIG_SIZE_MASK (0xFFul << L2_CONFIG_SIZE_SHIFT)
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
index d7911ae..d1290ca 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -21,7 +21,6 @@
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf.h"
#include "hwcnt/mali_kbase_hwcnt_gpu.h"
-#include "hwcnt/mali_kbase_hwcnt_types.h"
#include <linux/log2.h>
#include <linux/kernel.h>
@@ -255,7 +254,8 @@ struct kbase_hwcnt_csf_physical_layout {
* @hwc_threshold_work: Worker for consuming available samples when
* threshold interrupt raised.
* @num_l2_slices: Current number of L2 slices allocated to the GPU.
- * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU.
+ * @powered_shader_core_mask: The intersection of the debug_core_mask
+ * and the shader_present_bitmap.
*/
struct kbase_hwcnt_backend_csf {
struct kbase_hwcnt_backend_csf_info *info;
@@ -283,7 +283,7 @@ struct kbase_hwcnt_backend_csf {
struct work_struct hwc_dump_work;
struct work_struct hwc_threshold_work;
size_t num_l2_slices;
- u64 shader_present_bitmap;
+ u64 powered_shader_core_mask;
};
static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_csf_info *csf_info)
@@ -296,7 +296,7 @@ static bool kbasep_hwcnt_backend_csf_backend_exists(struct kbase_hwcnt_backend_c
}
void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface,
- size_t num_l2_slices, u64 shader_present_bitmap)
+ size_t num_l2_slices, u64 powered_shader_core_mask)
{
struct kbase_hwcnt_backend_csf_info *csf_info;
@@ -313,12 +313,12 @@ void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_inte
return;
if (WARN_ON(num_l2_slices > csf_info->backend->phys_layout.mmu_l2_cnt) ||
- WARN_ON((shader_present_bitmap & csf_info->backend->phys_layout.shader_avail_mask) !=
- shader_present_bitmap))
+ WARN_ON((powered_shader_core_mask & csf_info->backend->phys_layout.shader_avail_mask) !=
+ powered_shader_core_mask))
return;
csf_info->backend->num_l2_slices = num_l2_slices;
- csf_info->backend->shader_present_bitmap = shader_present_bitmap;
+ csf_info->backend->powered_shader_core_mask = powered_shader_core_mask;
}
/**
@@ -424,7 +424,7 @@ static void kbasep_hwcnt_backend_csf_init_layout(
WARN_ON(!prfcnt_info);
WARN_ON(!phys_layout);
- shader_core_cnt = (size_t)fls64(prfcnt_info->core_mask);
+ shader_core_cnt = (size_t)fls64(prfcnt_info->sc_core_mask);
values_per_block = prfcnt_info->prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
fw_block_cnt = div_u64(prfcnt_info->prfcnt_fw_size, prfcnt_info->prfcnt_block_size);
hw_block_cnt = div_u64(prfcnt_info->prfcnt_hw_size, prfcnt_info->prfcnt_block_size);
@@ -445,7 +445,7 @@ static void kbasep_hwcnt_backend_csf_init_layout(
.fw_block_cnt = fw_block_cnt,
.hw_block_cnt = hw_block_cnt,
.block_cnt = fw_block_cnt + hw_block_cnt,
- .shader_avail_mask = prfcnt_info->core_mask,
+ .shader_avail_mask = prfcnt_info->sc_core_mask,
.headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.values_per_block = values_per_block,
.counters_per_block = values_per_block - KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
@@ -517,34 +517,21 @@ static void kbasep_hwcnt_backend_csf_update_user_sample(struct kbase_hwcnt_backe
memset(backend_csf->block_states, 0, block_state_bytes);
}
-/**
- * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with
- * information from a sample.
- * @phys_layout: Physical memory layout information of HWC
- * sample buffer.
- * @enable_mask: Counter enable mask for the block whose state is being updated.
- * @enable_state: The CSF backend internal enabled state.
- * @exiting_protm: Whether or not the sample is taken when the GPU is exiting
- * protected mode.
- * @block_idx: Index of block within the ringbuffer.
- * @block_state: Pointer to existing block state of the block whose state is being
- * updated.
- * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling.
- */
-static void kbasep_hwcnt_backend_csf_update_block_state(
- const struct kbase_hwcnt_csf_physical_layout *phys_layout, const u32 enable_mask,
- enum kbase_hwcnt_backend_csf_enable_state enable_state, bool exiting_protm,
- size_t block_idx, blk_stt_t *const block_state, bool fw_in_protected_mode)
+void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend,
+ const u32 enable_mask, bool exiting_protm,
+ size_t block_idx, blk_stt_t *const block_state,
+ bool fw_in_protected_mode)
{
+ const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout;
/* Offset of shader core blocks from the start of the HW blocks in the sample */
size_t shader_core_block_offset =
- (size_t)(phys_layout->hw_block_cnt - phys_layout->shader_cnt);
+ (size_t)(phys_layout->block_cnt - phys_layout->shader_cnt);
bool is_shader_core_block;
- is_shader_core_block = block_idx >= shader_core_block_offset;
+ is_shader_core_block = (block_idx >= shader_core_block_offset);
/* Set power bits for the block state for the block, for the sample */
- switch (enable_state) {
+ switch (backend->enable_state) {
/* Disabled states */
case KBASE_HWCNT_BACKEND_CSF_DISABLED:
case KBASE_HWCNT_BACKEND_CSF_TRANSITIONING_TO_ENABLED:
@@ -592,21 +579,45 @@ static void kbasep_hwcnt_backend_csf_update_block_state(
KBASE_HWCNT_STATE_NORMAL);
else
kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_NORMAL);
+
+ /* powered_shader_core_mask stored in the backend is a combination of
+ * the shader present and the debug core mask, so explicit checking of the
+ * core mask is not required here.
+ */
+ if (is_shader_core_block) {
+ u64 current_shader_core = 1ULL << (block_idx - shader_core_block_offset);
+
+ WARN_ON_ONCE(backend->phys_layout.shader_cnt > 64);
+
+ if (current_shader_core & backend->info->backend->powered_shader_core_mask)
+ kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE);
+ else if (current_shader_core & ~backend->info->backend->powered_shader_core_mask)
+ kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_UNAVAILABLE);
+ else
+ WARN_ON_ONCE(true);
+ }
+ else
+ kbase_hwcnt_block_state_append(block_state, KBASE_HWCNT_STATE_AVAILABLE);
}
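The shader-core branch added above maps a block index to core bit (block_idx - shader_core_block_offset) and marks the block available only when that bit is set in the powered core mask, which is already the AND of shader_present and the debug core mask. A stand-alone sketch of the same decision (illustrative names, not kbase code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool shader_block_available(size_t block_idx, size_t sc_block_offset,
				   uint64_t powered_core_mask)
{
	uint64_t core_bit = UINT64_C(1) << (block_idx - sc_block_offset);

	return (core_bit & powered_core_mask) != 0;
}

int main(void)
{
	const size_t sc_block_offset = 4;	/* e.g. FE + tiler + two MEMSYS blocks */
	const uint64_t powered_core_mask = 0x5;	/* cores 0 and 2 powered */
	size_t idx;

	for (idx = sc_block_offset; idx < sc_block_offset + 3; idx++)
		printf("block %zu -> %s\n", idx,
		       shader_block_available(idx, sc_block_offset, powered_core_mask) ?
			       "AVAILABLE" : "UNAVAILABLE");
	return 0;
}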
-static void kbasep_hwcnt_backend_csf_accumulate_sample(
- const struct kbase_hwcnt_csf_physical_layout *phys_layout, size_t dump_bytes,
- u64 *accum_buf, const u32 *old_sample_buf, const u32 *new_sample_buf,
- blk_stt_t *const block_states, bool clearing_samples,
- enum kbase_hwcnt_backend_csf_enable_state enable_state, bool fw_in_protected_mode)
+static void kbasep_hwcnt_backend_csf_accumulate_sample(struct kbase_hwcnt_backend_csf *backend,
+ const u32 *old_sample_buf,
+ const u32 *new_sample_buf)
{
+ const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend->phys_layout;
+ const size_t dump_bytes = backend->info->prfcnt_info.dump_bytes;
+ const size_t values_per_block = phys_layout->values_per_block;
+ blk_stt_t *const block_states = backend->block_states;
+ const bool fw_in_protected_mode = backend->info->fw_in_protected_mode;
+ const bool clearing_samples = backend->info->prfcnt_info.clearing_samples;
+ u64 *accum_buf = backend->accum_buf;
+
size_t block_idx;
const u32 *old_block = old_sample_buf;
const u32 *new_block = new_sample_buf;
u64 *acc_block = accum_buf;
/* Flag to indicate whether current sample is exiting protected mode. */
bool exiting_protm = false;
- const size_t values_per_block = phys_layout->values_per_block;
/* The block pointers now point to the first HW block, which is always a CSHW/front-end
* block. The counter enable mask for this block can be checked to determine whether this
@@ -620,9 +631,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_sample(
const u32 old_enable_mask = old_block[phys_layout->enable_mask_offset];
const u32 new_enable_mask = new_block[phys_layout->enable_mask_offset];
/* Update block state with information of the current sample */
- kbasep_hwcnt_backend_csf_update_block_state(phys_layout, new_enable_mask,
- enable_state, exiting_protm, block_idx,
- &block_states[block_idx],
+ kbasep_hwcnt_backend_csf_update_block_state(backend, new_enable_mask, exiting_protm,
+ block_idx, &block_states[block_idx],
fw_in_protected_mode);
if (!(new_enable_mask & HWCNT_BLOCK_EMPTY_SAMPLE)) {
@@ -706,7 +716,6 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
u8 *cpu_dump_base = (u8 *)backend_csf->ring_buf_cpu_base;
const size_t ring_buf_cnt = backend_csf->info->ring_buf_cnt;
const size_t buf_dump_bytes = backend_csf->info->prfcnt_info.dump_bytes;
- bool clearing_samples = backend_csf->info->prfcnt_info.clearing_samples;
u32 *old_sample_buf = backend_csf->old_sample_buf;
u32 *new_sample_buf = old_sample_buf;
const struct kbase_hwcnt_csf_physical_layout *phys_layout = &backend_csf->phys_layout;
@@ -740,10 +749,8 @@ static void kbasep_hwcnt_backend_csf_accumulate_samples(struct kbase_hwcnt_backe
const u32 buf_idx = raw_idx & (ring_buf_cnt - 1);
new_sample_buf = (u32 *)&cpu_dump_base[buf_idx * buf_dump_bytes];
- kbasep_hwcnt_backend_csf_accumulate_sample(
- phys_layout, buf_dump_bytes, backend_csf->accum_buf, old_sample_buf,
- new_sample_buf, backend_csf->block_states, clearing_samples,
- backend_csf->enable_state, backend_csf->info->fw_in_protected_mode);
+ kbasep_hwcnt_backend_csf_accumulate_sample(backend_csf, old_sample_buf,
+ new_sample_buf);
old_sample_buf = new_sample_buf;
}
@@ -1457,7 +1464,7 @@ static int kbasep_hwcnt_backend_csf_dump_get(struct kbase_hwcnt_backend *backend
ret = kbase_hwcnt_csf_dump_get(dst, backend_csf->to_user_buf,
backend_csf->to_user_block_states, dst_enable_map,
backend_csf->num_l2_slices,
- backend_csf->shader_present_bitmap, accumulate);
+ backend_csf->powered_shader_core_mask, accumulate);
/* If no error occurred (zero ret value), then update block state for all blocks in the
* accumulation with the current sample's block state.
@@ -2098,7 +2105,7 @@ int kbase_hwcnt_backend_csf_metadata_init(struct kbase_hwcnt_backend_interface *
gpu_info.has_fw_counters = csf_info->prfcnt_info.prfcnt_fw_size > 0;
gpu_info.l2_count = csf_info->prfcnt_info.l2_count;
gpu_info.csg_cnt = csf_info->prfcnt_info.csg_count;
- gpu_info.core_mask = csf_info->prfcnt_info.core_mask;
+ gpu_info.sc_core_mask = csf_info->prfcnt_info.sc_core_mask;
gpu_info.clk_cnt = csf_info->prfcnt_info.clk_cnt;
gpu_info.prfcnt_values_per_block =
csf_info->prfcnt_info.prfcnt_block_size / KBASE_HWCNT_VALUE_HW_BYTES;
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
index 2487db2..1b4e16d 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,8 +30,10 @@
#include "hwcnt/backend/mali_kbase_hwcnt_backend.h"
#include "hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h"
#include "hwcnt/mali_kbase_hwcnt_watchdog_if.h"
+#include "hwcnt/mali_kbase_hwcnt_types.h"
struct kbase_hwcnt_physical_enable_map;
+struct kbase_hwcnt_backend_csf;
/**
* kbase_hwcnt_backend_csf_create() - Create a CSF hardware counter backend
@@ -123,11 +125,12 @@ void kbase_hwcnt_backend_csf_on_before_reset(struct kbase_hwcnt_backend_interfac
* this function is called.
* @iface: Non-NULL pointer to HWC backend interface.
* @num_l2_slices: Current number of L2 slices allocated to the GPU.
- * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU.
+ * @powered_shader_core_mask: The intersection of the debug_core_mask
+ * and the shader_present_bitmap.
*/
void kbase_hwcnt_backend_csf_set_hw_availability(struct kbase_hwcnt_backend_interface *iface,
size_t num_l2_slices,
- uint64_t shader_present_bitmap);
+ uint64_t powered_shader_core_mask);
/** kbasep_hwcnt_backend_csf_process_enable_map() - Process the enable_map to
* guarantee headers are
@@ -174,4 +177,21 @@ void kbase_hwcnt_backend_csf_on_prfcnt_enable(struct kbase_hwcnt_backend_interfa
*/
void kbase_hwcnt_backend_csf_on_prfcnt_disable(struct kbase_hwcnt_backend_interface *iface);
+/**
+ * kbasep_hwcnt_backend_csf_update_block_state - Update block state of a block instance with
+ * information from a sample.
+ * @backend: CSF hardware counter backend.
+ * @enable_mask: Counter enable mask for the block whose state is being updated.
+ * @exiting_protm: Whether or not the sample is taken when the GPU is exiting
+ * protected mode.
+ * @block_idx: Index of block within the ringbuffer.
+ * @block_state: Pointer to existing block state of the block whose state is being
+ * updated.
+ * @fw_in_protected_mode: Whether or not GPU is in protected mode during sampling.
+ */
+void kbasep_hwcnt_backend_csf_update_block_state(struct kbase_hwcnt_backend_csf *backend,
+ const u32 enable_mask, bool exiting_protm,
+ size_t block_idx, blk_stt_t *const block_state,
+ bool fw_in_protected_mode);
+
#endif /* _KBASE_HWCNT_BACKEND_CSF_H_ */
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
index 65bb965..4ee2c8a 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if.h
@@ -68,7 +68,7 @@ struct kbase_hwcnt_backend_csf_if_enable {
* @prfcnt_block_size: Bytes of each performance counter block.
* @l2_count: The MMU L2 cache count.
* @csg_count: The total number of CSGs in the system
- * @core_mask: Shader core mask.
+ * @sc_core_mask: Shader core mask.
* @clk_cnt: Clock domain count in the system.
* @clearing_samples: Indicates whether counters are cleared after each sample
* is taken.
@@ -80,7 +80,7 @@ struct kbase_hwcnt_backend_csf_if_prfcnt_info {
size_t prfcnt_block_size;
size_t l2_count;
u32 csg_count;
- u64 core_mask;
+ u64 sc_core_mask;
u8 clk_cnt;
bool clearing_samples;
};
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
index 1b7a116..fe81ce1 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_csf_if_fw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -229,7 +229,7 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
*prfcnt_info = (struct kbase_hwcnt_backend_csf_if_prfcnt_info){
.l2_count = KBASE_DUMMY_MODEL_MAX_MEMSYS_BLOCKS,
- .core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1,
+ .sc_core_mask = (1ull << KBASE_DUMMY_MODEL_MAX_SHADER_CORES) - 1,
.prfcnt_hw_size =
KBASE_DUMMY_MODEL_MAX_NUM_HARDWARE_BLOCKS * KBASE_DUMMY_MODEL_BLOCK_SIZE,
.prfcnt_fw_size =
@@ -290,12 +290,13 @@ static void kbasep_hwcnt_backend_csf_if_fw_get_prfcnt_info(
.dump_bytes = fw_ctx->buf_bytes,
.prfcnt_block_size = prfcnt_block_size,
.l2_count = kbdev->gpu_props.num_l2_slices,
- .core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props),
+ .sc_core_mask = kbasep_hwcnt_backend_csf_core_mask(&kbdev->gpu_props),
.csg_count = fw_block_count > 1 ? csg_count : 0,
.clk_cnt = fw_ctx->clk_cnt,
.clearing_samples = true,
};
+
/* Block size must be multiple of counter size. */
WARN_ON((prfcnt_info->prfcnt_block_size % KBASE_HWCNT_VALUE_HW_BYTES) != 0);
/* Total size must be multiple of block size. */
@@ -513,10 +514,15 @@ kbasep_hwcnt_backend_csf_if_fw_ring_buf_free(struct kbase_hwcnt_backend_csf_if_c
fw_ring_buf->phys, fw_ring_buf->num_pages, fw_ring_buf->num_pages,
MCU_AS_NR));
+ /* Clear the dump ring_buf content to zeros */
+ memset(fw_ring_buf->cpu_dump_base, 0, fw_ring_buf->num_pages * PAGE_SIZE);
vunmap(fw_ring_buf->cpu_dump_base);
+	/* After zeroing, the ring_buf pages are dirty, so the 'dirty' flag must be
+	 * passed as true when freeing the pages back to the global pool.
+ */
kbase_mem_pool_free_pages(&fw_ctx->kbdev->mem_pools.small[KBASE_MEM_GROUP_CSF_FW],
- fw_ring_buf->num_pages, fw_ring_buf->phys, false, false);
+ fw_ring_buf->num_pages, fw_ring_buf->phys, true, false);
kfree(fw_ring_buf->phys);
diff --git a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
index 4df7dd4..8b337eb 100644
--- a/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
+++ b/mali_kbase/hwcnt/backend/mali_kbase_hwcnt_backend_jm.c
@@ -165,7 +165,7 @@ static int kbasep_hwcnt_backend_jm_gpu_info_init(struct kbase_device *kbdev,
#endif
info->l2_count = l2_count;
- info->core_mask = core_mask;
+ info->sc_core_mask = core_mask;
info->prfcnt_values_per_block = KBASE_HWCNT_V5_DEFAULT_VALUES_PER_BLOCK;
/* Determine the number of available clock domains. */
@@ -186,7 +186,7 @@ static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_inf
WARN_ON(!gpu_info);
WARN_ON(!phys_layout);
- shader_core_cnt = fls64(gpu_info->core_mask);
+ shader_core_cnt = fls64(gpu_info->sc_core_mask);
*phys_layout = (struct kbase_hwcnt_jm_physical_layout){
.fe_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT,
@@ -195,7 +195,7 @@ static void kbasep_hwcnt_backend_jm_init_layout(const struct kbase_hwcnt_gpu_inf
.shader_cnt = shader_core_cnt,
.block_cnt = KBASE_HWCNT_V5_FE_BLOCK_COUNT + KBASE_HWCNT_V5_TILER_BLOCK_COUNT +
gpu_info->l2_count + shader_core_cnt,
- .shader_avail_mask = gpu_info->core_mask,
+ .shader_avail_mask = gpu_info->sc_core_mask,
.headers_per_block = KBASE_HWCNT_V5_HEADERS_PER_BLOCK,
.values_per_block = gpu_info->prfcnt_values_per_block,
.counters_per_block =
@@ -384,14 +384,12 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
enable = (struct kbase_instr_hwcnt_enable)
{
- .fe_bm = phys_enable_map.fe_bm,
- .shader_bm = phys_enable_map.shader_bm,
- .tiler_bm = phys_enable_map.tiler_bm,
- .mmu_l2_bm = phys_enable_map.mmu_l2_bm,
+ .fe_bm = phys_enable_map.fe_bm, .shader_bm = phys_enable_map.shader_bm,
+ .tiler_bm = phys_enable_map.tiler_bm, .mmu_l2_bm = phys_enable_map.mmu_l2_bm,
.counter_set = phys_counter_set,
#if IS_ENABLED(CONFIG_MALI_NO_MALI)
/* The dummy model needs the CPU mapping. */
- .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va,
+ .dump_buffer = (uintptr_t)backend_jm->cpu_dump_va,
#else
.dump_buffer = backend_jm->gpu_dump_va,
#endif /* CONFIG_MALI_NO_MALI */
@@ -411,7 +409,7 @@ kbasep_hwcnt_backend_jm_dump_enable_nolock(struct kbase_hwcnt_backend *backend,
backend_jm->debug_core_mask = kbase_pm_ca_get_debug_core_mask(kbdev);
backend_jm->max_l2_slices = backend_jm->info->hwcnt_gpu_info.l2_count;
- backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.core_mask;
+ backend_jm->max_core_mask = backend_jm->info->hwcnt_gpu_info.sc_core_mask;
backend_jm->pm_core_mask = kbase_pm_ca_get_instr_core_mask(kbdev);
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
index 252616d..31a4528 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -169,7 +169,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu
/* Calculate number of block instances that aren't cores */
non_core_block_count = 2 + gpu_info->l2_count;
/* Calculate number of block instances that are shader cores */
- sc_block_count = (size_t)fls64(gpu_info->core_mask);
+ sc_block_count = (size_t)fls64(gpu_info->sc_core_mask);
/* Determine the total number of cores */
core_block_count = sc_block_count;
@@ -277,7 +277,7 @@ static int kbasep_hwcnt_backend_gpu_metadata_create(const struct kbase_hwcnt_gpu
kbase_hwcnt_set_avail_mask(&desc.avail_mask, 0, 0);
kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, 0, non_core_block_count, U64_MAX);
kbase_hwcnt_set_avail_mask_bits(&desc.avail_mask, non_core_block_count, sc_block_count,
- gpu_info->core_mask);
+ gpu_info->sc_core_mask);
return kbase_hwcnt_metadata_create(&desc, metadata);
@@ -294,7 +294,7 @@ static size_t kbasep_hwcnt_backend_jm_dump_bytes(const struct kbase_hwcnt_gpu_in
{
WARN_ON(!gpu_info);
- return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->core_mask)) *
+ return (2 + gpu_info->l2_count + (size_t)fls64(gpu_info->sc_core_mask)) *
gpu_info->prfcnt_values_per_block * KBASE_HWCNT_VALUE_HW_BYTES;
}
@@ -384,6 +384,7 @@ bool kbase_hwcnt_is_block_type_shader(const enum kbase_hwcnt_gpu_v5_block_type b
return false;
}
+
bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type blk_type)
{
if (blk_type == KBASE_HWCNT_GPU_V5_BLOCK_TYPE_PERF_MEMSYS ||
@@ -467,9 +468,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
else
hw_res_available = true;
- /*
- * Skip block if no values in the destination block are enabled.
- */
+ /* Skip block if no values in the destination block are enabled. */
if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) {
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
const u64 *src_blk = dump_src + src_offset;
@@ -593,7 +592,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
blk_stt_t *src_block_stt,
const struct kbase_hwcnt_enable_map *dst_enable_map,
- size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate)
+ size_t num_l2_slices, u64 powered_shader_core_mask, bool accumulate)
{
const struct kbase_hwcnt_metadata *metadata;
const u64 *dump_src = src;
@@ -615,9 +614,7 @@ int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
blk_stt_t *dst_blk_stt =
kbase_hwcnt_dump_buffer_block_state_instance(dst, blk, blk_inst);
- /*
- * Skip block if no values in the destination block are enabled.
- */
+ /* Skip block if no values in the destination block are enabled. */
if (kbase_hwcnt_enable_map_block_enabled(dst_enable_map, blk, blk_inst)) {
u64 *dst_blk = kbase_hwcnt_dump_buffer_block_instance(dst, blk, blk_inst);
const u64 *src_blk = dump_src + src_offset;
diff --git a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
index 4339fdd..570aad7 100644
--- a/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
+++ b/mali_kbase/hwcnt/mali_kbase_hwcnt_gpu.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2018-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2018-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -169,7 +169,7 @@ enum kbase_hwcnt_physical_set {
/**
* struct kbase_hwcnt_gpu_info - Information about hwcnt blocks on the GPUs.
* @l2_count: L2 cache count.
- * @core_mask: Shader core mask. May be sparse.
+ * @sc_core_mask: Shader core mask. May be sparse.
* @clk_cnt: Number of clock domains available.
* @csg_cnt: Number of CSGs available.
* @prfcnt_values_per_block: Total entries (header + counters) of performance
@@ -178,7 +178,7 @@ enum kbase_hwcnt_physical_set {
*/
struct kbase_hwcnt_gpu_info {
size_t l2_count;
- u64 core_mask;
+ u64 sc_core_mask;
u8 clk_cnt;
u8 csg_cnt;
size_t prfcnt_values_per_block;
@@ -327,15 +327,16 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
* kbase_hwcnt_csf_dump_get() - Copy or accumulate enabled counters from the raw
* dump buffer in src into the dump buffer
* abstraction in dst.
- * @dst: Non-NULL pointer to destination dump buffer.
- * @src: Non-NULL pointer to source raw dump buffer, of same length
- * as dump_buf_bytes in the metadata of dst dump buffer.
- * @src_block_stt: Non-NULL pointer to source block state buffer.
- * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
- * @num_l2_slices: Current number of L2 slices allocated to the GPU.
- * @shader_present_bitmap: Current shader-present bitmap that is allocated to the GPU.
- * @accumulate: True if counters in src should be accumulated into
- * destination, rather than copied.
+ * @dst: Non-NULL pointer to destination dump buffer.
+ * @src: Non-NULL pointer to source raw dump buffer, of same length
+ * as dump_buf_bytes in the metadata of dst dump buffer.
+ * @src_block_stt: Non-NULL pointer to source block state buffer.
+ * @dst_enable_map: Non-NULL pointer to enable map specifying enabled values.
+ * @num_l2_slices: Current number of L2 slices allocated to the GPU.
+ * @powered_shader_core_mask: The common mask between the debug_core_mask
+ * and the shader_present_bitmap.
+ * @accumulate: True if counters in src should be accumulated into
+ * destination, rather than copied.
*
* The dst and dst_enable_map MUST have been created from the same metadata as
* returned from the call to kbase_hwcnt_csf_metadata_create as was used to get
@@ -346,7 +347,7 @@ int kbase_hwcnt_jm_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
int kbase_hwcnt_csf_dump_get(struct kbase_hwcnt_dump_buffer *dst, u64 *src,
blk_stt_t *src_block_stt,
const struct kbase_hwcnt_enable_map *dst_enable_map,
- size_t num_l2_slices, u64 shader_present_bitmap, bool accumulate);
+ size_t num_l2_slices, u64 powered_shader_core_mask, bool accumulate);
/**
* kbase_hwcnt_backend_gpu_block_map_to_physical() - Convert from a block
@@ -453,6 +454,7 @@ bool kbase_hwcnt_is_block_type_memsys(const enum kbase_hwcnt_gpu_v5_block_type b
bool kbase_hwcnt_is_block_type_tiler(const enum kbase_hwcnt_gpu_v5_block_type blk_type);
bool kbase_hwcnt_is_block_type_fe(const enum kbase_hwcnt_gpu_v5_block_type blk_type);
+
/**
* kbase_hwcnt_gpu_enable_map_from_cm() - Builds enable map abstraction from
* counter selection bitmasks.
diff --git a/mali_kbase/mali_base_hwconfig_features.h b/mali_kbase/mali_base_hwconfig_features.h
index 1f32fc9..dd76be3 100644
--- a/mali_kbase/mali_base_hwconfig_features.h
+++ b/mali_kbase/mali_base_hwconfig_features.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/mali_base_hwconfig_issues.h b/mali_kbase/mali_base_hwconfig_issues.h
index 4426bd7..d01977f 100644
--- a/mali_kbase/mali_base_hwconfig_issues.h
+++ b/mali_kbase/mali_base_hwconfig_issues.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2014-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2014-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -462,22 +462,6 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r0p2
BASE_HW_ISSUE_END
};
-__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tBAx_r1p0[] = {
- BASE_HW_ISSUE_9435,
- BASE_HW_ISSUE_TSIX_2033,
- BASE_HW_ISSUE_TTRX_1337,
- BASE_HW_ISSUE_TTRX_2968_TTRX_3162,
- BASE_HW_ISSUE_TTRX_921,
- BASE_HW_ISSUE_TTRX_3414,
- BASE_HW_ISSUE_TTRX_3083,
- BASE_HW_ISSUE_TTRX_3470,
- BASE_HW_ISSUE_TTRX_3464,
- BASE_HW_ISSUE_TITANHW_2710,
- BASE_HW_ISSUE_GPU2022PRO_148,
- BASE_HW_ISSUE_TITANHW_2938,
- BASE_HW_ISSUE_END
-};
-
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tBAx[] = {
BASE_HW_ISSUE_5736, BASE_HW_ISSUE_9435, BASE_HW_ISSUE_TSIX_2033,
BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_TTRX_3414, BASE_HW_ISSUE_TTRX_3083,
@@ -512,7 +496,13 @@ __attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tGR
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p0[] = {
BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878,
BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
- BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
+ BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
+};
+
+__attribute__((unused)) static const enum base_hw_issue base_hw_issues_tVAx_r0p1[] = {
+ BASE_HW_ISSUE_TSIX_2033, BASE_HW_ISSUE_TTRX_1337, BASE_HW_ISSUE_GPU2019_3878,
+ BASE_HW_ISSUE_GPU2019_3901, BASE_HW_ISSUE_TITANHW_2710, BASE_HW_ISSUE_GPU2022PRO_148,
+ BASE_HW_ISSUE_TITANHW_2938, BASE_HW_ISSUE_END
};
__attribute__((unused)) static const enum base_hw_issue base_hw_issues_model_tVAx[] = {
diff --git a/mali_kbase/mali_kbase_caps.h b/mali_kbase/mali_kbase_caps.h
index a92569d..c458ac1 100644
--- a/mali_kbase/mali_kbase_caps.h
+++ b/mali_kbase/mali_kbase_caps.h
@@ -33,15 +33,22 @@
*
* @MALI_KBASE_CAP_SYSTEM_MONITOR: System Monitor
* @MALI_KBASE_CAP_JIT_PRESSURE_LIMIT: JIT Pressure limit
+ * @MALI_KBASE_CAP_MEM_DONT_NEED: Physical memory that can be marked as not needed
* @MALI_KBASE_CAP_MEM_GROW_ON_GPF: Memory grow on page fault
* @MALI_KBASE_CAP_MEM_PROTECTED: Protected memory
+ * @MALI_KBASE_CAP_MEM_IMPORT_SYNC_ON_MAP_UNMAP: CPU cache maintenance required when
+ * imported GPU memory is mapped/unmapped
+ * @MALI_KBASE_CAP_MEM_KERNEL_SYNC: Kernel side cache sync ops required
* @MALI_KBASE_NUM_CAPS: Delimiter
*/
enum mali_kbase_cap {
MALI_KBASE_CAP_SYSTEM_MONITOR = 0,
MALI_KBASE_CAP_JIT_PRESSURE_LIMIT,
+ MALI_KBASE_CAP_MEM_DONT_NEED,
MALI_KBASE_CAP_MEM_GROW_ON_GPF,
MALI_KBASE_CAP_MEM_PROTECTED,
+ MALI_KBASE_CAP_MEM_IMPORT_SYNC_ON_MAP_UNMAP,
+ MALI_KBASE_CAP_MEM_KERNEL_SYNC,
MALI_KBASE_NUM_CAPS
};
@@ -57,6 +64,11 @@ static inline bool mali_kbase_supports_jit_pressure_limit(unsigned long api_vers
return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_JIT_PRESSURE_LIMIT);
}
+static inline bool mali_kbase_supports_mem_dont_need(unsigned long api_version)
+{
+ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_DONT_NEED);
+}
+
static inline bool mali_kbase_supports_mem_grow_on_gpf(unsigned long api_version)
{
return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_GROW_ON_GPF);
@@ -67,4 +79,14 @@ static inline bool mali_kbase_supports_mem_protected(unsigned long api_version)
return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_PROTECTED);
}
+static inline bool mali_kbase_supports_mem_import_sync_on_map_unmap(unsigned long api_version)
+{
+ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_IMPORT_SYNC_ON_MAP_UNMAP);
+}
+
+static inline bool mali_kbase_supports_mem_kernel_sync(unsigned long api_version)
+{
+ return mali_kbase_supports_cap(api_version, MALI_KBASE_CAP_MEM_KERNEL_SYNC);
+}
+
#endif /* __KBASE_CAPS_H_ */
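The capability plumbing above follows a fixed pattern: a new enum entry in mali_kbase_cap, a matching { major, minor } row in kbase_caps_table (added later in this change), and an inline mali_kbase_supports_*() helper that callers use to version-gate behaviour. Below is a minimal sketch of how a query path is expected to use one of the new helpers, mirroring the kbase_mem_query() hunk further down; the wrapper function and its arguments are illustrative only, not part of the driver.

/* Illustrative sketch: version-gate a newly reported flag so that only
 * clients built against a new enough UAPI ever see it set.
 */
static void example_report_kernel_sync(struct kbase_context *kctx,
				       struct kbase_va_region *reg, u64 *out)
{
	if (mali_kbase_supports_mem_kernel_sync(kctx->api_version)) {
		if (unlikely(reg->cpu_alloc != reg->gpu_alloc))
			*out |= BASE_MEM_KERNEL_SYNC;
	}
}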
diff --git a/mali_kbase/mali_kbase_config_defaults.h b/mali_kbase/mali_kbase_config_defaults.h
index 1bedc6b..6a22826 100644
--- a/mali_kbase/mali_kbase_config_defaults.h
+++ b/mali_kbase/mali_kbase_config_defaults.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2013-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2013-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -206,9 +206,22 @@ enum {
/* pixel: b/319408928 - CSF_CSG_SUSPEND_TIMEOUT_CYCLES is set to 2s@100MHz. */
#define CSF_CSG_SUSPEND_TIMEOUT_CYCLES (200000000ull)
+/* Waiting timeout in clock cycles for GPU suspend to complete. */
+#define CSF_GPU_SUSPEND_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES)
+
/* Waiting timeout in clock cycles for GPU reset to complete. */
#define CSF_GPU_RESET_TIMEOUT_CYCLES (CSF_CSG_SUSPEND_TIMEOUT_CYCLES * 2)
+/* Waiting timeout in clock cycles for a CSG to be terminated.
+ *
+ * Based on a 0.6s timeout at 100MHz, scaled from the 0.1s timeout at 600MHz GPU
+ * frequency that is defined in FW for the iterator to complete the transition
+ * to the DISABLED state.
+ * Additional cycles (0.4s @ 100MHz = 40000000) are added to ensure that the
+ * host timeout is always larger than the FW timeout.
+ */
+#define CSF_CSG_TERM_TIMEOUT_CYCLES (100000000)
+
/* Waiting timeout in clock cycles for GPU firmware to boot.
*
* Based on 250ms timeout at 100MHz, scaled from a 50MHz GPU system.
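The timeout macros in this file are expressed in GPU clock cycles against the 100MHz reference quoted in the comments, so CSF_CSG_SUSPEND_TIMEOUT_CYCLES (200000000) corresponds to 2s, the new CSF_CSG_TERM_TIMEOUT_CYCLES (100000000) to 1s (0.6s FW limit plus 0.4s host margin), and CSF_GPU_RESET_TIMEOUT_CYCLES to twice the suspend value. A standalone sketch (not driver code) that performs the cycles-to-milliseconds conversion, useful for sanity-checking those figures:

#include <stdint.h>
#include <stdio.h>

/* Convert a cycle-based timeout to milliseconds at a given GPU frequency. */
static uint64_t timeout_cycles_to_ms(uint64_t cycles, uint64_t freq_hz)
{
	return (cycles * 1000u) / freq_hz;
}

int main(void)
{
	/* 200000000 cycles @ 100MHz -> 2000 ms (CSG suspend). */
	printf("suspend:   %llu ms\n",
	       (unsigned long long)timeout_cycles_to_ms(200000000ull, 100000000ull));
	/* 100000000 cycles @ 100MHz -> 1000 ms (CSG terminate). */
	printf("terminate: %llu ms\n",
	       (unsigned long long)timeout_cycles_to_ms(100000000ull, 100000000ull));
	return 0;
}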
diff --git a/mali_kbase/mali_kbase_core_linux.c b/mali_kbase/mali_kbase_core_linux.c
index c0d8cf2..d757e8a 100644
--- a/mali_kbase/mali_kbase_core_linux.c
+++ b/mali_kbase/mali_kbase_core_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -109,6 +109,7 @@
#include <linux/clk-provider.h>
#include <linux/delay.h>
#include <linux/log2.h>
+#include <linux/mali_hw_access.h>
#include <mali_kbase_config.h>
@@ -154,13 +155,19 @@ static const struct mali_kbase_capability_def kbase_caps_table[MALI_KBASE_NUM_CA
#if MALI_USE_CSF
{ 1, 0 }, /* SYSTEM_MONITOR */
{ 1, 0 }, /* JIT_PRESSURE_LIMIT */
+ { 1, 22 }, /* MEM_DONT_NEED */
{ 1, 0 }, /* MEM_GROW_ON_GPF */
- { 1, 0 } /* MEM_PROTECTED */
+ { 1, 0 }, /* MEM_PROTECTED */
+ { 1, 26 }, /* MEM_IMPORT_SYNC_ON_MAP_UNMAP */
+ { 1, 26 } /* MEM_KERNEL_SYNC */
#else
{ 11, 15 }, /* SYSTEM_MONITOR */
{ 11, 25 }, /* JIT_PRESSURE_LIMIT */
+ { 11, 40 }, /* MEM_DONT_NEED */
{ 11, 2 }, /* MEM_GROW_ON_GPF */
- { 11, 2 } /* MEM_PROTECTED */
+ { 11, 2 }, /* MEM_PROTECTED */
+ { 11, 43 }, /* MEM_IMPORT_SYNC_ON_MAP_UNMAP */
+ { 11, 43 } /* MEM_KERNEL_SYNC */
#endif
};
@@ -1623,6 +1630,12 @@ static int kbasep_cs_queue_kick(struct kbase_context *kctx, struct kbase_ioctl_c
return kbase_csf_queue_kick(kctx, kick);
}
+static int kbasep_queue_group_clear_faults(struct kbase_context *kctx,
+ struct kbase_ioctl_queue_group_clear_faults *faults)
+{
+ return kbase_csf_queue_group_clear_faults(kctx, faults);
+}
+
static int kbasep_cs_queue_group_create_1_6(struct kbase_context *kctx,
union kbase_ioctl_cs_queue_group_create_1_6 *create)
{
@@ -1692,6 +1705,8 @@ static int kbasep_cs_queue_group_create_1_18(struct kbase_context *kctx,
static int kbasep_cs_queue_group_create(struct kbase_context *kctx,
union kbase_ioctl_cs_queue_group_create *create)
{
+	/* create->in.reserved is only present in pre-TDRX configurations. */
+
if (create->in.reserved != 0) {
dev_warn(kctx->kbdev->dev, "Invalid reserved field not 0 in queue group create\n");
return -EINVAL;
@@ -2202,6 +2217,11 @@ static long kbase_kfile_ioctl(struct kbase_file *kfile, unsigned int cmd, unsign
KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_KCPU_QUEUE_ENQUEUE, kbasep_kcpu_queue_enqueue,
struct kbase_ioctl_kcpu_queue_enqueue, kctx);
break;
+ case KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS:
+ KBASE_HANDLE_IOCTL_IN(KBASE_IOCTL_QUEUE_GROUP_CLEAR_FAULTS,
+ kbasep_queue_group_clear_faults,
+ struct kbase_ioctl_queue_group_clear_faults, kctx);
+ break;
case KBASE_IOCTL_CS_TILER_HEAP_INIT:
KBASE_HANDLE_IOCTL_INOUT(KBASE_IOCTL_CS_TILER_HEAP_INIT, kbasep_cs_tiler_heap_init,
union kbase_ioctl_cs_tiler_heap_init, kctx);
@@ -2673,6 +2693,9 @@ static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr,
struct kbase_device *kbdev;
unsigned long flags;
ssize_t ret = 0;
+#if !MALI_USE_CSF
+ size_t i;
+#endif
CSTD_UNUSED(attr);
@@ -2691,154 +2714,191 @@ static ssize_t core_mask_show(struct device *dev, struct device_attribute *attr,
ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret),
"Current in use core mask : 0x%llX\n", kbdev->pm.backend.shaders_avail);
#else
- ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS0) : 0x%llX\n",
- kbdev->pm.debug_core_mask[0]);
- ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS1) : 0x%llX\n",
- kbdev->pm.debug_core_mask[1]);
- ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Current core mask (JS2) : 0x%llX\n",
- kbdev->pm.debug_core_mask[2]);
+ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; i++) {
+ if (PAGE_SIZE < ret)
+ goto out_unlock;
+
+ ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret),
+ "Current core mask (JS%zu) : 0x%llX\n", i,
+ kbdev->pm.debug_core_mask[i]);
+ }
#endif /* MALI_USE_CSF */
ret += scnprintf(buf + ret, (size_t)(PAGE_SIZE - ret), "Available core mask : 0x%llX\n",
kbdev->gpu_props.shader_present);
-
+#if !MALI_USE_CSF
+out_unlock:
+#endif
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
return ret;
}
-/**
- * core_mask_store - Store callback for the core_mask sysfs file.
- *
- * @dev: The device with sysfs file is for
- * @attr: The attributes of the sysfs file
- * @buf: The value written to the sysfs file
- * @count: The number of bytes to write to the sysfs file
- *
- * This function is called when the core_mask sysfs file is written to.
- *
- * Return: @count if the function succeeded. An error code on failure.
- */
-static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr, const char *buf,
- size_t count)
-{
- struct kbase_device *kbdev;
#if MALI_USE_CSF
+struct kbase_core_mask {
u64 new_core_mask;
-#else
- u64 new_core_mask[3];
- u64 group_core_mask;
- int i;
-#endif /* MALI_USE_CSF */
-
- int items;
- ssize_t err = (ssize_t)count;
- unsigned long flags;
- u64 shader_present;
-
- CSTD_UNUSED(attr);
-
- kbdev = to_kbase_device(dev);
-
- if (!kbdev)
- return -ENODEV;
-
-#if MALI_USE_CSF
- items = sscanf(buf, "%llx", &new_core_mask);
+};
- if (items != 1) {
- dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"
- "Use format <core_mask>\n");
- err = -EINVAL;
- goto end;
- }
-#else
- items = sscanf(buf, "%llx %llx %llx", &new_core_mask[0], &new_core_mask[1],
- &new_core_mask[2]);
+static int core_mask_parse(struct kbase_device *const kbdev, const char *const buf,
+ struct kbase_core_mask *const mask)
+{
+ int err = kstrtou64(buf, 0, &mask->new_core_mask);
- if (items != 1 && items != 3) {
- dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"
- "Use format <core_mask>\n"
- "or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
- err = -EINVAL;
- goto end;
- }
+ if (err)
+ dev_err(kbdev->dev, "Couldn't process core mask write operation.\n");
- if (items == 1)
- new_core_mask[1] = new_core_mask[2] = new_core_mask[0];
-#endif
+ return err;
+}
- rt_mutex_lock(&kbdev->pm.lock);
- spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+static int core_mask_set(struct kbase_device *kbdev, struct kbase_core_mask *const new_mask)
+{
+ u64 new_core_mask = new_mask->new_core_mask;
+ u64 shader_present = kbdev->gpu_props.shader_present;
- shader_present = kbdev->gpu_props.shader_present;
+ lockdep_assert_held(&kbdev->pm.lock);
+ lockdep_assert_held(&kbdev->hwaccess_lock);
-#if MALI_USE_CSF
if ((new_core_mask & shader_present) != new_core_mask) {
- dev_err(dev,
+ dev_err(kbdev->dev,
"Invalid core mask 0x%llX: Includes non-existent cores (present = 0x%llX)",
new_core_mask, shader_present);
- err = -EINVAL;
- goto unlock;
+ return -EINVAL;
} else if (!(new_core_mask & shader_present & kbdev->pm.backend.ca_cores_enabled)) {
- dev_err(dev,
- "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n",
+ dev_err(kbdev->dev,
+ "Invalid core mask 0x%llX: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)",
new_core_mask, kbdev->gpu_props.shader_present,
kbdev->pm.backend.ca_cores_enabled);
- err = -EINVAL;
- goto unlock;
+ return -EINVAL;
}
+
if (kbdev->pm.debug_core_mask != new_core_mask)
kbase_pm_set_debug_core_mask(kbdev, new_core_mask);
+
+ return 0;
+}
#else
- group_core_mask = kbdev->gpu_props.coherency_info.group.core_mask;
+struct kbase_core_mask {
+ u64 new_core_mask[BASE_JM_MAX_NR_SLOTS];
+};
+
+static int core_mask_parse(struct kbase_device *const kbdev, const char *const buf,
+ struct kbase_core_mask *const mask)
+{
+ int items;
+
+ items = sscanf(buf, "%llx %llx %llx", &mask->new_core_mask[0], &mask->new_core_mask[1],
+ &mask->new_core_mask[2]);
- for (i = 0; i < 3; ++i) {
+ if (items != 1 && items != BASE_JM_MAX_NR_SLOTS) {
+ dev_err(kbdev->dev, "Couldn't process core mask write operation.\n"
+ "Use format <core_mask>\n"
+ "or <core_mask_js0> <core_mask_js1> <core_mask_js2>\n");
+ return -EINVAL;
+ }
+
+ /* If only one value was provided, set all other core masks equal to the value. */
+ if (items == 1) {
+ size_t i;
+
+ for (i = 1; i < BASE_JM_MAX_NR_SLOTS; i++)
+ mask->new_core_mask[i] = mask->new_core_mask[0];
+ }
+
+ return 0;
+}
+
+static int core_mask_set(struct kbase_device *kbdev, struct kbase_core_mask *const new_mask)
+{
+ u64 shader_present = kbdev->gpu_props.shader_present;
+ u64 group_core_mask = kbdev->gpu_props.coherency_info.group.core_mask;
+ u64 *new_core_mask = &new_mask->new_core_mask[0];
+ size_t i;
+
+ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; ++i) {
if ((new_core_mask[i] & shader_present) != new_core_mask[i]) {
- dev_err(dev,
- "Invalid core mask 0x%llX for JS %d: Includes non-existent cores (present = 0x%llX)",
+ dev_err(kbdev->dev,
+ "Invalid core mask 0x%llX for JS %zu: Includes non-existent cores (present = 0x%llX)",
new_core_mask[i], i, shader_present);
- err = -EINVAL;
- goto unlock;
+ return -EINVAL;
} else if (!(new_core_mask[i] & shader_present &
kbdev->pm.backend.ca_cores_enabled)) {
- dev_err(dev,
- "Invalid core mask 0x%llX for JS %d: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX\n",
+ dev_err(kbdev->dev,
+ "Invalid core mask 0x%llX for JS %zu: No intersection with currently available cores (present = 0x%llX, CA enabled = 0x%llX)",
new_core_mask[i], i, kbdev->gpu_props.shader_present,
kbdev->pm.backend.ca_cores_enabled);
- err = -EINVAL;
- goto unlock;
+ return -EINVAL;
} else if (!(new_core_mask[i] & group_core_mask)) {
- dev_err(dev,
- "Invalid core mask 0x%llX for JS %d: No intersection with group 0 core mask 0x%llX\n",
+ dev_err(kbdev->dev,
+ "Invalid core mask 0x%llX for JS %zu: No intersection with group 0 core mask 0x%llX",
new_core_mask[i], i, group_core_mask);
- err = -EINVAL;
- goto unlock;
+ return -EINVAL;
} else if (!(new_core_mask[i] & kbdev->gpu_props.curr_config.shader_present)) {
- dev_err(dev,
- "Invalid core mask 0x%llX for JS %d: No intersection with current core mask 0x%llX\n",
+ dev_err(kbdev->dev,
+ "Invalid core mask 0x%llX for JS %zu: No intersection with current core mask 0x%llX",
new_core_mask[i], i, kbdev->gpu_props.curr_config.shader_present);
- err = -EINVAL;
- goto unlock;
+ return -EINVAL;
}
}
- if (kbdev->pm.debug_core_mask[0] != new_core_mask[0] ||
- kbdev->pm.debug_core_mask[1] != new_core_mask[1] ||
- kbdev->pm.debug_core_mask[2] != new_core_mask[2]) {
- kbase_pm_set_debug_core_mask(kbdev, new_core_mask[0], new_core_mask[1],
- new_core_mask[2]);
+ for (i = 0; i < BASE_JM_MAX_NR_SLOTS; i++) {
+ if (kbdev->pm.debug_core_mask[i] != new_core_mask[i]) {
+ kbase_pm_set_debug_core_mask(kbdev, new_core_mask, BASE_JM_MAX_NR_SLOTS);
+ break;
+ }
}
-#endif /* MALI_USE_CSF */
-unlock:
+ return 0;
+}
+
+#endif
+
+/**
+ * core_mask_store - Store callback for the core_mask sysfs file.
+ *
+ * @dev: The device with sysfs file is for
+ * @attr: The attributes of the sysfs file
+ * @buf: The value written to the sysfs file
+ * @count: The number of bytes to write to the sysfs file
+ *
+ * This function is called when the core_mask sysfs file is written to.
+ *
+ * Return: @count if the function succeeded. An error code on failure.
+ */
+static ssize_t core_mask_store(struct device *dev, struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct kbase_device *kbdev;
+ struct kbase_core_mask core_mask = {};
+
+ int err;
+ unsigned long flags;
+
+ CSTD_UNUSED(attr);
+
+ kbdev = to_kbase_device(dev);
+
+ if (!kbdev)
+ return -ENODEV;
+
+ err = core_mask_parse(kbdev, buf, &core_mask);
+ if (err)
+ return err;
+
+ rt_mutex_lock(&kbdev->pm.lock);
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+
+ err = core_mask_set(kbdev, &core_mask);
+
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
rt_mutex_unlock(&kbdev->pm.lock);
-end:
- return err;
+
+ if (err)
+ return err;
+
+ return count;
}
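After this refactor the store path is parse (core_mask_parse), then validate and apply (core_mask_set) under the pm and hwaccess locks. On CSF GPUs the attribute accepts a single hexadecimal mask; on JM GPUs it accepts either one mask, which is replicated to every job slot, or one mask per slot. A user-space sketch of writing the mask follows; the sysfs path is an assumption about where kbase typically exposes the attribute and may differ per platform.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* Assumed path: kbase usually exposes device attributes under the
	 * misc device, e.g. /sys/class/misc/mali0/device/.
	 */
	const char *path = "/sys/class/misc/mali0/device/core_mask";
	const char *mask = "0xf\n"; /* shader cores 0-3; JM GPUs also accept three masks */
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open core_mask");
		return 1;
	}
	if (write(fd, mask, strlen(mask)) < 0)
		perror("write core_mask");
	close(fd);
	return 0;
}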
/*
@@ -3607,12 +3667,8 @@ int kbase_pm_gpu_freq_init(struct kbase_device *kbdev)
/* convert found frequency to KHz */
found_freq /= 1000;
- /* If lowest frequency in OPP table is still higher
- * than the reference, then keep the reference frequency
- * as the one to use for scaling .
- */
- if (found_freq < lowest_freq_khz)
- lowest_freq_khz = found_freq;
+		/* Always use the lowest frequency from the OPP table. */
+ lowest_freq_khz = found_freq;
}
#else
dev_err(kbdev->dev, "No operating-points-v2 node or operating-points property in DT");
@@ -4595,7 +4651,7 @@ static int kbase_common_reg_map(struct kbase_device *kbdev)
goto out_region;
}
- kbdev->reg = ioremap(kbdev->reg_start, kbdev->reg_size);
+ kbdev->reg = mali_ioremap(kbdev->reg_start, kbdev->reg_size);
if (!kbdev->reg) {
dev_err(kbdev->dev, "Can't remap register window\n");
err = -EINVAL;
@@ -4613,7 +4669,7 @@ out_region:
static void kbase_common_reg_unmap(struct kbase_device *const kbdev)
{
if (kbdev->reg) {
- iounmap(kbdev->reg);
+ mali_iounmap(kbdev->reg);
release_mem_region(kbdev->reg_start, kbdev->reg_size);
kbdev->reg = NULL;
kbdev->reg_start = 0;
@@ -5255,6 +5311,7 @@ static struct dentry *init_debugfs(struct kbase_device *kbdev)
return dentry;
}
+
dentry = debugfs_ctx_defaults_init(kbdev);
if (IS_ERR_OR_NULL(dentry))
return dentry;
diff --git a/mali_kbase/mali_kbase_defs.h b/mali_kbase/mali_kbase_defs.h
index 1162b95..84bfa59 100644
--- a/mali_kbase/mali_kbase_defs.h
+++ b/mali_kbase/mali_kbase_defs.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2011-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2011-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -565,7 +565,7 @@ struct kbase_mem_pool {
u8 group_id;
spinlock_t pool_lock;
struct list_head page_list;
- struct shrinker reclaim;
+ DEFINE_KBASE_SHRINKER reclaim;
atomic_t isolation_in_progress_cnt;
struct kbase_mem_pool *next_pool;
@@ -864,8 +864,6 @@ struct kbase_mem_migrate {
* @as_free: Bitpattern of free/available GPU address spaces.
* @mmu_mask_change: Lock to serialize the access to MMU interrupt mask
* register used in the handling of Bus & Page faults.
- * @pagesize_2mb: Boolean to determine whether 2MiB page sizes are
- * supported and used where possible.
* @gpu_props: Object containing complete information about the
* configuration/properties of GPU HW device in use.
* @hw_issues_mask: List of SW workarounds for HW issues
@@ -1177,8 +1175,6 @@ struct kbase_device {
spinlock_t mmu_mask_change;
- bool pagesize_2mb;
-
struct kbase_gpu_props gpu_props;
unsigned long hw_issues_mask[(BASE_HW_ISSUE_END + BITS_PER_LONG - 1) / BITS_PER_LONG];
@@ -2036,7 +2032,8 @@ struct kbase_context {
struct kbase_mem_pool_group mem_pools;
- struct shrinker reclaim;
+ DEFINE_KBASE_SHRINKER reclaim;
+
struct list_head evict_list;
atomic_t evict_nents;
diff --git a/mali_kbase/mali_kbase_fence.h b/mali_kbase/mali_kbase_fence.h
index 06690d4..d45a0fe 100644
--- a/mali_kbase/mali_kbase_fence.h
+++ b/mali_kbase/mali_kbase_fence.h
@@ -35,8 +35,37 @@
#include <linux/version_compat_defs.h>
#if MALI_USE_CSF
+/* Number of digits needed to express the max value of a given unsigned type.
+ *
+ * Details: the number of digits needed for the max value of a type is log10(t_max) + 1
+ * log2(t_max) == sizeof(t) * 8
+ * log10(t_max) == log2(t_max) / log2(10)
+ * 1/log2(10) is approx (1233 >> 12)
+ * Hence, number of digits for a given type == ((sizeof(t) * 8 * 1233) >> 12) + 1
+ */
+#define MAX_DIGITS_FOR_UNSIGNED_TYPE(t) ((((sizeof(t) * BITS_PER_BYTE) * 1233) >> 12) + 1)
+
+/* Number of digits needed to express the max value of a given signed type,
+ * including the sign character.
+ */
+#define MAX_DIGITS_FOR_SIGNED_TYPE(t) (MAX_DIGITS_FOR_UNSIGNED_TYPE(t) + 1)
+
+/* Max number of characters for id member of kbase_device struct. */
+#define MAX_KBDEV_ID_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u32)
+/* Max number of characters for tgid member of kbase_context struct. */
+#define MAX_KCTX_TGID_LEN MAX_DIGITS_FOR_SIGNED_TYPE(pid_t)
+/* Max number of characters for id member of kbase_context struct. */
+#define MAX_KCTX_ID_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u32)
+/* Max number of characters for fence_context member of kbase_kcpu_command_queue struct. */
+#define MAX_KCTX_QUEUE_FENCE_CTX_LEN MAX_DIGITS_FOR_UNSIGNED_TYPE(u64)
+/* Max number of characters for timeline name fixed format, including null character. */
+#define FIXED_FORMAT_LEN (9)
+
/* Maximum number of characters in DMA fence timeline name. */
-#define MAX_TIMELINE_NAME (32)
+#define MAX_TIMELINE_NAME \
+ (MAX_KBDEV_ID_LEN + MAX_KCTX_TGID_LEN + MAX_KCTX_ID_LEN + MAX_KCTX_QUEUE_FENCE_CTX_LEN + \
+ FIXED_FORMAT_LEN)
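The new bound replaces the fixed 32-character limit with one derived from the maximum printed width of each field in the timeline name. A standalone sketch (not driver code) that replicates the digit-count macro and checks it against the real decimal widths; with a 32-bit pid_t the total works out to 10 + 11 + 10 + 20 + 9 = 60 characters.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define BITS_PER_BYTE 8
/* Same approximation as above: digits(t_max) ~= bits * log10(2) + 1, with
 * log10(2) approximated by 1233/4096.
 */
#define MAX_DIGITS_FOR_UNSIGNED_TYPE(t) ((((sizeof(t) * BITS_PER_BYTE) * 1233) >> 12) + 1)
#define MAX_DIGITS_FOR_SIGNED_TYPE(t) (MAX_DIGITS_FOR_UNSIGNED_TYPE(t) + 1)

int main(void)
{
	/* UINT32_MAX = 4294967295 has 10 digits, UINT64_MAX has 20. */
	assert(MAX_DIGITS_FOR_UNSIGNED_TYPE(uint32_t) == 10);
	assert(MAX_DIGITS_FOR_UNSIGNED_TYPE(uint64_t) == 20);
	/* A 32-bit signed pid needs at most 10 digits plus a sign character. */
	assert(MAX_DIGITS_FOR_SIGNED_TYPE(int32_t) == 11);
	printf("MAX_TIMELINE_NAME -> %d\n", 10 + 11 + 10 + 20 + 9);
	return 0;
}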
/**
* struct kbase_kcpu_dma_fence_meta - Metadata structure for dma fence objects containing
diff --git a/mali_kbase/mali_kbase_gpuprops.c b/mali_kbase/mali_kbase_gpuprops.c
index 3ac1c45..e6c31d4 100644
--- a/mali_kbase/mali_kbase_gpuprops.c
+++ b/mali_kbase/mali_kbase_gpuprops.c
@@ -357,6 +357,7 @@ enum l2_config_override_result {
/**
* kbase_read_l2_config_from_dt - Read L2 configuration
* @kbdev: The kbase device for which to get the L2 configuration.
+ * @regdump: Pointer to the kbasep_gpuprops_regdump structure holding the register values read from the GPU.
*
* Check for L2 configuration overrides in module parameters and device tree.
* Override values in module parameters take priority over override values in
@@ -366,9 +367,16 @@ enum l2_config_override_result {
* overridden, L2_CONFIG_OVERRIDE_NONE if no overrides are provided.
* L2_CONFIG_OVERRIDE_FAIL otherwise.
*/
-static enum l2_config_override_result kbase_read_l2_config_from_dt(struct kbase_device *const kbdev)
+static enum l2_config_override_result
+kbase_read_l2_config_from_dt(struct kbase_device *const kbdev,
+ struct kbasep_gpuprops_regdump *regdump)
{
struct device_node *np = kbdev->dev->of_node;
+ /*
+	 * The CACHE_SIZE bit field of the L2_FEATURES register, at its default value after
+	 * reset/power-up, holds the maximum cache size that can be programmed in the L2_CONFIG register.
+ */
+ const u8 l2_size_max = L2_FEATURES_CACHE_SIZE_GET(regdump->l2_features);
if (!np)
return L2_CONFIG_OVERRIDE_NONE;
@@ -378,8 +386,12 @@ static enum l2_config_override_result kbase_read_l2_config_from_dt(struct kbase_
else if (of_property_read_u8(np, "l2-size", &kbdev->l2_size_override))
kbdev->l2_size_override = 0;
- if (kbdev->l2_size_override != 0 && kbdev->l2_size_override < OVERRIDE_L2_SIZE_MIN_LOG2)
+ if (kbdev->l2_size_override != 0 && (kbdev->l2_size_override < OVERRIDE_L2_SIZE_MIN_LOG2 ||
+ kbdev->l2_size_override > l2_size_max)) {
+ dev_err(kbdev->dev, "Invalid Cache Size in %s",
+ override_l2_size ? "Module parameters" : "Device tree node");
return L2_CONFIG_OVERRIDE_FAIL;
+ }
/* Check overriding value is supported, if not will result in
* undefined behavior.
@@ -429,7 +441,7 @@ int kbase_gpuprops_update_l2_features(struct kbase_device *kbdev)
struct kbasep_gpuprops_regdump *regdump = &PRIV_DATA_REGDUMP(kbdev);
/* Check for L2 cache size & hash overrides */
- switch (kbase_read_l2_config_from_dt(kbdev)) {
+ switch (kbase_read_l2_config_from_dt(kbdev, regdump)) {
case L2_CONFIG_OVERRIDE_FAIL:
err = -EIO;
goto exit;
diff --git a/mali_kbase/mali_kbase_gwt.c b/mali_kbase/mali_kbase_gwt.c
index a138f19..5e59bf6 100644
--- a/mali_kbase/mali_kbase_gwt.c
+++ b/mali_kbase/mali_kbase_gwt.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -30,9 +30,10 @@
#include <linux/module.h>
static inline void kbase_gpu_gwt_setup_page_permission(struct kbase_context *kctx,
- unsigned long flag, struct rb_node *node)
+ unsigned long flag,
+ struct kbase_reg_zone *zone)
{
- struct rb_node *rbnode = node;
+ struct rb_node *rbnode = rb_first(&zone->reg_rbtree);
while (rbnode) {
struct kbase_va_region *reg;
@@ -55,10 +56,8 @@ static inline void kbase_gpu_gwt_setup_page_permission(struct kbase_context *kct
static void kbase_gpu_gwt_setup_pages(struct kbase_context *kctx, unsigned long flag)
{
- kbase_gpu_gwt_setup_page_permission(kctx, flag,
- rb_first(&kctx->reg_zone[SAME_VA_ZONE].reg_rbtree));
- kbase_gpu_gwt_setup_page_permission(kctx, flag,
- rb_first(&kctx->reg_zone[CUSTOM_VA_ZONE].reg_rbtree));
+ kbase_gpu_gwt_setup_page_permission(kctx, flag, &kctx->reg_zone[SAME_VA_ZONE]);
+ kbase_gpu_gwt_setup_page_permission(kctx, flag, &kctx->reg_zone[CUSTOM_VA_ZONE]);
}
int kbase_gpu_gwt_start(struct kbase_context *kctx)
diff --git a/mali_kbase/mali_kbase_hw.c b/mali_kbase/mali_kbase_hw.c
index 7d4200e..1fde75b 100644
--- a/mali_kbase/mali_kbase_hw.c
+++ b/mali_kbase/mali_kbase_hw.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2012-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2012-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -225,6 +225,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(struct kbase_dev
{ GPU_ID_PRODUCT_TVAX,
{ { GPU_ID_VERSION_MAKE(0, 0, 0), base_hw_issues_tVAx_r0p0 },
+ { GPU_ID_VERSION_MAKE(0, 0, 5), base_hw_issues_tVAx_r0p0 },
+ { GPU_ID_VERSION_MAKE(0, 1, 0), base_hw_issues_tVAx_r0p1 },
{ U32_MAX, NULL } } },
{ GPU_ID_PRODUCT_TTUX,
@@ -334,6 +336,8 @@ static const enum base_hw_issue *kbase_hw_get_issues_for_new_id(struct kbase_dev
gpu_id->version_id = fallback_version;
}
}
+
+
return issues;
}
diff --git a/mali_kbase/mali_kbase_hwaccess_pm.h b/mali_kbase/mali_kbase_hwaccess_pm.h
index 7a0ea49..982547d 100644
--- a/mali_kbase/mali_kbase_hwaccess_pm.h
+++ b/mali_kbase/mali_kbase_hwaccess_pm.h
@@ -129,14 +129,14 @@ void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask)
* kbase_pm_set_debug_core_mask - Set the debug core mask.
*
* @kbdev: The kbase device structure for the device (must be a valid pointer)
- * @new_core_mask_js0: The core mask to use for job slot 0
- * @new_core_mask_js1: The core mask to use for job slot 1
- * @new_core_mask_js2: The core mask to use for job slot 2
+ * @new_core_mask: The core mask to use, as an array where each element refers
+ * to a job slot.
+ * @new_core_mask_size: Number of elements in the core mask array.
*
* This determines which cores the power manager is allowed to use.
*/
-void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 new_core_mask_js0,
- u64 new_core_mask_js1, u64 new_core_mask_js2);
+void kbase_pm_set_debug_core_mask(struct kbase_device *kbdev, u64 *new_core_mask,
+ size_t new_core_mask_size);
#endif /* MALI_USE_CSF */
/**
diff --git a/mali_kbase/mali_kbase_linux.h b/mali_kbase/mali_kbase_linux.h
index 9195be3..cb55d4b 100644
--- a/mali_kbase/mali_kbase_linux.h
+++ b/mali_kbase/mali_kbase_linux.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -35,8 +35,13 @@
#if IS_ENABLED(MALI_KERNEL_TEST_API)
#define KBASE_EXPORT_TEST_API(func) EXPORT_SYMBOL(func)
+/* Note: due to the 2-layer macro translation, passing NULL as the _etype does not
+ * compile; one workaround is to use ERRNO_NULL instead.
+ */
+#define KBASE_ALLOW_ERROR_INJECTION_TEST_API(func, etype) ALLOW_ERROR_INJECTION(func, etype)
#else
#define KBASE_EXPORT_TEST_API(func)
+#define KBASE_ALLOW_ERROR_INJECTION_TEST_API(func, etype)
#endif
#define KBASE_EXPORT_SYMBOL(func) EXPORT_SYMBOL(func)
diff --git a/mali_kbase/mali_kbase_mem.c b/mali_kbase/mali_kbase_mem.c
index 337fe2a..c6fb7f0 100644
--- a/mali_kbase/mali_kbase_mem.c
+++ b/mali_kbase/mali_kbase_mem.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -46,6 +46,9 @@
#include <mali_kbase_trace_gpu_mem.h>
#include <linux/version_compat_defs.h>
+/* Static key used to determine if large pages are enabled or not */
+static DEFINE_STATIC_KEY_FALSE(large_pages_static_key);
+
#define VA_REGION_SLAB_NAME_PREFIX "va-region-slab-"
#define VA_REGION_SLAB_NAME_SIZE (DEVNAME_SIZE + sizeof(VA_REGION_SLAB_NAME_PREFIX) + 1)
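The per-device pagesize_2mb flag is replaced throughout this file by the large_pages_static_key above, so the decision is made once at init and hot paths pay only a patched branch rather than a per-call load and test. A minimal sketch of the jump-label pattern as it is used here, assuming only the standard kernel headers:

#include <linux/jump_label.h>
#include <linux/types.h>

/* The key starts false, is flipped at most once during init, and hot paths
 * test it with static_branch_unlikely().
 */
static DEFINE_STATIC_KEY_FALSE(example_feature_key);

static void example_feature_init(bool hw_supports_feature)
{
	if (hw_supports_feature)
		static_branch_enable(&example_feature_key);
}

static inline bool example_feature_enabled(void)
{
	return static_branch_unlikely(&example_feature_key);
}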
@@ -143,20 +146,20 @@ MODULE_PARM_DESC(large_page_conf, "User override for large page usage on support
static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
{
if (!IS_ENABLED(CONFIG_LARGE_PAGE_SUPPORT)) {
- kbdev->pagesize_2mb = false;
dev_info(kbdev->dev, "Large page support was disabled at compile-time!");
return;
}
switch (large_page_conf) {
case LARGE_PAGE_AUTO: {
- kbdev->pagesize_2mb = kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC);
+ if (kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC))
+ static_branch_enable(&large_pages_static_key);
dev_info(kbdev->dev, "Large page allocation set to %s after hardware feature check",
- kbdev->pagesize_2mb ? "true" : "false");
+ static_branch_unlikely(&large_pages_static_key) ? "true" : "false");
break;
}
case LARGE_PAGE_ON: {
- kbdev->pagesize_2mb = true;
+ static_branch_enable(&large_pages_static_key);
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_LARGE_PAGE_ALLOC))
dev_warn(kbdev->dev,
"Enabling large page allocations on unsupporting GPU!");
@@ -165,12 +168,10 @@ static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
break;
}
case LARGE_PAGE_OFF: {
- kbdev->pagesize_2mb = false;
dev_info(kbdev->dev, "Large page allocation override: turned off\n");
break;
}
default: {
- kbdev->pagesize_2mb = false;
dev_info(kbdev->dev, "Invalid large page override, turning off large pages\n");
break;
}
@@ -180,12 +181,18 @@ static void kbasep_mem_page_size_init(struct kbase_device *kbdev)
* so that userspace could read it to figure out the state of the configuration
* if necessary.
*/
- if (kbdev->pagesize_2mb)
+ if (static_branch_unlikely(&large_pages_static_key))
large_page_conf = LARGE_PAGE_ON;
else
large_page_conf = LARGE_PAGE_OFF;
}
+inline bool kbase_is_large_pages_enabled(void)
+{
+ return static_branch_unlikely(&large_pages_static_key);
+}
+KBASE_EXPORT_TEST_API(kbase_is_large_pages_enabled);
+
int kbase_mem_init(struct kbase_device *kbdev)
{
int err = 0;
@@ -679,7 +686,9 @@ void kbase_sync_single(struct kbase_context *kctx, struct tagged_addr t_cpu_pa,
dma_addr_t dma_addr;
WARN_ON(!cpu_page);
- WARN_ON((size_t)offset + size > PAGE_SIZE);
+
+ if ((size_t)offset + size > PAGE_SIZE)
+ dev_warn(kctx->kbdev->dev, "Size and offset exceed page size");
dma_addr = kbase_dma_addr_from_tagged(t_cpu_pa) + (dma_addr_t)offset;
@@ -1163,7 +1172,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa
/* Check if we have enough pages requested so we can allocate a large
* page (512 * 4KB = 2MB )
*/
- if (kbdev->pagesize_2mb && nr_left >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
+ if (kbase_is_large_pages_enabled() && nr_left >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
size_t nr_lp = nr_left / NUM_PAGES_IN_2MB_LARGE_PAGE;
res = kbase_mem_pool_alloc_pages(&kctx->mem_pools.large[alloc->group_id],
@@ -1314,6 +1323,7 @@ alloc_failed:
invalid_request:
return -ENOMEM;
}
+KBASE_EXPORT_TEST_API(kbase_alloc_phy_pages_helper);
static size_t free_partial_locked(struct kbase_context *kctx, struct kbase_mem_pool *pool,
struct tagged_addr tp)
@@ -1370,7 +1380,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(struct kbase_mem_phy_all
kctx = alloc->imported.native.kctx;
kbdev = kctx->kbdev;
- if (!kbdev->pagesize_2mb)
+ if (!kbase_is_large_pages_enabled())
WARN_ON(pool->order);
if (alloc->reg) {
@@ -1393,7 +1403,7 @@ struct tagged_addr *kbase_alloc_phy_pages_helper_locked(struct kbase_mem_phy_all
tp = alloc->pages + alloc->nents;
new_pages = tp;
- if (kbdev->pagesize_2mb && pool->order) {
+ if (kbase_is_large_pages_enabled() && pool->order) {
size_t nr_lp = nr_left / NUM_PAGES_IN_2MB_LARGE_PAGE;
res = kbase_mem_pool_alloc_pages_locked(pool, nr_lp * NUM_PAGES_IN_2MB_LARGE_PAGE,
@@ -1510,7 +1520,7 @@ alloc_failed:
struct tagged_addr *start_free = alloc->pages + alloc->nents;
- if (kbdev->pagesize_2mb && pool->order) {
+ if (kbase_is_large_pages_enabled() && pool->order) {
while (nr_pages_to_free) {
if (is_huge_head(*start_free)) {
kbase_mem_pool_free_pages_locked(
@@ -1666,6 +1676,7 @@ int kbase_free_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pag
return 0;
}
+KBASE_EXPORT_TEST_API(kbase_free_phy_pages_helper);
void kbase_free_phy_pages_helper_locked(struct kbase_mem_phy_alloc *alloc,
struct kbase_mem_pool *pool, struct tagged_addr *pages,
@@ -2730,7 +2741,7 @@ static int kbase_jit_grow(struct kbase_context *kctx, const struct base_jit_allo
delta = info->commit_pages - reg->gpu_alloc->nents;
pages_required = delta;
- if (kctx->kbdev->pagesize_2mb && pages_required >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
+ if (kbase_is_large_pages_enabled() && pages_required >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
pool = &kctx->mem_pools.large[kctx->jit_group_id];
/* Round up to number of 2 MB pages required */
pages_required += (NUM_PAGES_IN_2MB_LARGE_PAGE - 1);
@@ -3028,7 +3039,7 @@ struct kbase_va_region *kbase_jit_allocate(struct kbase_context *kctx,
if (!jit_allow_allocate(kctx, info, ignore_pressure_limit))
return NULL;
- if (kctx->kbdev->pagesize_2mb) {
+ if (kbase_is_large_pages_enabled()) {
/* Preallocate memory for the sub-allocation structs */
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
prealloc_sas[i] = kmalloc(sizeof(*prealloc_sas[i]), GFP_KERNEL);
diff --git a/mali_kbase/mali_kbase_mem.h b/mali_kbase/mali_kbase_mem.h
index 371d788..c2b10e0 100644
--- a/mali_kbase/mali_kbase_mem.h
+++ b/mali_kbase/mali_kbase_mem.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -1674,7 +1674,7 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa
*
* @prealloc_sa: Information about the partial allocation if the amount of memory requested
* is not a multiple of 2MB. One instance of struct kbase_sub_alloc must be
- * allocated by the caller if kbdev->pagesize_2mb is enabled.
+ * allocated by the caller if large pages are enabled.
*
* Allocates @nr_pages_requested and updates the alloc object. This function does not allocate new
* pages from the kernel, and therefore will never trigger the OoM killer. Therefore, it can be
@@ -1702,9 +1702,9 @@ int kbase_alloc_phy_pages_helper(struct kbase_mem_phy_alloc *alloc, size_t nr_pa
* This ensures that the pool can be grown to the required size and that the allocation can
* complete without another thread using the newly grown pages.
*
- * If kbdev->pagesize_2mb is enabled and the allocation is >= 2MB, then @pool must be one of the
- * pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it must be one of the
- * mempools from alloc->imported.native.kctx->mem_pools.small[].
+ * If large (2MiB) pages are enabled and the allocation is >= 2MiB, then @pool
+ * must be one of the pools from alloc->imported.native.kctx->mem_pools.large[]. Otherwise it
+ * must be one of the mempools from alloc->imported.native.kctx->mem_pools.small[].
*
* @prealloc_sa is used to manage the non-2MB sub-allocation. It has to be pre-allocated because we
* must not sleep (due to the usage of kmalloc()) whilst holding pool->pool_lock. @prealloc_sa
@@ -2618,4 +2618,7 @@ static inline base_mem_alloc_flags kbase_mem_group_id_set(int id)
{
return BASE_MEM_GROUP_ID_SET(id);
}
+
+bool kbase_is_large_pages_enabled(void);
+
#endif /* _KBASE_MEM_H_ */
diff --git a/mali_kbase/mali_kbase_mem_linux.c b/mali_kbase/mali_kbase_mem_linux.c
index 2eeb2a1..448ede2 100644
--- a/mali_kbase/mali_kbase_mem_linux.c
+++ b/mali_kbase/mali_kbase_mem_linux.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -46,6 +46,7 @@
#include <mali_kbase_caps.h>
#include <mali_kbase_trace_gpu_mem.h>
#include <mali_kbase_reset_gpu.h>
+#include <linux/version_compat_defs.h>
#if (KERNEL_VERSION(5, 0, 0) > LINUX_VERSION_CODE)
/* Enable workaround for ion for kernels prior to v5.0.0
@@ -470,7 +471,7 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
} else /* we control the VA */ {
size_t align = 1;
- if (kctx->kbdev->pagesize_2mb) {
+ if (kbase_is_large_pages_enabled()) {
/* If there's enough (> 33 bits) of GPU VA space, align to 2MB
* boundaries. The similar condition is used for mapping from
* the SAME_VA zone inside kbase_context_get_unmapped_area().
@@ -594,8 +595,10 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co
*out |= BASE_MEM_COHERENT_SYSTEM;
if (KBASE_REG_SHARE_IN & reg->flags)
*out |= BASE_MEM_COHERENT_LOCAL;
- if (KBASE_REG_DONT_NEED & reg->flags)
- *out |= BASE_MEM_DONT_NEED;
+ if (mali_kbase_supports_mem_dont_need(kctx->api_version)) {
+ if (KBASE_REG_DONT_NEED & reg->flags)
+ *out |= BASE_MEM_DONT_NEED;
+ }
if (mali_kbase_supports_mem_grow_on_gpf(kctx->api_version)) {
/* Prior to this version, this was known about by
* user-side but we did not return them. Returning
@@ -632,9 +635,19 @@ int kbase_mem_query(struct kbase_context *kctx, u64 gpu_addr, u64 query, u64 *co
else
*out |= BASE_MEM_FIXABLE;
}
-#endif
+#endif /* MALI_USE_CSF */
if (KBASE_REG_GPU_VA_SAME_4GB_PAGE & reg->flags)
*out |= BASE_MEM_GPU_VA_SAME_4GB_PAGE;
+ if (mali_kbase_supports_mem_import_sync_on_map_unmap(kctx->api_version)) {
+ if (reg->gpu_alloc->type == KBASE_MEM_TYPE_IMPORTED_UMM) {
+ if (reg->gpu_alloc->imported.umm.need_sync)
+ *out |= BASE_MEM_IMPORT_SYNC_ON_MAP_UNMAP;
+ }
+ }
+ if (mali_kbase_supports_mem_kernel_sync(kctx->api_version)) {
+ if (unlikely(reg->cpu_alloc != reg->gpu_alloc))
+ *out |= BASE_MEM_KERNEL_SYNC;
+ }
*out |= kbase_mem_group_id_set(reg->cpu_alloc->group_id);
@@ -665,7 +678,9 @@ out_unlock:
static unsigned long kbase_mem_evictable_reclaim_count_objects(struct shrinker *s,
struct shrink_control *sc)
{
- struct kbase_context *kctx = container_of(s, struct kbase_context, reclaim);
+ struct kbase_context *kctx =
+ KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_context, reclaim);
+
int evict_nents = atomic_read(&kctx->evict_nents);
unsigned long nr_freeable_items;
@@ -715,7 +730,7 @@ static unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s
struct kbase_mem_phy_alloc *tmp;
unsigned long freed = 0;
- kctx = container_of(s, struct kbase_context, reclaim);
+ kctx = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_context, reclaim);
#if MALI_USE_CSF
if (!down_read_trylock(&kctx->kbdev->csf.pmode_sync_sem)) {
@@ -770,26 +785,28 @@ static unsigned long kbase_mem_evictable_reclaim_scan_objects(struct shrinker *s
int kbase_mem_evictable_init(struct kbase_context *kctx)
{
+ struct shrinker *reclaim;
+
INIT_LIST_HEAD(&kctx->evict_list);
mutex_init(&kctx->jit_evict_lock);
- kctx->reclaim.count_objects = kbase_mem_evictable_reclaim_count_objects;
- kctx->reclaim.scan_objects = kbase_mem_evictable_reclaim_scan_objects;
- kctx->reclaim.seeks = DEFAULT_SEEKS;
- /* Kernel versions prior to 3.1 :
- * struct shrinker does not define batch
- */
-#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
- register_shrinker(&kctx->reclaim);
-#else
- register_shrinker(&kctx->reclaim, "mali-mem");
-#endif
+ reclaim = KBASE_INIT_RECLAIM(kctx, reclaim, "mali-mem");
+ if (!reclaim)
+ return -ENOMEM;
+ KBASE_SET_RECLAIM(kctx, reclaim, reclaim);
+
+ reclaim->count_objects = kbase_mem_evictable_reclaim_count_objects;
+ reclaim->scan_objects = kbase_mem_evictable_reclaim_scan_objects;
+ reclaim->seeks = DEFAULT_SEEKS;
+
+ KBASE_REGISTER_SHRINKER(reclaim, "mali-mem", kctx);
+
return 0;
}
void kbase_mem_evictable_deinit(struct kbase_context *kctx)
{
- unregister_shrinker(&kctx->reclaim);
+ KBASE_UNREGISTER_SHRINKER(kctx->reclaim);
}
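The open-coded register_shrinker() calls are replaced by KBASE_INIT_RECLAIM / KBASE_SET_RECLAIM / KBASE_REGISTER_SHRINKER / KBASE_UNREGISTER_SHRINKER and KBASE_GET_KBASE_DATA_FROM_SHRINKER. Their definitions are not part of this hunk (they presumably live in version_compat_defs.h), but they appear to abstract the kernel >= 6.7 API, where the shrinker is heap-allocated and carries private data, from the older embedded-struct scheme. A hedged sketch of the two upstream registration paths such wrappers would select between:

#include <linux/errno.h>
#include <linux/shrinker.h>
#include <linux/version.h>

struct example_pool {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 7, 0)
	struct shrinker *reclaim; /* heap-allocated; owner recovered via private_data */
#else
	struct shrinker reclaim; /* embedded; owner recovered via container_of() */
#endif
};

static int example_pool_shrinker_register(struct example_pool *pool,
					  unsigned long (*count)(struct shrinker *,
								 struct shrink_control *),
					  unsigned long (*scan)(struct shrinker *,
								struct shrink_control *))
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(6, 7, 0)
	pool->reclaim = shrinker_alloc(0, "example-pool");
	if (!pool->reclaim)
		return -ENOMEM;
	pool->reclaim->count_objects = count;
	pool->reclaim->scan_objects = scan;
	pool->reclaim->seeks = DEFAULT_SEEKS;
	pool->reclaim->private_data = pool;
	shrinker_register(pool->reclaim);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(6, 0, 0)
	pool->reclaim.count_objects = count;
	pool->reclaim.scan_objects = scan;
	pool->reclaim.seeks = DEFAULT_SEEKS;
	register_shrinker(&pool->reclaim, "example-pool");
#else
	pool->reclaim.count_objects = count;
	pool->reclaim.scan_objects = scan;
	pool->reclaim.seeks = DEFAULT_SEEKS;
	register_shrinker(&pool->reclaim);
#endif
	return 0;
}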
/**
@@ -2277,11 +2294,16 @@ int kbase_mem_shrink(struct kbase_context *const kctx, struct kbase_va_region *c
return -EINVAL;
old_pages = kbase_reg_current_backed_size(reg);
- if (WARN_ON(old_pages < new_pages))
+ if (old_pages < new_pages) {
+ dev_warn(
+ kctx->kbdev->dev,
+ "Requested number of pages (%llu) is larger than the current number of pages (%llu)",
+ new_pages, old_pages);
return -EINVAL;
+ }
delta = old_pages - new_pages;
- if (kctx->kbdev->pagesize_2mb) {
+ if (kbase_is_large_pages_enabled()) {
struct tagged_addr *start_free = reg->gpu_alloc->pages + new_pages;
/* Move the end of the new committed range to a valid location.
diff --git a/mali_kbase/mali_kbase_mem_linux.h b/mali_kbase/mali_kbase_mem_linux.h
index 2866603..037bdfe 100644
--- a/mali_kbase/mali_kbase_mem_linux.h
+++ b/mali_kbase/mali_kbase_mem_linux.h
@@ -57,6 +57,8 @@ struct kbase_va_region *kbase_mem_alloc(struct kbase_context *kctx, u64 va_pages
/**
* kbase_mem_query - Query properties of a GPU memory region
*
+ * Note: Does not currently report the BASE_MEM_SAME_VA flag for any memory allocation.
+ *
* @kctx: The kernel context
* @gpu_addr: A GPU address contained within the memory region
* @query: The type of query, from KBASE_MEM_QUERY_* flags, which could be
diff --git a/mali_kbase/mali_kbase_mem_migrate.c b/mali_kbase/mali_kbase_mem_migrate.c
index 6638b76..26ddeed 100644
--- a/mali_kbase/mali_kbase_mem_migrate.c
+++ b/mali_kbase/mali_kbase_mem_migrate.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -28,6 +28,9 @@
#include <mali_kbase_mem_migrate.h>
#include <mmu/mali_kbase_mmu.h>
+/* Static key used to determine if page migration is enabled or not */
+static DEFINE_STATIC_KEY_FALSE(page_migration_static_key);
+
/* Global integer used to determine if module parameter value has been
* provided and if page migration feature is enabled.
* Feature is disabled on all platforms by default.
@@ -50,15 +53,6 @@ MODULE_PARM_DESC(kbase_page_migration_enabled,
KBASE_EXPORT_TEST_API(kbase_page_migration_enabled);
-bool kbase_is_page_migration_enabled(void)
-{
- /* Handle uninitialised int case */
- if (kbase_page_migration_enabled < 0)
- return false;
- return IS_ENABLED(CONFIG_PAGE_MIGRATION_SUPPORT) && kbase_page_migration_enabled;
-}
-KBASE_EXPORT_SYMBOL(kbase_is_page_migration_enabled);
-
#if (KERNEL_VERSION(6, 0, 0) <= LINUX_VERSION_CODE)
static const struct movable_operations movable_ops;
#endif
@@ -679,11 +673,15 @@ void kbase_mem_migrate_init(struct kbase_device *kbdev)
* integer for a negative value to see if insmod parameter was
* passed in at all (it will override the default negative value).
*/
- if (kbase_page_migration_enabled < 0)
- kbase_page_migration_enabled = kbdev->pagesize_2mb ? 1 : 0;
- else
+ if (kbase_page_migration_enabled < 0) {
+ if (kbase_is_large_pages_enabled())
+ static_branch_enable(&page_migration_static_key);
+ } else {
dev_info(kbdev->dev, "Page migration support explicitly %s at insmod.",
kbase_page_migration_enabled ? "enabled" : "disabled");
+ if (kbase_page_migration_enabled)
+ static_branch_enable(&page_migration_static_key);
+ }
spin_lock_init(&mem_migrate->free_pages_lock);
INIT_LIST_HEAD(&mem_migrate->free_pages_list);
@@ -708,3 +706,9 @@ void kbase_mem_migrate_term(struct kbase_device *kbdev)
iput(mem_migrate->inode);
#endif
}
+
+bool kbase_is_page_migration_enabled(void)
+{
+ return static_branch_unlikely(&page_migration_static_key);
+}
+KBASE_EXPORT_TEST_API(kbase_is_page_migration_enabled);
diff --git a/mali_kbase/mali_kbase_mem_migrate.h b/mali_kbase/mali_kbase_mem_migrate.h
index ece8734..70c3135 100644
--- a/mali_kbase/mali_kbase_mem_migrate.h
+++ b/mali_kbase/mali_kbase_mem_migrate.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2022-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2022-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/mali_kbase_mem_pool.c b/mali_kbase/mali_kbase_mem_pool.c
index a7e332b..bfd1e70 100644
--- a/mali_kbase/mali_kbase_mem_pool.c
+++ b/mali_kbase/mali_kbase_mem_pool.c
@@ -522,7 +522,7 @@ static unsigned long kbase_mem_pool_reclaim_count_objects(struct shrinker *s,
CSTD_UNUSED(sc);
- pool = container_of(s, struct kbase_mem_pool, reclaim);
+ pool = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_mem_pool, reclaim);
kbase_mem_pool_lock(pool);
if (pool->dont_reclaim && !pool->dying) {
@@ -544,7 +544,7 @@ static unsigned long kbase_mem_pool_reclaim_scan_objects(struct shrinker *s,
struct kbase_mem_pool *pool;
unsigned long freed;
- pool = container_of(s, struct kbase_mem_pool, reclaim);
+ pool = KBASE_GET_KBASE_DATA_FROM_SHRINKER(s, struct kbase_mem_pool, reclaim);
kbase_mem_pool_lock(pool);
if (pool->dont_reclaim && !pool->dying) {
@@ -570,6 +570,8 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool
unsigned int order, int group_id, struct kbase_device *kbdev,
struct kbase_mem_pool *next_pool)
{
+ struct shrinker *reclaim;
+
if (WARN_ON(group_id < 0) || WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS)) {
return -EINVAL;
}
@@ -586,18 +588,17 @@ int kbase_mem_pool_init(struct kbase_mem_pool *pool, const struct kbase_mem_pool
spin_lock_init(&pool->pool_lock);
INIT_LIST_HEAD(&pool->page_list);
- pool->reclaim.count_objects = kbase_mem_pool_reclaim_count_objects;
- pool->reclaim.scan_objects = kbase_mem_pool_reclaim_scan_objects;
- pool->reclaim.seeks = DEFAULT_SEEKS;
- /* Kernel versions prior to 3.1 :
- * struct shrinker does not define batch
- */
- pool->reclaim.batch = 0;
-#if KERNEL_VERSION(6, 0, 0) > LINUX_VERSION_CODE
- register_shrinker(&pool->reclaim);
-#else
- register_shrinker(&pool->reclaim, "mali-mem-pool");
-#endif
+ reclaim = KBASE_INIT_RECLAIM(pool, reclaim, "mali-mem-pool");
+ if (!reclaim)
+ return -ENOMEM;
+ KBASE_SET_RECLAIM(pool, reclaim, reclaim);
+
+ reclaim->count_objects = kbase_mem_pool_reclaim_count_objects;
+ reclaim->scan_objects = kbase_mem_pool_reclaim_scan_objects;
+ reclaim->seeks = DEFAULT_SEEKS;
+ reclaim->batch = 0;
+
+ KBASE_REGISTER_SHRINKER(reclaim, "mali-mem-pool", pool);
pool_dbg(pool, "initialized\n");
@@ -623,7 +624,7 @@ void kbase_mem_pool_term(struct kbase_mem_pool *pool)
pool_dbg(pool, "terminate()\n");
- unregister_shrinker(&pool->reclaim);
+ KBASE_UNREGISTER_SHRINKER(pool->reclaim);
kbase_mem_pool_lock(pool);
pool->max_size = 0;
diff --git a/mali_kbase/mali_kbase_native_mgm.c b/mali_kbase/mali_kbase_native_mgm.c
index 5e3d1ee..d688509 100644
--- a/mali_kbase/mali_kbase_native_mgm.c
+++ b/mali_kbase/mali_kbase_native_mgm.c
@@ -121,44 +121,20 @@ static vm_fault_t kbase_native_mgm_vmf_insert_pfn_prot(struct memory_group_manag
return vmf_insert_pfn_prot(vma, addr, pfn, pgprot);
}
-/**
- * kbase_native_mgm_update_gpu_pte - Native method to modify a GPU page table
- * entry
- *
- * @mgm_dev: The memory group manager the request is being made through.
- * @group_id: A physical memory group ID, which must be valid but is not used.
- * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
- * @mmu_level: The level of the MMU page table where the page is getting mapped.
- * @pte: The prepared page table entry.
- *
- * This function simply returns the @pte without modification.
- *
- * Return: A GPU page table entry to be stored in a page table.
- */
static u64 kbase_native_mgm_update_gpu_pte(struct memory_group_manager_device *mgm_dev,
unsigned int group_id, int mmu_level, u64 pte)
{
- CSTD_UNUSED(mgm_dev);
- CSTD_UNUSED(group_id);
- CSTD_UNUSED(mmu_level);
+ if (WARN_ON(group_id >= MEMORY_GROUP_MANAGER_NR_GROUPS))
+ return pte;
+
+ pte |= ((u64)group_id << PTE_PBHA_SHIFT) & PTE_PBHA_MASK;
+
+ /* Address could be translated into a different bus address here */
+ pte |= ((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT);
return pte;
}
-/**
- * kbase_native_mgm_pte_to_original_pte - Native method to undo changes done in
- * kbase_native_mgm_update_gpu_pte()
- *
- * @mgm_dev: The memory group manager the request is being made through.
- * @group_id: A physical memory group ID, which must be valid but is not used.
- * Its valid range is 0 .. MEMORY_GROUP_MANAGER_NR_GROUPS-1.
- * @mmu_level: The level of the MMU page table where the page is getting mapped.
- * @pte: The prepared page table entry.
- *
- * This function simply returns the @pte without modification.
- *
- * Return: A GPU page table entry to be stored in a page table.
- */
static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_device *mgm_dev,
unsigned int group_id, int mmu_level, u64 pte)
{
@@ -166,6 +142,11 @@ static u64 kbase_native_mgm_pte_to_original_pte(struct memory_group_manager_devi
CSTD_UNUSED(group_id);
CSTD_UNUSED(mmu_level);
+ /* Undo the group ID modification */
+ pte &= ~PTE_PBHA_MASK;
+ /* Undo the bit set */
+ pte &= ~((u64)1 << PTE_RES_BIT_MULTI_AS_SHIFT);
+
return pte;
}
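
The reworked kbase_native_mgm_update_gpu_pte()/kbase_native_mgm_pte_to_original_pte() pair now stamp the memory group ID into the PTE's PBHA field and set a reserved bit, then strip both again on the way back. The real field positions come from PTE_PBHA_SHIFT, PTE_PBHA_MASK and PTE_RES_BIT_MULTI_AS_SHIFT, which are defined elsewhere in the driver; the sketch below uses purely hypothetical positions (a 4-bit PBHA field at bits 62:59 and bit 55 for the reserved bit) just to show that the two functions are inverses:

/* Hypothetical positions, for illustration only - not the driver's values. */
#define EX_PBHA_SHIFT		59
#define EX_PBHA_MASK		(0xFULL << EX_PBHA_SHIFT)
#define EX_MULTI_AS_SHIFT	55

static u64 ex_update_gpu_pte(u64 pte, unsigned int group_id)
{
	pte |= ((u64)group_id << EX_PBHA_SHIFT) & EX_PBHA_MASK;	/* stamp group ID */
	pte |= (u64)1 << EX_MULTI_AS_SHIFT;			/* mark reserved bit */
	return pte;
}

static u64 ex_pte_to_original_pte(u64 pte)
{
	pte &= ~EX_PBHA_MASK;			/* undo the group ID modification */
	pte &= ~((u64)1 << EX_MULTI_AS_SHIFT);	/* undo the reserved bit */
	return pte;
}

/* ex_pte_to_original_pte(ex_update_gpu_pte(pte, id)) == pte whenever the
 * original pte had those bits clear.
 */
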
diff --git a/mali_kbase/mali_kbase_pbha.c b/mali_kbase/mali_kbase_pbha.c
index 341ea90..c5b6fad 100644
--- a/mali_kbase/mali_kbase_pbha.c
+++ b/mali_kbase/mali_kbase_pbha.c
@@ -277,16 +277,16 @@ static int kbase_pbha_read_int_id_override_property(struct kbase_device *kbdev,
static int kbase_pbha_read_propagate_bits_property(struct kbase_device *kbdev,
const struct device_node *pbha_node)
{
- u32 bits = 0;
+ u8 bits = 0;
int err;
if (!kbase_hw_has_feature(kbdev, BASE_HW_FEATURE_PBHA_HWU))
return 0;
- err = of_property_read_u32(pbha_node, "propagate-bits", &bits);
+ err = of_property_read_u8(pbha_node, "propagate-bits", &bits);
if (err == -EINVAL) {
- err = of_property_read_u32(pbha_node, "propagate_bits", &bits);
+ err = of_property_read_u8(pbha_node, "propagate_bits", &bits);
}
if (err < 0) {
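
Switching from of_property_read_u32() to of_property_read_u8() also changes the expected device-tree encoding of the property. An illustrative comparison (not taken from the patch):

/* Byte encoding, matched by of_property_read_u8():
 *     propagate-bits = /bits/ 8 <0x0f>;
 * Cell encoding, matched by of_property_read_u32():
 *     propagate-bits = <0x0f>;
 * Note: of_property_read_u8() on a cell-encoded property does not fail; it
 * returns the first (most significant) byte of the 32-bit cell, so the DT
 * encoding has to match the accessor being used.
 */
u8 bits;
int err = of_property_read_u8(pbha_node, "propagate-bits", &bits);
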
diff --git a/mali_kbase/mali_kbase_pbha_debugfs.c b/mali_kbase/mali_kbase_pbha_debugfs.c
index f1d2794..8ab0d18 100644
--- a/mali_kbase/mali_kbase_pbha_debugfs.c
+++ b/mali_kbase/mali_kbase_pbha_debugfs.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2021-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2021-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -50,8 +50,8 @@ static int int_id_overrides_show(struct seq_file *sfile, void *data)
#endif /* MALI_USE_CSF */
for (j = 0; j < sizeof(u32); ++j) {
- u8 r_val;
- u8 w_val;
+ u8 r_val = 0;
+ u8 w_val = 0;
switch (j) {
case 0:
diff --git a/mali_kbase/mali_kbase_pm.c b/mali_kbase/mali_kbase_pm.c
index 55c4523..570a002 100644
--- a/mali_kbase/mali_kbase_pm.c
+++ b/mali_kbase/mali_kbase_pm.c
@@ -200,19 +200,24 @@ int kbase_pm_driver_suspend(struct kbase_device *kbdev)
rt_mutex_unlock(&kbdev->pm.lock);
#ifdef CONFIG_MALI_ARBITER_SUPPORT
-#if !MALI_USE_CSF
if (kbdev->arb.arb_if) {
- unsigned int i;
unsigned long flags;
+#if MALI_USE_CSF
+ spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
+ kbase_disjoint_state_up(kbdev);
+ spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#else
+ unsigned int i;
+
spin_lock_irqsave(&kbdev->hwaccess_lock, flags);
kbdev->js_data.runpool_irq.submit_allowed = 0;
kbase_disjoint_state_up(kbdev);
for (i = 0; i < kbdev->gpu_props.num_job_slots; i++)
kbase_job_slot_softstop(kbdev, i, NULL);
spin_unlock_irqrestore(&kbdev->hwaccess_lock, flags);
+#endif
}
-#endif /* !MALI_USE_CSF */
#endif /* CONFIG_MALI_ARBITER_SUPPORT */
/* From now on, the active count will drop towards zero. Sometimes,
diff --git a/mali_kbase/mmu/mali_kbase_mmu.c b/mali_kbase/mmu/mali_kbase_mmu.c
index 6bab554..8a5b92c 100644
--- a/mali_kbase/mmu/mali_kbase_mmu.c
+++ b/mali_kbase/mmu/mali_kbase_mmu.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -774,7 +774,7 @@ static bool page_fault_try_alloc(struct kbase_context *kctx, struct kbase_va_reg
return false;
}
- if (kctx->kbdev->pagesize_2mb && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
+ if (kbase_is_large_pages_enabled() && new_pages >= NUM_PAGES_IN_2MB_LARGE_PAGE) {
root_pool = &kctx->mem_pools.large[region->gpu_alloc->group_id];
*grow_2mb_pool = true;
} else {
@@ -921,7 +921,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
int err;
bool grown = false;
size_t pages_to_grow;
- bool grow_2mb_pool;
+ bool grow_2mb_pool = false;
struct kbase_sub_alloc *prealloc_sas[2] = { NULL, NULL };
int i;
size_t current_backed_size;
@@ -1091,7 +1091,7 @@ void kbase_mmu_page_fault_worker(struct work_struct *data)
}
page_fault_retry:
- if (kbdev->pagesize_2mb) {
+ if (kbase_is_large_pages_enabled()) {
/* Preallocate (or re-allocate) memory for the sub-allocation structs if necessary */
for (i = 0; i != ARRAY_SIZE(prealloc_sas); ++i) {
if (!prealloc_sas[i]) {
@@ -1181,10 +1181,14 @@ page_fault_retry:
*/
op_param.mmu_sync_info = mmu_sync_info;
op_param.kctx_id = kctx->id;
- /* Can safely skip the invalidate for all levels in case
- * of duplicate page faults.
+ /* Usually it is safe to skip the MMU cache invalidate for all levels
+ * in case of duplicate page faults. But for the pathological scenario
+ * where the faulting VA gets mapped by the time the page fault worker
+ * runs, it becomes imperative to invalidate the MMU cache for all
+ * levels, otherwise repeated page faults are possible on GPUs which
+ * support fine-grained MMU cache invalidation.
*/
- op_param.flush_skip_levels = 0xF;
+ op_param.flush_skip_levels = 0x0;
op_param.vpfn = fault_pfn;
op_param.nr = 1;
spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
@@ -1218,10 +1222,14 @@ page_fault_retry:
/* See comment [1] about UNLOCK usage */
op_param.mmu_sync_info = mmu_sync_info;
op_param.kctx_id = kctx->id;
- /* Can safely skip the invalidate for all levels in case
- * of duplicate page faults.
+ /* Usually it is safe to skip the MMU cache invalidate for all levels
+ * in case of duplicate page faults. But for the pathological scenario
+ * where the faulting VA gets mapped by the time the page fault worker
+ * runs, it becomes imperative to invalidate the MMU cache for all
+ * levels, otherwise repeated page faults are possible on GPUs which
+ * support fine-grained MMU cache invalidation.
*/
- op_param.flush_skip_levels = 0xF;
+ op_param.flush_skip_levels = 0x0;
op_param.vpfn = fault_pfn;
op_param.nr = 1;
spin_lock_irqsave(&kbdev->hwaccess_lock, hwaccess_flags);
@@ -1383,7 +1391,7 @@ page_fault_retry:
* Otherwise fail the allocation.
*/
if (pages_to_grow > 0) {
- if (kbdev->pagesize_2mb && grow_2mb_pool) {
+ if (kbase_is_large_pages_enabled() && grow_2mb_pool) {
/* Round page requirement up to nearest 2 MB */
struct kbase_mem_pool *const lp_mem_pool =
&kctx->mem_pools.large[group_id];
@@ -1597,6 +1605,7 @@ static int mmu_get_lowest_valid_pgd(struct kbase_device *kbdev, struct kbase_mmu
return err;
}
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(mmu_get_lowest_valid_pgd, ERRNO);
/*
* On success, sets out_pgd to the PGD for the specified level of translation
@@ -1702,8 +1711,16 @@ static void mmu_insert_pages_failure_recovery(struct kbase_device *kbdev,
mmu_mode->entries_invalidate(&page[idx], pcount);
if (!num_of_valid_entries) {
+ mmu_mode->set_num_valid_entries(page, 0);
+
kbase_kunmap(p, page);
+ /* No CPU or GPU cache maintenance is done here, as the caller will do a
+ * complete flush of the GPU cache and an invalidation of the TLB before
+ * the PGD page is freed. The CPU cache flush will be done when the PGD
+ * page is returned to the memory pool.
+ */
+
kbase_mmu_add_to_free_pgds_list(mmut, p);
kbase_mmu_update_and_free_parent_pgds(kbdev, mmut, pgds, vpfn, level,
@@ -1730,7 +1747,8 @@ next:
* going to happen to these pages at this stage. They might return
* movable once they are returned to a memory pool.
*/
- if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys) {
+ if (kbase_is_page_migration_enabled() && !ignore_page_migration && phys &&
+ !is_huge(*phys) && !is_partial(*phys)) {
const u64 num_pages = (to_vpfn - from_vpfn) / GPU_PAGES_PER_CPU_PAGE;
u64 i;
@@ -2645,6 +2663,7 @@ int kbase_mmu_insert_pages(struct kbase_device *kbdev, struct kbase_mmu_table *m
}
KBASE_EXPORT_TEST_API(kbase_mmu_insert_pages);
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_pages, ERRNO);
int kbase_mmu_insert_pages_skip_status_update(struct kbase_device *kbdev,
struct kbase_mmu_table *mmut, u64 vpfn,
@@ -2702,6 +2721,7 @@ int kbase_mmu_insert_aliased_pages(struct kbase_device *kbdev, struct kbase_mmu_
return 0;
}
+KBASE_ALLOW_ERROR_INJECTION_TEST_API(kbase_mmu_insert_aliased_pages, ERRNO);
#if !MALI_USE_CSF
/**
@@ -2918,15 +2938,25 @@ static void kbase_mmu_update_and_free_parent_pgds(struct kbase_device *kbdev,
kbdev->mmu_mode->entries_invalidate(&current_page[index], 1);
if (current_valid_entries == 1 && current_level != MIDGARD_MMU_LEVEL(0)) {
+ kbdev->mmu_mode->set_num_valid_entries(current_page, 0);
+
kbase_kunmap(p, current_page);
- /* Ensure the cacheline containing the last valid entry
- * of PGD is invalidated from the GPU cache, before the
- * PGD page is freed.
- */
- kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx,
- current_pgd + (index * sizeof(u64)), sizeof(u64),
- flush_op);
+ /* Check if fine-grained GPU cache maintenance is being used */
+ if (flush_op == KBASE_MMU_OP_FLUSH_PT) {
+ /* Ensure the invalidated PTE is visible in memory right away */
+ kbase_mmu_sync_pgd_cpu(kbdev,
+ kbase_dma_addr(p) + (index * sizeof(u64)),
+ sizeof(u64));
+ /* Invalidate the GPU cache for the whole PGD page and not just for
+ * the cacheline containing the invalidated PTE, as the PGD page is
+ * going to be freed. There is an extremely remote possibility that
+ * other cachelines (containing only invalid PTEs) of the PGD page are
+ * also present in the GPU cache.
+ */
+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, current_pgd,
+ 512 * sizeof(u64), KBASE_MMU_OP_FLUSH_PT);
+ }
kbase_mmu_add_to_free_pgds_list(mmut, p);
} else {
@@ -3108,14 +3138,25 @@ static int kbase_mmu_teardown_pgd_pages(struct kbase_device *kbdev, struct kbase
mmu_mode->entries_invalidate(&page[index], pcount);
if (!num_of_valid_entries) {
+ mmu_mode->set_num_valid_entries(page, 0);
+
kbase_kunmap(p, page);
- /* Ensure the cacheline(s) containing the last valid entries
- * of PGD is invalidated from the GPU cache, before the
- * PGD page is freed.
- */
- kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd + (index * sizeof(u64)),
- pcount * sizeof(u64), flush_op);
+ /* Check if fine-grained GPU cache maintenance is being used */
+ if (flush_op == KBASE_MMU_OP_FLUSH_PT) {
+ /* Ensure the invalidated ATEs are visible in memory right away */
+ kbase_mmu_sync_pgd_cpu(kbdev,
+ kbase_dma_addr(p) + (index * sizeof(u64)),
+ pcount * sizeof(u64));
+ /* Invalidate the GPU cache for the whole PGD page and not just for
+ * the cachelines containing the invalidated ATEs, as the PGD page
+ * is going to be freed. There is an extremely remote possibility
+ * that other cachelines (containing only invalid ATEs) of the PGD page
+ * are also present in the GPU cache.
+ */
+ kbase_mmu_sync_pgd_gpu(kbdev, mmut->kctx, pgd, 512 * sizeof(u64),
+ KBASE_MMU_OP_FLUSH_PT);
+ }
kbase_mmu_add_to_free_pgds_list(mmut, p);
@@ -3272,6 +3313,7 @@ int kbase_mmu_teardown_pages(struct kbase_device *kbdev, struct kbase_mmu_table
return mmu_teardown_pages(kbdev, mmut, vpfn, phys, nr_phys_pages, nr_virt_pages, as_nr,
false);
}
+KBASE_EXPORT_TEST_API(kbase_mmu_teardown_pages);
int kbase_mmu_teardown_imported_pages(struct kbase_device *kbdev, struct kbase_mmu_table *mmut,
u64 vpfn, struct tagged_addr *phys, size_t nr_phys_pages,
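
For context on the flush_skip_levels change above: the field appears to be a per-level bitmask (bit n set means "skip MMU level n" during the cache maintenance operation), so the patch moves duplicate-fault handling from skipping every level (0xF) to invalidating every level (0x0). A small hypothetical decomposition of the two constants, assuming four translation levels:

/* Hypothetical helper names, for illustration only. */
#define EX_SKIP_LEVEL(n)	(1UL << (n))
#define EX_SKIP_ALL_LEVELS	(EX_SKIP_LEVEL(0) | EX_SKIP_LEVEL(1) | \
				 EX_SKIP_LEVEL(2) | EX_SKIP_LEVEL(3))	/* == 0xF, the old value */
#define EX_SKIP_NO_LEVELS	0x0UL					/* new value: invalidate all levels */
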
diff --git a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
index d19579d..e3ad78d 100644
--- a/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
+++ b/mali_kbase/mmu/mali_kbase_mmu_mode_aarch64.c
@@ -32,7 +32,7 @@
*/
#define ENTRY_IS_ATE_L3 3ULL
#define ENTRY_IS_ATE_L02 1ULL
-#define ENTRY_IS_INVAL 2ULL
+#define ENTRY_IS_INVAL 0ULL
#define ENTRY_IS_PTE 3ULL
#define ENTRY_ACCESS_RW (1ULL << 6) /* bits 6:7 */
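
Given the other ENTRY_IS_* encodings in this file (the ATE/PTE values 1 and 3 all have bit 0 set), bit 0 appears to act as the "valid" bit, so both the old invalid marker 2 and the new value 0 decode as invalid; the new value additionally leaves invalidated entries fully zeroed. A minimal sketch of that reading, using a hypothetical helper:

/* Hypothetical helper, for illustration only. */
static inline bool ex_entry_is_valid(u64 entry)
{
	return (entry & 1ULL) != 0;	/* bit 0 set for the ATE/PTE encodings (1 and 3) */
}
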
diff --git a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
index 7c92505..9e3f789 100644
--- a/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
+++ b/mali_kbase/platform/devicetree/mali_kbase_runtime_pm.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2015-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2015-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/tests/include/kutf/kutf_kprobe.h b/mali_kbase/tests/include/kutf/kutf_kprobe.h
index f75cd77..cdcaa46 100644
--- a/mali_kbase/tests/include/kutf/kutf_kprobe.h
+++ b/mali_kbase/tests/include/kutf/kutf_kprobe.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2023-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
@@ -22,6 +22,8 @@
#ifndef _KUTF_KPROBE_H_
#define _KUTF_KPROBE_H_
+struct dentry;
+
int kutf_kprobe_init(struct dentry *base_dir);
void kutf_kprobe_exit(void);
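
The added "struct dentry;" forward declaration is sufficient here because the header only passes pointers to the type and never dereferences it. A minimal illustration of the pattern, with hypothetical names:

struct ex_opaque;			/* incomplete type: layout unknown here */

int ex_consume(struct ex_opaque *p);	/* fine: only a pointer is passed around */
/* Dereferencing p, or declaring 'struct ex_opaque v;', would additionally
 * require the full definition (e.g. from <linux/dcache.h> for struct dentry).
 */
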
diff --git a/mali_kbase/thirdparty/mali_kbase_mmap.c b/mali_kbase/thirdparty/mali_kbase_mmap.c
index 1592eab..9fad54d 100644
--- a/mali_kbase/thirdparty/mali_kbase_mmap.c
+++ b/mali_kbase/thirdparty/mali_kbase_mmap.c
@@ -20,18 +20,84 @@
* kbase_context_get_unmapped_area() interface.
*/
+#if (KERNEL_VERSION(6, 1, 0) <= LINUX_VERSION_CODE)
+/**
+ * move_mt_gap() - Search the maple tree for an existing gap of a particular size
+ * immediately before another pre-identified gap.
+ * @gap_start: Pre-identified gap starting address.
+ * @gap_end: Pre-identified gap ending address.
+ * @size: Size of the new gap needed before gap_start.
+ *
+ * This function searches the calling process' maple tree for a gap of the
+ * given size that immediately precedes the pre-identified gap. On success
+ * it decrements gap_end by the requested size and replaces gap_start with
+ * the start of the newly identified gap.
+ *
+ * Return: true if a large enough preceding gap is found, false otherwise.
+ */
+static bool move_mt_gap(unsigned long *gap_start, unsigned long *gap_end, unsigned long size)
+{
+ unsigned long new_gap_start, new_gap_end;
+
+ MA_STATE(mas, &current->mm->mm_mt, 0, 0);
+
+ if (*gap_end < size)
+ return false;
+
+ /* Calculate the gap end for the new, resultant gap */
+ new_gap_end = *gap_end - size;
+
+ /* If the new gap_end (i.e. the new VA start address) is greater than or equal to
+ * gap_start, then the pre-identified gap already has enough space to shrink and
+ * accommodate the decrease in gap_end.
+ */
+ if (new_gap_end >= *gap_start) {
+ /* Pre-identified gap already has space - just patch gap_end to new
+ * lower value and exit.
+ */
+ *gap_end = new_gap_end;
+ return true;
+ }
+
+ /* Since the new VA start address (new_gap_end) falls below the start of the
+ * pre-identified gap in the maple tree, see if there is a free gap of the requested
+ * size directly before the existing gap, so that the effective gap found is
+ * "extended". This may be larger than needed, but it leaves the same distance
+ * between gap_end and gap_start that currently exists.
+ */
+ new_gap_start = *gap_start - size;
+ if (mas_empty_area_rev(&mas, new_gap_start, *gap_start - 1, size)) {
+ /* There's no gap between the new start address needed and the
+ * current start address - so return false to find a new
+ * gap from the maple tree.
+ */
+ return false;
+ }
+ /* Suitable gap found - replace gap_start and gap_end with the new values. gap_start
+ * takes the start of the newly found gap, which now correctly precedes gap_end, and
+ * gap_end takes the new aligned value that has been decremented by the requested size.
+ */
+ *gap_start = mas.index;
+ *gap_end = new_gap_end;
+ return true;
+}
+
/**
* align_and_check() - Align the specified pointer to the provided alignment and
- * check that it is still in range.
- * @gap_end: Highest possible start address for allocation (end of gap in
- * address space)
- * @gap_start: Start address of current memory area / gap in address space
- * @info: vm_unmapped_area_info structure passed to caller, containing
- * alignment, length and limits for the allocation
- * @is_shader_code: True if the allocation is for shader code (which has
- * additional alignment requirements)
- * @is_same_4gb_page: True if the allocation needs to reside completely within
- * a 4GB chunk
+ * check that it is still in range. On kernels 6.1 onwards
+ * this function does not require the initially requested
+ * gap to be extended by the worst-case size needed to
+ * guarantee alignment.
+ * @gap_end: Highest possible start address for allocation (end of gap in
+ * address space)
+ * @gap_start: Start address of current memory area / gap in address space
+ * @info: vm_unmapped_area_info structure passed to caller, containing
+ * alignment, length and limits for the allocation
+ * @is_shader_code: True if the allocation is for shader code (which has
+ * additional alignment requirements)
+ * @is_same_4gb_page: True if the allocation needs to reside completely within
+ * a 4GB chunk
*
* Return: true if gap_end is now aligned correctly and is still in range,
* false otherwise
@@ -40,9 +106,94 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
struct vm_unmapped_area_info *info, bool is_shader_code,
bool is_same_4gb_page)
{
+ unsigned long alignment_shift;
+
/* Compute highest gap address at the desired alignment */
- (*gap_end) -= info->length;
- (*gap_end) -= (*gap_end - info->align_offset) & info->align_mask;
+ *gap_end -= info->length;
+ alignment_shift = (*gap_end - info->align_offset) & info->align_mask;
+
+ /* Align the desired start VA (gap_end) by the calculated alignment shift amount */
+ if (!move_mt_gap(&gap_start, gap_end, alignment_shift))
+ return false;
+ /* Basic alignment done - now check any further alignment requirements */
+
+ if (is_shader_code) {
+ /* Shader code allocations must not start or end on a 4GB boundary */
+ alignment_shift = info->align_offset ? info->align_offset : info->length;
+ if (0 == (*gap_end & BASE_MEM_MASK_4GB)) {
+ if (!move_mt_gap(&gap_start, gap_end, alignment_shift))
+ return false;
+ }
+ if (0 == ((*gap_end + info->length) & BASE_MEM_MASK_4GB)) {
+ if (!move_mt_gap(&gap_start, gap_end, alignment_shift))
+ return false;
+ }
+
+ if (!(*gap_end & BASE_MEM_MASK_4GB) ||
+ !((*gap_end + info->length) & BASE_MEM_MASK_4GB))
+ return false;
+ } else if (is_same_4gb_page) {
+ unsigned long start = *gap_end;
+ unsigned long end = *gap_end + info->length;
+ unsigned long mask = ~((unsigned long)U32_MAX);
+
+ /* Check if 4GB boundary is straddled */
+ if ((start & mask) != ((end - 1) & mask)) {
+ unsigned long offset = end - (end & mask);
+ /* This is to ensure that the alignment doesn't get
+ * disturbed in an attempt to prevent straddling at the
+ * 4GB boundary. The GPU VA is aligned to 2MB when the
+ * allocation size is > 2MB and there is enough CPU &
+ * GPU virtual space.
+ */
+ unsigned long rounded_offset = ALIGN(offset, info->align_mask + 1);
+
+ if (!move_mt_gap(&gap_start, gap_end, rounded_offset))
+ return false;
+ /* Re-calculate start and end values */
+ start = *gap_end;
+ end = *gap_end + info->length;
+
+ /* The preceding 4GB boundary shall not get straddled,
+ * even after accounting for the alignment, as the
+ * size of allocation is limited to 4GB and the initial
+ * start location was already aligned.
+ */
+ WARN_ON((start & mask) != ((end - 1) & mask));
+ }
+ }
+
+ if ((*gap_end < info->low_limit) || (*gap_end < gap_start))
+ return false;
+
+ return true;
+}
+#else
+/**
+ * align_and_check() - Align the specified pointer to the provided alignment and
+ * check that it is still in range. For kernel versions below
+ * 6.1, it requires that the requested length has already been
+ * extended by the worst-case alignment mask.
+ * @gap_end: Highest possible start address for allocation (end of gap in
+ * address space)
+ * @gap_start: Start address of current memory area / gap in address space
+ * @info: vm_unmapped_area_info structure passed to caller, containing
+ * alignment, length and limits for the allocation
+ * @is_shader_code: True if the allocation is for shader code (which has
+ * additional alignment requirements)
+ * @is_same_4gb_page: True if the allocation needs to reside completely within
+ * a 4GB chunk
+ *
+ * Return: true if gap_end is now aligned correctly and is still in range,
+ * false otherwise
+ */
+static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
+ struct vm_unmapped_area_info *info, bool is_shader_code,
+ bool is_same_4gb_page)
+{
+ /* Compute highest gap address at the desired alignment */
+ *gap_end -= info->length;
+ *gap_end -= (*gap_end - info->align_offset) & info->align_mask;
if (is_shader_code) {
/* Check for 4GB boundary */
@@ -73,6 +224,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
start -= rounded_offset;
end -= rounded_offset;
+ /* Patch gap_end to use new starting address for VA region */
*gap_end = start;
/* The preceding 4GB boundary shall not get straddled,
@@ -89,6 +241,7 @@ static bool align_and_check(unsigned long *gap_end, unsigned long gap_start,
return true;
}
+#endif
/**
* kbase_unmapped_area_topdown() - allocates new areas top-down from
@@ -218,31 +371,27 @@ check_current:
}
}
#else
- unsigned long length, high_limit, gap_start, gap_end;
+ unsigned long high_limit, gap_start, gap_end;
MA_STATE(mas, &current->mm->mm_mt, 0, 0);
- /* Adjust search length to account for worst case alignment overhead */
- length = info->length + info->align_mask;
- if (length < info->length)
- return -ENOMEM;
/*
* Adjust search limits by the desired length.
* See implementation comment at top of unmapped_area().
*/
gap_end = info->high_limit;
- if (gap_end < length)
+ if (gap_end < info->length)
return -ENOMEM;
- high_limit = gap_end - length;
+ high_limit = gap_end - info->length;
if (info->low_limit > high_limit)
return -ENOMEM;
while (true) {
- if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, length))
+ if (mas_empty_area_rev(&mas, info->low_limit, info->high_limit - 1, info->length))
return -ENOMEM;
gap_end = mas.last + 1;
- gap_start = mas.min;
+ gap_start = mas.index;
if (align_and_check(&gap_end, gap_start, info, is_shader_code, is_same_4gb_page))
return gap_end;
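
The reworked top-down search above no longer pads the search length by align_mask; it searches for the exact length and, when the aligned candidate would fall below the gap found, asks the maple tree (via move_mt_gap()) for an immediately preceding gap covering the shortfall. A small worked example of the alignment shift itself, with hypothetical numbers:

/* Hypothetical values, for illustration only: a 2 MiB-aligned allocation. */
static unsigned long ex_align_top_down(void)
{
	unsigned long gap_end = 0x7f12345000UL;	/* end of gap from mas_empty_area_rev() */
	unsigned long length = 0x200000UL;	/* info->length (2 MiB) */
	unsigned long align_mask = 0x1fffffUL;	/* info->align_mask */
	unsigned long align_offset = 0UL;	/* info->align_offset */
	unsigned long shift;

	gap_end -= length;				/* candidate start: 0x7f12145000 */
	shift = (gap_end - align_offset) & align_mask;	/* misalignment:    0x145000 */
	gap_end -= shift;				/* aligned start:   0x7f12000000 */

	/* On 6.1+ kernels, move_mt_gap() then checks that this 0x145000-byte
	 * shift still fits in the gap (or in an adjacent preceding gap); before
	 * 6.1, padding the search length by align_mask guaranteed it up front.
	 */
	return gap_end;
}
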
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.c b/mali_kbase/tl/mali_kbase_tracepoints.c
index 7427358..34cabbd 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.c
+++ b/mali_kbase/tl/mali_kbase_tracepoints.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
/*
*
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software
diff --git a/mali_kbase/tl/mali_kbase_tracepoints.h b/mali_kbase/tl/mali_kbase_tracepoints.h
index f5b5b39..dd23f97 100644
--- a/mali_kbase/tl/mali_kbase_tracepoints.h
+++ b/mali_kbase/tl/mali_kbase_tracepoints.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
*
- * (C) COPYRIGHT 2010-2023 ARM Limited. All rights reserved.
+ * (C) COPYRIGHT 2010-2024 ARM Limited. All rights reserved.
*
* This program is free software and is provided to you under the terms of the
* GNU General Public License version 2 as published by the Free Software