summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZuma copybara merger <zuma-automerger@google.com>2023-01-06 11:03:50 +0000
committerCopybara-Service <copybara-worker@google.com>2023-01-06 07:18:52 -0800
commita7120e148f6cf0a69f886a65eab5a4f6e325fee2 (patch)
tree47fce1405b3e110288a7e90eadd012afa1a99156
parentba17d8c15c88c9fef00f8c38ce164f4e88aea813 (diff)
downloadrio-a7120e148f6cf0a69f886a65eab5a4f6e325fee2.tar.gz
[Copybara Auto Merge] Merge branch zuma into android14-gs-pixel-5.15
gcip: correct path of gcip-dma-fence.h Bug: 258876786 gcip: add gcip-dma-fence.h Bug: 258876786 (repeat) edgetpu: Rename cooling to thermal edgetpu: Use edgetpu_thermal_{lock,unlock} edgetpu: Fix thermal deadlock Bug: 262790767 gcip: introduce firmware crash type Bug: 237739631 gcip: style fix up Signed-off-by: Zuma copybara merger <zuma-automerger@google.com> GitOrigin-RevId: bd9eb44d1a6854fcd79ddc0a31387dc5cf48ca11 Change-Id: Idf4cb6f5da868d9c840a0c8903a0d87b3e485742
-rw-r--r--drivers/edgetpu/edgetpu-fs.c22
-rw-r--r--drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-alloc-helper.c5
-rw-r--r--drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-image-config.c2
-rw-r--r--drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-dma-fence.h135
-rw-r--r--drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-firmware.h16
-rw-r--r--drivers/edgetpu/mobile-thermal.c98
6 files changed, 216 insertions, 62 deletions
diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c
index 03b07f4..43b734f 100644
--- a/drivers/edgetpu/edgetpu-fs.c
+++ b/drivers/edgetpu/edgetpu-fs.c
@@ -498,7 +498,7 @@ static int edgetpu_ioctl_release_wakelock(struct edgetpu_client *client)
static int edgetpu_ioctl_acquire_wakelock(struct edgetpu_client *client)
{
int count = 0;
- int ret;
+ int ret = 0;
struct edgetpu_thermal *thermal = client->etdev->thermal;
trace_edgetpu_acquire_wakelock_start(current->pid);
@@ -513,21 +513,23 @@ static int edgetpu_ioctl_acquire_wakelock(struct edgetpu_client *client)
client->pid = current->pid;
client->tgid = current->tgid;
edgetpu_thermal_lock(thermal);
- if (edgetpu_thermal_is_suspended(thermal)) {
+ if (edgetpu_thermal_is_suspended(thermal))
/* TPU is thermal suspended, so fail acquiring wakelock */
ret = -EAGAIN;
+ edgetpu_thermal_unlock(thermal);
+
+ if (ret) {
etdev_warn_ratelimited(client->etdev,
"wakelock acquire rejected due to thermal suspend");
- edgetpu_thermal_unlock(thermal);
goto error_client_unlock;
- } else {
- ret = edgetpu_pm_get(client->etdev->pm);
- edgetpu_thermal_unlock(thermal);
- if (ret) {
- etdev_warn(client->etdev, "pm_get failed (%d)", ret);
- goto error_client_unlock;
- }
}
+
+ ret = edgetpu_pm_get(client->etdev->pm);
+ if (ret) {
+ etdev_warn(client->etdev, "pm_get failed (%d)", ret);
+ goto error_client_unlock;
+ }
+
edgetpu_wakelock_lock(client->wakelock);
count = edgetpu_wakelock_acquire(client->wakelock);
if (count < 0) {
diff --git a/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-alloc-helper.c b/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-alloc-helper.c
index f79149f..33c95e2 100644
--- a/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-alloc-helper.c
+++ b/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-alloc-helper.c
@@ -25,9 +25,10 @@ static int gcip_vmalloc_to_pages(void *mem, size_t count, struct page **pages)
size_t i = 0;
while (count--) {
- pages[i++] = vmalloc_to_page(mem);
- if (!pages[i - 1])
+ pages[i] = vmalloc_to_page(mem);
+ if (!pages[i])
return -ENOMEM;
+ i++;
mem += PAGE_SIZE;
}
return 0;
diff --git a/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-image-config.c b/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-image-config.c
index 5fed69c..312bbdc 100644
--- a/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-image-config.c
+++ b/drivers/edgetpu/gcip-kernel-driver/drivers/gcip/gcip-image-config.c
@@ -19,7 +19,7 @@
#define ADDR_MASK ~(BIT(ADDR_SHIFT) - 1u)
/* used by ns_iommu_mappings */
-#define CONFIG_TO_MBSIZE(a) (((a)&NS_SIZE_MASK) << 20)
+#define CONFIG_TO_MBSIZE(a) (((a) & NS_SIZE_MASK) << 20)
/* used by iommu_mappings */
static inline __u32 config_to_size(__u32 cfg)
diff --git a/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-dma-fence.h b/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-dma-fence.h
new file mode 100644
index 0000000..a46bcbb
--- /dev/null
+++ b/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-dma-fence.h
@@ -0,0 +1,135 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * GCIP support of DMA fences.
+ *
+ * Copyright (C) 2023 Google LLC
+ */
+
+#ifndef __GCIP_DMA_FENCE_H__
+#define __GCIP_DMA_FENCE_H__
+
+#include <linux/device.h>
+#include <linux/dma-fence.h>
+#include <linux/seq_file.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define GCIP_FENCE_TIMELINE_NAME_LEN 128
+
+/* Take/release mgr->fence_list_lock (IRQ-saving); hold it while accessing mgr->fence_list_head. */
+#define GCIP_DMA_FENCE_LIST_LOCK(mgr, flags) spin_lock_irqsave(&(mgr)->fence_list_lock, flags)
+#define GCIP_DMA_FENCE_LIST_UNLOCK(mgr, flags) spin_unlock_irqrestore(&(mgr)->fence_list_lock, flags)
+
+/*
+ * Iterates over every fence linked on @mgr->fence_list_head via list_for_each_entry().
+ * @mgr: pointer to the struct gcip_dma_fence_manager whose list is walked
+ * @gfence: struct gcip_dma_fence pointer used as the loop cursor
+ *
+ * The list lock must be held: wrap the loop in GCIP_DMA_FENCE_LIST_LOCK/UNLOCK.
+ */
+#define gcip_for_each_fence(mgr, gfence) \
+ list_for_each_entry(gfence, &(mgr)->fence_list_head, fence_list)
+
+struct gcip_dma_fence_manager {
+ /* Head of the list of all fence objects (linked via gcip_dma_fence.fence_list); for debugging. */
+ struct list_head fence_list_head;
+ /* Protects the list headed by @fence_list_head; taken via GCIP_DMA_FENCE_LIST_LOCK/UNLOCK. */
+ spinlock_t fence_list_lock;
+ /* For logging. */
+ struct device *dev;
+};
+
+struct gcip_dma_fence {
+ struct dma_fence fence;
+ /* The manager used to init this object. */
+ struct gcip_dma_fence_manager *mgr;
+ char timeline_name[GCIP_FENCE_TIMELINE_NAME_LEN];
+ /* Protects @fence. */
+ spinlock_t lock;
+ /* Node on @mgr->fence_list_head; protected by @mgr->fence_list_lock. */
+ struct list_head fence_list;
+};
+
+struct gcip_dma_fence_data {
+ /*
+ * A null-terminated string with length less than GCIP_FENCE_TIMELINE_NAME_LEN.
+ * The content of this buffer will be copied so it's fine to release this pointer after
+ * the gcip_dma_fence_init() call.
+ */
+ char *timeline_name;
+ /* The DMA fence operations (ops table) to initialize the fence with. */
+ const struct dma_fence_ops *ops;
+ /* The sequence number to initialize the fence with. */
+ u32 seqno;
+ /* Output: The fd of the new sync_file with the new fence. */
+ int fence;
+ /*
+ * The callback to be called after @gfence is initialized, before an FD has been installed.
+ * Returns 0 on success. A non-zero return value will revert the initialization of
+ * @gfence and the returned error is returned by gcip_dma_fence_init().
+ *
+ * There is no 'before_exit' callback because the user is supposed to set a custom
+ * dma_fence_ops.release callback which does the revert of after_init and then call
+ * gcip_dma_fence_exit().
+ *
+ * This callback is optional.
+ */
+ int (*after_init)(struct gcip_dma_fence *gfence);
+};
+
+/*
+ * Allocates and returns a GCIP DMA fence manager. Memory is allocated as @dev managed so there is
+ * no release function of the manager.
+ *
+ * On error, returns a negative errno encoded in the returned pointer (presumably via ERR_PTR()).
+ */
+struct gcip_dma_fence_manager *gcip_dma_fence_manager_create(struct device *dev);
+
+/* Helpers for setting dma_fence_ops. */
+
+/* Returns the timeline name. @fence must be contained within a gcip_dma_fence. */
+const char *gcip_dma_fence_get_timeline_name(struct dma_fence *fence);
+
+/* Always returns true. Can be used for the enable_signaling callback. */
+bool gcip_dma_fence_always_true(struct dma_fence *fence);
+
+/* End of helpers for setting dma_fence_ops. */
+
+int gcip_dma_fence_init(struct gcip_dma_fence_manager *mgr, struct gcip_dma_fence *gfence,
+ struct gcip_dma_fence_data *data);
+
+/*
+ * Reverts gcip_dma_fence_init(). Removes @gfence from the manager's list.
+ * This function will not free @gfence.
+ */
+void gcip_dma_fence_exit(struct gcip_dma_fence *gfence);
+
+/*
+ * Sets @status to the DMA fence status of DMA fence FD @fence.
+ * @status is only set when this function returns 0.
+ *
+ * It is OK if @fence does not refer to a gcip_dma_fence.
+ *
+ * Returns 0 on success. Otherwise a negative errno.
+ */
+int gcip_dma_fence_status(int fence, int *status);
+
+/*
+ * Signals the fence error of DMA fence FD @fence.
+ *
+ * If the fence has been signaled,
+ * - if @ignore_signaled is true, this function does nothing.
+ * - otherwise, returns -EALREADY.
+ *
+ * It is OK if @fence does not refer to a gcip_dma_fence.
+ *
+ * Returns 0 on success. Otherwise a negative errno.
+ */
+int gcip_dma_fence_signal(int fence, int error, bool ignore_signaled);
+/* Identical to gcip_dma_fence_signal except this function accepts gcip_dma_fence as the input. */
+int gcip_dma_fenceptr_signal(struct gcip_dma_fence *gfence, int error, bool ignore_signaled);
+
+/* Prints data of @gfence to the sequence file @s. For debug purpose only. */
+void gcip_dma_fence_show(struct gcip_dma_fence *gfence, struct seq_file *s);
+
+#endif /* __GCIP_DMA_FENCE_H__ */
diff --git a/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-firmware.h b/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-firmware.h
index b856e5c..012a79a 100644
--- a/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-firmware.h
+++ b/drivers/edgetpu/gcip-kernel-driver/include/gcip/gcip-firmware.h
@@ -35,6 +35,22 @@ enum gcip_fw_flavor {
GCIP_FW_FLAVOR_CUSTOM = 4,
};
+/* Type of firmware crash, as sent by the GCIP_RKCI_FIRMWARE_CRASH RKCI command. */
+enum gcip_fw_crash_type {
+ /* An assertion failed. */
+ GCIP_FW_CRASH_ASSERT_FAIL = 0,
+ /* Data abort exception. */
+ GCIP_FW_CRASH_DATA_ABORT = 1,
+ /* Prefetch abort exception. */
+ GCIP_FW_CRASH_PREFETCH_ABORT = 2,
+ /* Undefined exception. */
+ GCIP_FW_CRASH_UNDEFINED_EXCEPTION = 3,
+ /* Exception which the firmware cannot recover from by itself. */
+ GCIP_FW_CRASH_UNRECOVERABLE_FAULT = 4,
+ /* Used in debug dump. */
+ GCIP_FW_CRASH_DUMMY_CRASH_TYPE = 0xFF,
+};
+
/* Firmware info filled out via KCI FIRMWARE_INFO command. */
struct gcip_fw_info {
uint64_t fw_build_time; /* BuildData::Timestamp() */
diff --git a/drivers/edgetpu/mobile-thermal.c b/drivers/edgetpu/mobile-thermal.c
index ec7986e..df3a6e0 100644
--- a/drivers/edgetpu/mobile-thermal.c
+++ b/drivers/edgetpu/mobile-thermal.c
@@ -47,20 +47,20 @@ static int edgetpu_get_max_state(struct thermal_cooling_device *cdev, unsigned l
static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev, unsigned long state_original)
{
int ret;
- struct edgetpu_thermal *cooling = cdev->devdata;
- struct device *dev = cooling->dev;
+ struct edgetpu_thermal *thermal = cdev->devdata;
+ struct device *dev = thermal->dev;
unsigned long pwr_state;
- if (state_original >= cooling->tpu_num_states) {
+ if (state_original >= thermal->tpu_num_states) {
dev_err(dev, "%s: invalid cooling state %lu\n", __func__, state_original);
return -EINVAL;
}
- state_original = max(cooling->sysfs_req, state_original);
+ state_original = max(thermal->sysfs_req, state_original);
- mutex_lock(&cooling->lock);
+ edgetpu_thermal_lock(thermal);
pwr_state = state_pwr_map[state_original].state;
- if (state_original == cooling->cooling_state) {
+ if (state_original == thermal->cooling_state) {
ret = -EALREADY;
goto out;
}
@@ -73,43 +73,43 @@ static int edgetpu_set_cur_state(struct thermal_cooling_device *cdev, unsigned l
if (pwr_state < TPU_ACTIVE_UUD) {
dev_warn_ratelimited(dev,
"Setting lowest DVFS state, waiting for FW to shutdown TPU");
- ret = edgetpu_thermal_kci_if_powered(cooling->etdev, TPU_ACTIVE_UUD);
+ ret = edgetpu_thermal_kci_if_powered(thermal->etdev, TPU_ACTIVE_UUD);
} else {
- ret = edgetpu_thermal_kci_if_powered(cooling->etdev, pwr_state);
+ ret = edgetpu_thermal_kci_if_powered(thermal->etdev, pwr_state);
}
if (ret) {
dev_err(dev, "error setting tpu policy: %d\n", ret);
goto out;
}
- cooling->cooling_state = state_original;
+ thermal->cooling_state = state_original;
out:
- mutex_unlock(&cooling->lock);
+ edgetpu_thermal_unlock(thermal);
return ret;
}
static int edgetpu_get_cur_state(struct thermal_cooling_device *cdev, unsigned long *state)
{
int ret = 0;
- struct edgetpu_thermal *cooling = cdev->devdata;
+ struct edgetpu_thermal *thermal = cdev->devdata;
- *state = cooling->cooling_state;
- if (*state < cooling->tpu_num_states)
+ *state = thermal->cooling_state;
+ if (*state < thermal->tpu_num_states)
return 0;
- dev_warn(cooling->dev, "Unknown cooling state: %lu, resetting\n", *state);
- mutex_lock(&cooling->lock);
+ dev_warn(thermal->dev, "Unknown cooling state: %lu, resetting\n", *state);
+ edgetpu_thermal_lock(thermal);
- ret = edgetpu_thermal_kci_if_powered(cooling->etdev, TPU_ACTIVE_NOM);
+ ret = edgetpu_thermal_kci_if_powered(thermal->etdev, TPU_ACTIVE_NOM);
if (ret) {
- dev_err(cooling->dev, "error setting tpu policy: %d\n", ret);
- mutex_unlock(&cooling->lock);
+ dev_err(thermal->dev, "error setting tpu policy: %d\n", ret);
+ edgetpu_thermal_unlock(thermal);
return ret;
}
/* setting back to "no cooling" */
- cooling->cooling_state = 0;
- mutex_unlock(&cooling->lock);
+ thermal->cooling_state = 0;
+ edgetpu_thermal_unlock(thermal);
return 0;
}
@@ -137,10 +137,10 @@ static int edgetpu_get_requested_power(struct thermal_cooling_device *cdev,
u32 *power)
{
unsigned long state_original;
- struct edgetpu_thermal *cooling = cdev->devdata;
+ struct edgetpu_thermal *thermal = cdev->devdata;
- state_original = edgetpu_soc_pm_get_rate(cooling->etdev, 0);
- return edgetpu_state2power_internal(state_original, power, cooling);
+ state_original = edgetpu_soc_pm_get_rate(thermal->etdev, 0);
+ return edgetpu_state2power_internal(state_original, power, thermal);
}
static int edgetpu_state2power(struct thermal_cooling_device *cdev,
@@ -149,14 +149,14 @@ static int edgetpu_state2power(struct thermal_cooling_device *cdev,
#endif
unsigned long state, u32 *power)
{
- struct edgetpu_thermal *cooling = cdev->devdata;
+ struct edgetpu_thermal *thermal = cdev->devdata;
- if (state >= cooling->tpu_num_states) {
- dev_err(cooling->dev, "%s: invalid state: %lu\n", __func__, state);
+ if (state >= thermal->tpu_num_states) {
+ dev_err(thermal->dev, "%s: invalid state: %lu\n", __func__, state);
return -EINVAL;
}
- return edgetpu_state2power_internal(state_pwr_map[state].state, power, cooling);
+ return edgetpu_state2power_internal(state_pwr_map[state].state, power, thermal);
}
static int edgetpu_power2state(struct thermal_cooling_device *cdev,
@@ -261,12 +261,12 @@ static ssize_t user_vote_show(struct device *dev, struct device_attribute *attr,
{
struct thermal_cooling_device *cdev =
container_of(dev, struct thermal_cooling_device, device);
- struct edgetpu_thermal *cooling = cdev->devdata;
+ struct edgetpu_thermal *thermal = cdev->devdata;
- if (!cooling)
+ if (!thermal)
return -ENODEV;
- return sysfs_emit(buf, "%lu\n", cooling->sysfs_req);
+ return sysfs_emit(buf, "%lu\n", thermal->sysfs_req);
}
static ssize_t user_vote_store(struct device *dev, struct device_attribute *attr, const char *buf,
@@ -274,22 +274,22 @@ static ssize_t user_vote_store(struct device *dev, struct device_attribute *attr
{
struct thermal_cooling_device *cdev =
container_of(dev, struct thermal_cooling_device, device);
- struct edgetpu_thermal *cooling = cdev->devdata;
+ struct edgetpu_thermal *thermal = cdev->devdata;
int ret;
unsigned long state;
- if (!cooling)
+ if (!thermal)
return -ENODEV;
ret = kstrtoul(buf, 0, &state);
if (ret)
return ret;
- if (state >= cooling->tpu_num_states)
+ if (state >= thermal->tpu_num_states)
return -EINVAL;
mutex_lock(&cdev->lock);
- cooling->sysfs_req = state;
+ thermal->sysfs_req = state;
cdev->updated = false;
mutex_unlock(&cdev->lock);
@@ -377,19 +377,19 @@ int edgetpu_thermal_suspend(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
struct edgetpu_dev *etdev = platform_get_drvdata(pdev);
- struct edgetpu_thermal *cooling = etdev->thermal;
+ struct edgetpu_thermal *thermal = etdev->thermal;
int ret = 0;
- if (IS_ERR(cooling))
- return PTR_ERR(cooling);
- mutex_lock(&cooling->lock);
+ if (IS_ERR(thermal))
+ return PTR_ERR(thermal);
+ edgetpu_thermal_lock(thermal);
/*
* Always set as suspended even when the FW cannot handle the KCI (it's dead for some
* unknown reasons) because we still want to prevent the runtime from using TPU.
*/
- cooling->thermal_suspended = true;
+ thermal->thermal_suspended = true;
ret = edgetpu_thermal_kci_if_powered(etdev, TPU_OFF);
- mutex_unlock(&cooling->lock);
+ edgetpu_thermal_unlock(thermal);
return ret;
}
@@ -397,24 +397,24 @@ int edgetpu_thermal_resume(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
struct edgetpu_dev *etdev = platform_get_drvdata(pdev);
- struct edgetpu_thermal *cooling = etdev->thermal;
+ struct edgetpu_thermal *thermal = etdev->thermal;
int ret = 0;
- if (IS_ERR(cooling))
- return PTR_ERR(cooling);
- mutex_lock(&cooling->lock);
+ if (IS_ERR(thermal))
+ return PTR_ERR(thermal);
+ edgetpu_thermal_lock(thermal);
- if (cooling->cooling_state >= cooling->tpu_num_states)
- cooling->cooling_state = 0;
+ if (thermal->cooling_state >= thermal->tpu_num_states)
+ thermal->cooling_state = 0;
- ret = edgetpu_thermal_kci_if_powered(etdev, state_pwr_map[cooling->cooling_state].state);
+ ret = edgetpu_thermal_kci_if_powered(etdev, state_pwr_map[thermal->cooling_state].state);
/*
* Unlike edgetpu_thermal_suspend(), only set the device is resumed if the FW handled the
* KCI request.
*/
if (!ret)
- cooling->thermal_suspended = false;
- mutex_unlock(&cooling->lock);
+ thermal->thermal_suspended = false;
+ edgetpu_thermal_unlock(thermal);
return ret;
}