summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Whi copybara merger <whitechapel-automerger@google.com> 2022-04-25 22:55:04 +0000
committer: Todd Poynor <toddpoynor@google.com> 2022-04-26 22:55:00 +0000
commit: 0f2e98ab5247ba056469c7724151623ce7688879 (patch)
tree: 923a5df7fbb209dd2ff131d06f8949f91af9dba0
parent: 2b5db85f5e6ded9d168bee4761c822f35472043a (diff)
download: janeiro-0f2e98ab5247ba056469c7724151623ce7688879.tar.gz
[Copybara Auto Merge] Merge branch 'pro' into android13-gs-pixel-5.10
edgetpu: cast access_ok address param to avoid type warning
edgetpu: retry buffer map read-only on EFAULT
edgetpu: update client pid on wakelock acquire
edgetpu: add new firmware metrics

Bug: 201243473
Bug: 201243473
Bug: 228193834
Bug: 229311738
GitOrigin-RevId: d4d12d04147cd1bafae07dc4594d3b017028d518
Change-Id: Ia7d65b4d73bd0c65bac845f9047b08c32339a220
-rw-r--r-- drivers/edgetpu/edgetpu-device-group.c 22
-rw-r--r-- drivers/edgetpu/edgetpu-fs.c 14
-rw-r--r-- drivers/edgetpu/edgetpu-pm.c 3
-rw-r--r-- drivers/edgetpu/edgetpu-usage-stats.c 156
-rw-r--r-- drivers/edgetpu/edgetpu-usage-stats.h 18
5 files changed, 204 insertions, 9 deletions
diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c
index 066bb9c..eb2c39d 100644
--- a/drivers/edgetpu/edgetpu-device-group.c
+++ b/drivers/edgetpu/edgetpu-device-group.c
@@ -1168,10 +1168,16 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group,
if (size == 0)
return ERR_PTR(-EINVAL);
+ if (!access_ok((const void *)host_addr, size)) {
+ etdev_err(etdev, "invalid address range in buffer map request");
+ return ERR_PTR(-EFAULT);
+ }
offset = host_addr & (PAGE_SIZE - 1);
- /* overflow check */
- if (unlikely((size + offset) / PAGE_SIZE >= UINT_MAX - 1 || size + offset < size))
- return ERR_PTR(-ENOMEM);
+ /* overflow check (should also be caught by access_ok) */
+ if (unlikely((size + offset) / PAGE_SIZE >= UINT_MAX - 1 || size + offset < size)) {
+ etdev_err(etdev, "address overflow in buffer map request");
+ return ERR_PTR(-EFAULT);
+ }
num_pages = DIV_ROUND_UP((size + offset), PAGE_SIZE);
etdev_dbg(etdev, "%s: hostaddr=%#llx pages=%u", __func__, host_addr, num_pages);
/*
@@ -1204,10 +1210,20 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group,
*pnum_pages = num_pages;
return pages;
}
+ if (ret == -EFAULT && !*preadonly) {
+ foll_flags &= ~FOLL_WRITE;
+ *preadonly = true;
+ ret = pin_user_pages_fast(host_addr & PAGE_MASK, num_pages,
+ foll_flags, pages);
+ }
if (ret < 0) {
etdev_dbg(etdev, "pin_user_pages failed %u:%pK-%u: %d",
group->workload_id, (void *)host_addr, num_pages,
ret);
+ if (ret == -EFAULT)
+ etdev_err(etdev,
+ "bad address locking %u pages for %s",
+ num_pages, *preadonly ? "read" : "write");
if (ret != -ENOMEM) {
num_pages = 0;
goto error;
diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c
index e722738..d0efb67 100644
--- a/drivers/edgetpu/edgetpu-fs.c
+++ b/drivers/edgetpu/edgetpu-fs.c
@@ -582,6 +582,14 @@ static int edgetpu_ioctl_acquire_wakelock(struct edgetpu_client *client)
struct edgetpu_thermal *thermal = client->etdev->thermal;
LOCK(client);
+ /*
+ * Update client PID; the client may have been passed from the
+ * edgetpu service that originally created it to a new process.
+ * By the time the client holds TPU wakelocks it will have been
+ * passed to the new owning process.
+ */
+ client->pid = current->pid;
+ client->tgid = current->tgid;
edgetpu_thermal_lock(thermal);
if (edgetpu_thermal_is_suspended(thermal)) {
/* TPU is thermal suspended, so fail acquiring wakelock */
@@ -629,7 +637,8 @@ static int edgetpu_ioctl_acquire_wakelock(struct edgetpu_client *client)
return 0;
error_unlock:
UNLOCK(client);
- etdev_err(client->etdev, "PID: %d failed to acquire wakelock", client->pid);
+ etdev_err(client->etdev, "client pid %d failed to acquire wakelock",
+ client->pid);
return ret;
}
@@ -658,7 +667,8 @@ edgetpu_ioctl_acquire_ext_mailbox(struct edgetpu_client *client,
ret = edgetpu_chip_acquire_ext_mailbox(client, &ext_mailbox);
if (ret)
- etdev_err(client->etdev, "PID: %d failed to acquire ext mailbox", client->pid);
+ etdev_err(client->etdev, "client pid %d failed to acquire ext mailbox",
+ client->pid);
return ret;
}
diff --git a/drivers/edgetpu/edgetpu-pm.c b/drivers/edgetpu/edgetpu-pm.c
index ae075d0..a71232d 100644
--- a/drivers/edgetpu/edgetpu-pm.c
+++ b/drivers/edgetpu/edgetpu-pm.c
@@ -378,7 +378,8 @@ int edgetpu_pm_suspend(struct edgetpu_dev *etdev)
if (NO_WAKELOCK(lc->client->wakelock) ||
!lc->client->wakelock->req_count)
continue;
- etdev_warn_ratelimited(etdev, "pid %d tgid %d count %d\n",
+ etdev_warn_ratelimited(etdev,
+ "client pid %d tgid %d count %d\n",
lc->client->pid,
lc->client->tgid,
lc->client->wakelock->req_count);
diff --git a/drivers/edgetpu/edgetpu-usage-stats.c b/drivers/edgetpu/edgetpu-usage-stats.c
index 4c6dfcc..41d149d 100644
--- a/drivers/edgetpu/edgetpu-usage-stats.c
+++ b/drivers/edgetpu/edgetpu-usage-stats.c
@@ -570,6 +570,69 @@ static ssize_t hardware_preempt_count_store(struct device *dev, struct device_at
static DEVICE_ATTR(hardware_preempt_count, 0664, hardware_preempt_count_show,
hardware_preempt_count_store);
+/*
+ * sysfs read handler for the "hardware_ctx_save_time" attribute.
+ *
+ * Fetches EDGETPU_COUNTER_HARDWARE_CTX_SAVE_TIME_US through
+ * edgetpu_usage_get_counter() and writes it to @buf as a decimal number
+ * followed by a newline.
+ *
+ * Return: number of bytes written to @buf.
+ */
+static ssize_t hardware_ctx_save_time_show(struct device *dev, struct device_attribute *attr,
+					   char *buf)
+{
+	struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+	int64_t val;
+
+	val = edgetpu_usage_get_counter(etdev, EDGETPU_COUNTER_HARDWARE_CTX_SAVE_TIME_US);
+	/* val is a signed 64-bit quantity, so print it with a signed conversion. */
+	return scnprintf(buf, PAGE_SIZE, "%lld\n", (long long)val);
+}
+
+/*
+ * sysfs write handler for the "hardware_ctx_save_time" attribute.
+ *
+ * The data in @buf is not inspected: any write clears
+ * EDGETPU_COUNTER_HARDWARE_CTX_SAVE_TIME_US via edgetpu_counter_clear().
+ *
+ * Return: @count, i.e. the whole write is reported as consumed.
+ */
+static ssize_t hardware_ctx_save_time_store(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct edgetpu_dev *tpu = dev_get_drvdata(dev);
+
+	edgetpu_counter_clear(tpu, EDGETPU_COUNTER_HARDWARE_CTX_SAVE_TIME_US);
+
+	return count;
+}
+/* Mode 0664 attribute wired to the show/store handlers of the same name. */
+static DEVICE_ATTR(hardware_ctx_save_time, 0664,
+		   hardware_ctx_save_time_show,
+		   hardware_ctx_save_time_store);
+
+/*
+ * sysfs read handler for the "scalar_fence_wait_time" attribute.
+ *
+ * Fetches EDGETPU_COUNTER_SCALAR_FENCE_WAIT_TIME_US through
+ * edgetpu_usage_get_counter() and writes it to @buf as a decimal number
+ * followed by a newline.
+ *
+ * Return: number of bytes written to @buf.
+ */
+static ssize_t scalar_fence_wait_time_show(struct device *dev, struct device_attribute *attr,
+					   char *buf)
+{
+	struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+	int64_t val;
+
+	val = edgetpu_usage_get_counter(etdev, EDGETPU_COUNTER_SCALAR_FENCE_WAIT_TIME_US);
+	/* val is a signed 64-bit quantity, so print it with a signed conversion. */
+	return scnprintf(buf, PAGE_SIZE, "%lld\n", (long long)val);
+}
+
+/*
+ * sysfs write handler for the "scalar_fence_wait_time" attribute.
+ *
+ * The data in @buf is not inspected: any write clears
+ * EDGETPU_COUNTER_SCALAR_FENCE_WAIT_TIME_US via edgetpu_counter_clear().
+ *
+ * Return: @count, i.e. the whole write is reported as consumed.
+ */
+static ssize_t scalar_fence_wait_time_store(struct device *dev,
+					    struct device_attribute *attr,
+					    const char *buf, size_t count)
+{
+	struct edgetpu_dev *tpu = dev_get_drvdata(dev);
+
+	edgetpu_counter_clear(tpu, EDGETPU_COUNTER_SCALAR_FENCE_WAIT_TIME_US);
+
+	return count;
+}
+/* Mode 0664 attribute wired to the show/store handlers of the same name. */
+static DEVICE_ATTR(scalar_fence_wait_time, 0664,
+		   scalar_fence_wait_time_show,
+		   scalar_fence_wait_time_store);
+
+/*
+ * sysfs read handler for the "long_suspend_count" attribute.
+ *
+ * Fetches EDGETPU_COUNTER_LONG_SUSPEND through edgetpu_usage_get_counter()
+ * and writes it to @buf as a decimal number followed by a newline.
+ *
+ * Return: number of bytes written to @buf.
+ */
+static ssize_t long_suspend_count_show(struct device *dev, struct device_attribute *attr,
+				       char *buf)
+{
+	struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+	int64_t val;
+
+	val = edgetpu_usage_get_counter(etdev, EDGETPU_COUNTER_LONG_SUSPEND);
+	/* val is a signed 64-bit quantity, so print it with a signed conversion. */
+	return scnprintf(buf, PAGE_SIZE, "%lld\n", (long long)val);
+}
+
+/*
+ * sysfs write handler for the "long_suspend_count" attribute.
+ *
+ * The data in @buf is not inspected: any write clears
+ * EDGETPU_COUNTER_LONG_SUSPEND via edgetpu_counter_clear().
+ *
+ * Return: @count, i.e. the whole write is reported as consumed.
+ */
+static ssize_t long_suspend_count_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct edgetpu_dev *tpu = dev_get_drvdata(dev);
+
+	edgetpu_counter_clear(tpu, EDGETPU_COUNTER_LONG_SUSPEND);
+
+	return count;
+}
+/* Mode 0664 attribute wired to the show/store handlers of the same name. */
+static DEVICE_ATTR(long_suspend_count, 0664,
+		   long_suspend_count_show,
+		   long_suspend_count_store);
+
static ssize_t outstanding_commands_max_show(
struct device *dev, struct device_attribute *attr, char *buf)
{
@@ -629,6 +692,93 @@ static ssize_t preempt_depth_max_store(
static DEVICE_ATTR(preempt_depth_max, 0664, preempt_depth_max_show,
preempt_depth_max_store);
+/*
+ * sysfs read handler for the "hardware_ctx_save_time_max" attribute.
+ *
+ * Fetches EDGETPU_MAX_WATERMARK_HARDWARE_CTX_SAVE_TIME_US through
+ * edgetpu_usage_get_max_watermark() and writes it to @buf as a decimal
+ * number followed by a newline.
+ *
+ * Return: number of bytes written to @buf.
+ */
+static ssize_t hardware_ctx_save_time_max_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+	int64_t val;
+
+	val = edgetpu_usage_get_max_watermark(
+		etdev, EDGETPU_MAX_WATERMARK_HARDWARE_CTX_SAVE_TIME_US);
+	/* val is a signed 64-bit quantity, so print it with a signed conversion. */
+	return scnprintf(buf, PAGE_SIZE, "%lld\n", (long long)val);
+}
+
+/*
+ * sysfs write handler for the "hardware_ctx_save_time_max" attribute.
+ *
+ * The data in @buf is not inspected: any write zeroes the
+ * EDGETPU_MAX_WATERMARK_HARDWARE_CTX_SAVE_TIME_US slot of max_watermark[]
+ * while holding usage_stats_lock. Nothing is done when the device has no
+ * usage_stats object.
+ *
+ * Return: @count, i.e. the whole write is reported as consumed.
+ */
+static ssize_t hardware_ctx_save_time_max_store(
+	struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+	struct edgetpu_usage_stats *stats = etdev->usage_stats;
+
+	/* No stats object: accept the write without touching anything. */
+	if (!stats)
+		return count;
+
+	mutex_lock(&stats->usage_stats_lock);
+	stats->max_watermark[EDGETPU_MAX_WATERMARK_HARDWARE_CTX_SAVE_TIME_US] = 0;
+	mutex_unlock(&stats->usage_stats_lock);
+
+	return count;
+}
+/* Mode 0664 attribute wired to the show/store handlers of the same name. */
+static DEVICE_ATTR(hardware_ctx_save_time_max, 0664,
+		   hardware_ctx_save_time_max_show,
+		   hardware_ctx_save_time_max_store);
+
+/*
+ * sysfs read handler for the "scalar_fence_wait_time_max" attribute.
+ *
+ * Fetches EDGETPU_MAX_WATERMARK_SCALAR_FENCE_WAIT_TIME_US through
+ * edgetpu_usage_get_max_watermark() and writes it to @buf as a decimal
+ * number followed by a newline.
+ *
+ * Return: number of bytes written to @buf.
+ */
+static ssize_t scalar_fence_wait_time_max_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+	int64_t val;
+
+	val = edgetpu_usage_get_max_watermark(
+		etdev, EDGETPU_MAX_WATERMARK_SCALAR_FENCE_WAIT_TIME_US);
+	/* val is a signed 64-bit quantity, so print it with a signed conversion. */
+	return scnprintf(buf, PAGE_SIZE, "%lld\n", (long long)val);
+}
+
+/*
+ * sysfs write handler for the "scalar_fence_wait_time_max" attribute.
+ *
+ * The data in @buf is not inspected: any write zeroes the
+ * EDGETPU_MAX_WATERMARK_SCALAR_FENCE_WAIT_TIME_US slot of max_watermark[]
+ * while holding usage_stats_lock. Nothing is done when the device has no
+ * usage_stats object.
+ *
+ * Return: @count, i.e. the whole write is reported as consumed.
+ */
+static ssize_t scalar_fence_wait_time_max_store(
+	struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+	struct edgetpu_usage_stats *stats = etdev->usage_stats;
+
+	/* No stats object: accept the write without touching anything. */
+	if (!stats)
+		return count;
+
+	mutex_lock(&stats->usage_stats_lock);
+	stats->max_watermark[EDGETPU_MAX_WATERMARK_SCALAR_FENCE_WAIT_TIME_US] = 0;
+	mutex_unlock(&stats->usage_stats_lock);
+
+	return count;
+}
+/* Mode 0664 attribute wired to the show/store handlers of the same name. */
+static DEVICE_ATTR(scalar_fence_wait_time_max, 0664,
+		   scalar_fence_wait_time_max_show,
+		   scalar_fence_wait_time_max_store);
+
+/*
+ * sysfs read handler for the "suspend_time_max" attribute.
+ *
+ * Fetches EDGETPU_MAX_WATERMARK_SUSPEND_TIME_US through
+ * edgetpu_usage_get_max_watermark() and writes it to @buf as a decimal
+ * number followed by a newline.
+ *
+ * Return: number of bytes written to @buf.
+ */
+static ssize_t suspend_time_max_show(
+	struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+	int64_t val;
+
+	val = edgetpu_usage_get_max_watermark(
+		etdev, EDGETPU_MAX_WATERMARK_SUSPEND_TIME_US);
+	/* val is a signed 64-bit quantity, so print it with a signed conversion. */
+	return scnprintf(buf, PAGE_SIZE, "%lld\n", (long long)val);
+}
+
+/*
+ * sysfs write handler for the "suspend_time_max" attribute.
+ *
+ * The data in @buf is not inspected: any write zeroes the
+ * EDGETPU_MAX_WATERMARK_SUSPEND_TIME_US slot of max_watermark[] while
+ * holding usage_stats_lock. Nothing is done when the device has no
+ * usage_stats object.
+ *
+ * Return: @count, i.e. the whole write is reported as consumed.
+ */
+static ssize_t suspend_time_max_store(
+	struct device *dev, struct device_attribute *attr,
+	const char *buf, size_t count)
+{
+	struct edgetpu_dev *etdev = dev_get_drvdata(dev);
+	struct edgetpu_usage_stats *stats = etdev->usage_stats;
+
+	/* No stats object: accept the write without touching anything. */
+	if (!stats)
+		return count;
+
+	mutex_lock(&stats->usage_stats_lock);
+	stats->max_watermark[EDGETPU_MAX_WATERMARK_SUSPEND_TIME_US] = 0;
+	mutex_unlock(&stats->usage_stats_lock);
+
+	return count;
+}
+/* Mode 0664 attribute wired to the show/store handlers of the same name. */
+static DEVICE_ATTR(suspend_time_max, 0664,
+		   suspend_time_max_show,
+		   suspend_time_max_store);
+
static ssize_t fw_thread_stats_show(
struct device *dev, struct device_attribute *attr, char *buf)
{
@@ -681,8 +831,14 @@ static struct attribute *usage_stats_dev_attrs[] = {
&dev_attr_param_cache_miss_count.attr,
&dev_attr_context_preempt_count.attr,
&dev_attr_hardware_preempt_count.attr,
+ &dev_attr_hardware_ctx_save_time.attr,
+ &dev_attr_scalar_fence_wait_time.attr,
+ &dev_attr_long_suspend_count.attr,
&dev_attr_outstanding_commands_max.attr,
&dev_attr_preempt_depth_max.attr,
+ &dev_attr_hardware_ctx_save_time_max.attr,
+ &dev_attr_scalar_fence_wait_time_max.attr,
+ &dev_attr_suspend_time_max.attr,
&dev_attr_fw_thread_stats.attr,
NULL,
};
diff --git a/drivers/edgetpu/edgetpu-usage-stats.h b/drivers/edgetpu/edgetpu-usage-stats.h
index c76899a..6b5d612 100644
--- a/drivers/edgetpu/edgetpu-usage-stats.h
+++ b/drivers/edgetpu/edgetpu-usage-stats.h
@@ -74,8 +74,14 @@ enum edgetpu_usage_counter_type {
EDGETPU_COUNTER_CONTEXT_PREEMPTS = 6,
/* Number of times a hardware preemption occurred. */
EDGETPU_COUNTER_HARDWARE_PREEMPTS = 7,
-
- EDGETPU_COUNTER_COUNT = 8, /* number of counters above */
+ /* Total time(us) spent in saving hw ctx during hw preemption */
+ EDGETPU_COUNTER_HARDWARE_CTX_SAVE_TIME_US = 8,
+ /* Total time(us) spent in waiting to hit scalar fence during hw preemption */
+ EDGETPU_COUNTER_SCALAR_FENCE_WAIT_TIME_US = 9,
+ /* Number of times (firmware)suspend function takes longer than SLA time. */
+ EDGETPU_COUNTER_LONG_SUSPEND = 10,
+
+ EDGETPU_COUNTER_COUNT = 11, /* number of counters above */
};
/* Generic counter. Only reported if it has a value larger than 0. */
@@ -94,9 +100,15 @@ enum edgetpu_usage_max_watermark_type {
EDGETPU_MAX_WATERMARK_OUT_CMDS = 0,
/* Number of preempted contexts at any given time. */
EDGETPU_MAX_WATERMARK_PREEMPT_DEPTH = 1,
+ /* Max time(us) spent in saving hw ctx during hw preemption */
+ EDGETPU_MAX_WATERMARK_HARDWARE_CTX_SAVE_TIME_US = 2,
+ /* Max time(us) spent in waiting to hit scalar fence during hw preemption */
+ EDGETPU_MAX_WATERMARK_SCALAR_FENCE_WAIT_TIME_US = 3,
+ /* Max time(us) spent during (firmware)suspend function. */
+ EDGETPU_MAX_WATERMARK_SUSPEND_TIME_US = 4,
/* Number of watermark types above */
- EDGETPU_MAX_WATERMARK_TYPE_COUNT = 2,
+ EDGETPU_MAX_WATERMARK_TYPE_COUNT = 5,
};
/* Max watermark. Only reported if it has a value larger than 0. */