diff options
author | Whi copybara merger <whitechapel-automerger@google.com> | 2022-04-25 22:55:04 +0000 |
---|---|---|
committer | Todd Poynor <toddpoynor@google.com> | 2022-04-26 22:55:00 +0000 |
commit | 0f2e98ab5247ba056469c7724151623ce7688879 (patch) | |
tree | 923a5df7fbb209dd2ff131d06f8949f91af9dba0 | |
parent | 2b5db85f5e6ded9d168bee4761c822f35472043a (diff) | |
download | janeiro-0f2e98ab5247ba056469c7724151623ce7688879.tar.gz |
[Copybara Auto Merge] Merge branch 'pro' into android13-gs-pixel-5.10
edgetpu: cast access_ok address param to avoid type warning
edgetpu: retry buffer map read-only on EFAULT
edgetpu: update client pid on wakelock acquire
edgetpu: add new firmware metrics
Bug: 201243473
Bug: 201243473
Bug: 228193834
Bug: 229311738
GitOrigin-RevId: d4d12d04147cd1bafae07dc4594d3b017028d518
Change-Id: Ia7d65b4d73bd0c65bac845f9047b08c32339a220
-rw-r--r-- | drivers/edgetpu/edgetpu-device-group.c | 22 |
-rw-r--r-- | drivers/edgetpu/edgetpu-fs.c | 14 |
-rw-r--r-- | drivers/edgetpu/edgetpu-pm.c | 3 |
-rw-r--r-- | drivers/edgetpu/edgetpu-usage-stats.c | 156 |
-rw-r--r-- | drivers/edgetpu/edgetpu-usage-stats.h | 18 |
5 files changed, 204 insertions, 9 deletions
diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c index 066bb9c..eb2c39d 100644 --- a/drivers/edgetpu/edgetpu-device-group.c +++ b/drivers/edgetpu/edgetpu-device-group.c @@ -1168,10 +1168,16 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group, if (size == 0) return ERR_PTR(-EINVAL); + if (!access_ok((const void *)host_addr, size)) { + etdev_err(etdev, "invalid address range in buffer map request"); + return ERR_PTR(-EFAULT); + } offset = host_addr & (PAGE_SIZE - 1); - /* overflow check */ - if (unlikely((size + offset) / PAGE_SIZE >= UINT_MAX - 1 || size + offset < size)) - return ERR_PTR(-ENOMEM); + /* overflow check (should also be caught by access_ok) */ + if (unlikely((size + offset) / PAGE_SIZE >= UINT_MAX - 1 || size + offset < size)) { + etdev_err(etdev, "address overflow in buffer map request"); + return ERR_PTR(-EFAULT); + } num_pages = DIV_ROUND_UP((size + offset), PAGE_SIZE); etdev_dbg(etdev, "%s: hostaddr=%#llx pages=%u", __func__, host_addr, num_pages); /* @@ -1204,10 +1210,20 @@ static struct page **edgetpu_pin_user_pages(struct edgetpu_device_group *group, *pnum_pages = num_pages; return pages; } + if (ret == -EFAULT && !*preadonly) { + foll_flags &= ~FOLL_WRITE; + *preadonly = true; + ret = pin_user_pages_fast(host_addr & PAGE_MASK, num_pages, + foll_flags, pages); + } if (ret < 0) { etdev_dbg(etdev, "pin_user_pages failed %u:%pK-%u: %d", group->workload_id, (void *)host_addr, num_pages, ret); + if (ret == -EFAULT) + etdev_err(etdev, + "bad address locking %u pages for %s", + num_pages, *preadonly ? 
"read" : "write"); if (ret != -ENOMEM) { num_pages = 0; goto error; diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c index e722738..d0efb67 100644 --- a/drivers/edgetpu/edgetpu-fs.c +++ b/drivers/edgetpu/edgetpu-fs.c @@ -582,6 +582,14 @@ static int edgetpu_ioctl_acquire_wakelock(struct edgetpu_client *client) struct edgetpu_thermal *thermal = client->etdev->thermal; LOCK(client); + /* + * Update client PID; the client may have been passed from the + * edgetpu service that originally created it to a new process. + * By the time the client holds TPU wakelocks it will have been + * passed to the new owning process. + */ + client->pid = current->pid; + client->tgid = current->tgid; edgetpu_thermal_lock(thermal); if (edgetpu_thermal_is_suspended(thermal)) { /* TPU is thermal suspended, so fail acquiring wakelock */ @@ -629,7 +637,8 @@ static int edgetpu_ioctl_acquire_wakelock(struct edgetpu_client *client) return 0; error_unlock: UNLOCK(client); - etdev_err(client->etdev, "PID: %d failed to acquire wakelock", client->pid); + etdev_err(client->etdev, "client pid %d failed to acquire wakelock", + client->pid); return ret; } @@ -658,7 +667,8 @@ edgetpu_ioctl_acquire_ext_mailbox(struct edgetpu_client *client, ret = edgetpu_chip_acquire_ext_mailbox(client, &ext_mailbox); if (ret) - etdev_err(client->etdev, "PID: %d failed to acquire ext mailbox", client->pid); + etdev_err(client->etdev, "client pid %d failed to acquire ext mailbox", + client->pid); return ret; } diff --git a/drivers/edgetpu/edgetpu-pm.c b/drivers/edgetpu/edgetpu-pm.c index ae075d0..a71232d 100644 --- a/drivers/edgetpu/edgetpu-pm.c +++ b/drivers/edgetpu/edgetpu-pm.c @@ -378,7 +378,8 @@ int edgetpu_pm_suspend(struct edgetpu_dev *etdev) if (NO_WAKELOCK(lc->client->wakelock) || !lc->client->wakelock->req_count) continue; - etdev_warn_ratelimited(etdev, "pid %d tgid %d count %d\n", + etdev_warn_ratelimited(etdev, + "client pid %d tgid %d count %d\n", lc->client->pid, lc->client->tgid, 
lc->client->wakelock->req_count); diff --git a/drivers/edgetpu/edgetpu-usage-stats.c b/drivers/edgetpu/edgetpu-usage-stats.c index 4c6dfcc..41d149d 100644 --- a/drivers/edgetpu/edgetpu-usage-stats.c +++ b/drivers/edgetpu/edgetpu-usage-stats.c @@ -570,6 +570,69 @@ static ssize_t hardware_preempt_count_store(struct device *dev, struct device_at static DEVICE_ATTR(hardware_preempt_count, 0664, hardware_preempt_count_show, hardware_preempt_count_store); +static ssize_t hardware_ctx_save_time_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + int64_t val; + + val = edgetpu_usage_get_counter(etdev, EDGETPU_COUNTER_HARDWARE_CTX_SAVE_TIME_US); + return scnprintf(buf, PAGE_SIZE, "%llu\n", val); +} + +static ssize_t hardware_ctx_save_time_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + + edgetpu_counter_clear(etdev, EDGETPU_COUNTER_HARDWARE_CTX_SAVE_TIME_US); + return count; +} +static DEVICE_ATTR(hardware_ctx_save_time, 0664, hardware_ctx_save_time_show, + hardware_ctx_save_time_store); + +static ssize_t scalar_fence_wait_time_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + int64_t val; + + val = edgetpu_usage_get_counter(etdev, EDGETPU_COUNTER_SCALAR_FENCE_WAIT_TIME_US); + return scnprintf(buf, PAGE_SIZE, "%llu\n", val); +} + +static ssize_t scalar_fence_wait_time_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + + edgetpu_counter_clear(etdev, EDGETPU_COUNTER_SCALAR_FENCE_WAIT_TIME_US); + return count; +} +static DEVICE_ATTR(scalar_fence_wait_time, 0664, scalar_fence_wait_time_show, + scalar_fence_wait_time_store); + +static ssize_t long_suspend_count_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct 
edgetpu_dev *etdev = dev_get_drvdata(dev); + int64_t val; + + val = edgetpu_usage_get_counter(etdev, EDGETPU_COUNTER_LONG_SUSPEND); + return scnprintf(buf, PAGE_SIZE, "%llu\n", val); +} + +static ssize_t long_suspend_count_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + + edgetpu_counter_clear(etdev, EDGETPU_COUNTER_LONG_SUSPEND); + return count; +} +static DEVICE_ATTR(long_suspend_count, 0664, long_suspend_count_show, + long_suspend_count_store); + static ssize_t outstanding_commands_max_show( struct device *dev, struct device_attribute *attr, char *buf) { @@ -629,6 +692,93 @@ static ssize_t preempt_depth_max_store( static DEVICE_ATTR(preempt_depth_max, 0664, preempt_depth_max_show, preempt_depth_max_store); +static ssize_t hardware_ctx_save_time_max_show( + struct device *dev, struct device_attribute *attr, char *buf) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + int64_t val; + + val = edgetpu_usage_get_max_watermark( + etdev, EDGETPU_MAX_WATERMARK_HARDWARE_CTX_SAVE_TIME_US); + return scnprintf(buf, PAGE_SIZE, "%llu\n", val); +} + +static ssize_t hardware_ctx_save_time_max_store( + struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + struct edgetpu_usage_stats *ustats = etdev->usage_stats; + + if (ustats) { + mutex_lock(&ustats->usage_stats_lock); + ustats->max_watermark[EDGETPU_MAX_WATERMARK_HARDWARE_CTX_SAVE_TIME_US] = 0; + mutex_unlock(&ustats->usage_stats_lock); + } + + return count; +} +static DEVICE_ATTR(hardware_ctx_save_time_max, 0664, hardware_ctx_save_time_max_show, + hardware_ctx_save_time_max_store); + +static ssize_t scalar_fence_wait_time_max_show( + struct device *dev, struct device_attribute *attr, char *buf) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + int64_t val; + + val = edgetpu_usage_get_max_watermark( + etdev, 
EDGETPU_MAX_WATERMARK_SCALAR_FENCE_WAIT_TIME_US); + return scnprintf(buf, PAGE_SIZE, "%llu\n", val); +} + +static ssize_t scalar_fence_wait_time_max_store( + struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + struct edgetpu_usage_stats *ustats = etdev->usage_stats; + + if (ustats) { + mutex_lock(&ustats->usage_stats_lock); + ustats->max_watermark[EDGETPU_MAX_WATERMARK_SCALAR_FENCE_WAIT_TIME_US] = 0; + mutex_unlock(&ustats->usage_stats_lock); + } + + return count; +} +static DEVICE_ATTR(scalar_fence_wait_time_max, 0664, scalar_fence_wait_time_max_show, + scalar_fence_wait_time_max_store); + +static ssize_t suspend_time_max_show( + struct device *dev, struct device_attribute *attr, char *buf) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + int64_t val; + + val = edgetpu_usage_get_max_watermark( + etdev, EDGETPU_MAX_WATERMARK_SUSPEND_TIME_US); + return scnprintf(buf, PAGE_SIZE, "%llu\n", val); +} + +static ssize_t suspend_time_max_store( + struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct edgetpu_dev *etdev = dev_get_drvdata(dev); + struct edgetpu_usage_stats *ustats = etdev->usage_stats; + + if (ustats) { + mutex_lock(&ustats->usage_stats_lock); + ustats->max_watermark[EDGETPU_MAX_WATERMARK_SUSPEND_TIME_US] = 0; + mutex_unlock(&ustats->usage_stats_lock); + } + + return count; +} +static DEVICE_ATTR(suspend_time_max, 0664, suspend_time_max_show, + suspend_time_max_store); + static ssize_t fw_thread_stats_show( struct device *dev, struct device_attribute *attr, char *buf) { @@ -681,8 +831,14 @@ static struct attribute *usage_stats_dev_attrs[] = { &dev_attr_param_cache_miss_count.attr, &dev_attr_context_preempt_count.attr, &dev_attr_hardware_preempt_count.attr, + &dev_attr_hardware_ctx_save_time.attr, + &dev_attr_scalar_fence_wait_time.attr, + &dev_attr_long_suspend_count.attr, &dev_attr_outstanding_commands_max.attr, 
&dev_attr_preempt_depth_max.attr, + &dev_attr_hardware_ctx_save_time_max.attr, + &dev_attr_scalar_fence_wait_time_max.attr, + &dev_attr_suspend_time_max.attr, &dev_attr_fw_thread_stats.attr, NULL, }; diff --git a/drivers/edgetpu/edgetpu-usage-stats.h b/drivers/edgetpu/edgetpu-usage-stats.h index c76899a..6b5d612 100644 --- a/drivers/edgetpu/edgetpu-usage-stats.h +++ b/drivers/edgetpu/edgetpu-usage-stats.h @@ -74,8 +74,14 @@ enum edgetpu_usage_counter_type { EDGETPU_COUNTER_CONTEXT_PREEMPTS = 6, /* Number of times a hardware preemption occurred. */ EDGETPU_COUNTER_HARDWARE_PREEMPTS = 7, - - EDGETPU_COUNTER_COUNT = 8, /* number of counters above */ + /* Total time(us) spent in saving hw ctx during hw preemption */ + EDGETPU_COUNTER_HARDWARE_CTX_SAVE_TIME_US = 8, + /* Total time(us) spent in waiting to hit scalar fence during hw preemption */ + EDGETPU_COUNTER_SCALAR_FENCE_WAIT_TIME_US = 9, + /* Number of times (firmware)suspend function takes longer than SLA time. */ + EDGETPU_COUNTER_LONG_SUSPEND = 10, + + EDGETPU_COUNTER_COUNT = 11, /* number of counters above */ }; /* Generic counter. Only reported if it has a value larger than 0. */ @@ -94,9 +100,15 @@ enum edgetpu_usage_max_watermark_type { EDGETPU_MAX_WATERMARK_OUT_CMDS = 0, /* Number of preempted contexts at any given time. */ EDGETPU_MAX_WATERMARK_PREEMPT_DEPTH = 1, + /* Max time(us) spent in saving hw ctx during hw preemption */ + EDGETPU_MAX_WATERMARK_HARDWARE_CTX_SAVE_TIME_US = 2, + /* Max time(us) spent in waiting to hit scalar fence during hw preemption */ + EDGETPU_MAX_WATERMARK_SCALAR_FENCE_WAIT_TIME_US = 3, + /* Max time(us) spent during (firmware)suspend function. */ + EDGETPU_MAX_WATERMARK_SUSPEND_TIME_US = 4, /* Number of watermark types above */ - EDGETPU_MAX_WATERMARK_TYPE_COUNT = 2, + EDGETPU_MAX_WATERMARK_TYPE_COUNT = 5, }; /* Max watermark. Only reported if it has a value larger than 0. */ |