diff options
author | Nishant Prajapati <nishantpjpt@google.com> | 2021-07-01 22:50:51 +0530 |
---|---|---|
committer | Nishant Prajapati <nishantpjpt@google.com> | 2021-07-02 17:54:59 +0530 |
commit | 418bcd71862114b9c810d326c53917a885d24b15 (patch) | |
tree | 7927093d10985ec420fab7b313b921374bf8edf5 | |
parent | f0240508d904897d78944ff773aef88efae8440f (diff) | |
download | janeiro-418bcd71862114b9c810d326c53917a885d24b15.tar.gz |
Merge remote-tracking branch 'pro' into android-gs-cloudripper-5.10
* origin/darwinn-2.0: (64 commits)
edgetpu: janeiro: fix mailbox offset calculation
edgetpu: unittests: add get_fatal_errors ioctl tests
edgetpu: unittests: add iremap-pool tests
edgetpu: detach mailbox when activation failed
edgetpu: reduce IOMMU fault reports severity
edgetpu: unittest handle enhanced open device KCI
edgetpu: gem5: Increasing the mask of gsa
edgetpu: unittests: upgrade to KUnit 5.10
edgetpu: unittests: fix tests with disabled IOMMU
edgetpu: allow buffer unmapping on errored groups
edgetpu: abrolhos return actual error for throttling kci
edgetpu: handle job lockup notification from firmware
edgetpu: add API to send fatal error notifications to a specific group
edgetpu: add fatal error event for firmware-detected job timeout
edgetpu: Improve the check in edgetpu_thermal_* functions
edgetpu: Modify the log function due to thermal suspended
edgetpu: fail wakelock acquiring if suspended
edgetpu: ignore offset arg in edgetpu_map_dmabuf
edgetpu: hermosa assume single die when config not set
edgetpu: unittests: add thermal test
edgetpu: abrolhos: hook exynos acpm functions
edgetpu: unittests: add helper of device tree
...
Signed-off-by: Nishant Prajapati <nishantpjpt@google.com>
Change-Id: I270c8e29f14ea9585ea258741130f060b56fc410
-rw-r--r-- | drivers/edgetpu/edgetpu-core.c | 7 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-device-group.c | 191 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-device-group.h | 19 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-dmabuf.c | 36 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-firmware.c | 51 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-fs.c | 57 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-google-iommu.c | 28 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-internal.h | 15 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-kci.c | 91 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-kci.h | 35 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-mailbox.c | 395 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-mailbox.h | 63 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-mapping.c | 2 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-mmu.h | 5 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-thermal.h | 34 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu-usage-stats.c | 1 | ||||
-rw-r--r-- | drivers/edgetpu/edgetpu.h | 37 | ||||
-rw-r--r-- | drivers/edgetpu/janeiro-device.c | 22 | ||||
-rw-r--r-- | drivers/edgetpu/janeiro-platform.c | 7 | ||||
-rw-r--r-- | drivers/edgetpu/janeiro-pm.c | 8 | ||||
-rw-r--r-- | drivers/edgetpu/janeiro/config-mailbox.h | 52 | ||||
-rw-r--r-- | drivers/edgetpu/janeiro/config.h | 4 |
22 files changed, 877 insertions, 283 deletions
diff --git a/drivers/edgetpu/edgetpu-core.c b/drivers/edgetpu/edgetpu-core.c index 0820e95..7701a71 100644 --- a/drivers/edgetpu/edgetpu-core.c +++ b/drivers/edgetpu/edgetpu-core.c @@ -323,6 +323,7 @@ static struct edgetpu_mailbox_manager_desc mailbox_manager_desc = { .num_mailbox = EDGETPU_NUM_MAILBOXES, .num_vii_mailbox = EDGETPU_NUM_VII_MAILBOXES, .num_p2p_mailbox = EDGETPU_NUM_P2P_MAILBOXES, + .num_ext_mailbox = EDGETPU_NUM_EXT_MAILBOXES, .get_context_csr_base = edgetpu_mailbox_get_context_csr_base, .get_cmd_queue_csr_base = edgetpu_mailbox_get_cmd_queue_csr_base, .get_resp_queue_csr_base = edgetpu_mailbox_get_resp_queue_csr_base, @@ -370,6 +371,7 @@ int edgetpu_device_add(struct edgetpu_dev *etdev, INIT_LIST_HEAD(&etdev->groups); etdev->n_groups = 0; etdev->group_join_lockout = false; + etdev->vcid_pool = (1u << EDGETPU_NUM_VCIDS) - 1; mutex_init(&etdev->state_lock); etdev->state = ETDEV_STATE_NOFW; @@ -582,11 +584,12 @@ void edgetpu_handle_firmware_crash(struct edgetpu_dev *etdev, if (crash_type == EDGETPU_FW_CRASH_UNRECOV_FAULT) { etdev_err(etdev, "firmware unrecoverable crash"); etdev->firmware_crash_count++; - edgetpu_fatal_error_notify(etdev); + edgetpu_fatal_error_notify(etdev, EDGETPU_ERROR_FW_CRASH); /* Restart firmware without chip reset */ edgetpu_watchdog_bite(etdev, false); } else { - etdev_err(etdev, "firmware crash event: %u", crash_type); + etdev_err(etdev, "firmware non-fatal crash event: %u", + crash_type); } } diff --git a/drivers/edgetpu/edgetpu-device-group.c b/drivers/edgetpu/edgetpu-device-group.c index ae3a8c3..6172b2c 100644 --- a/drivers/edgetpu/edgetpu-device-group.c +++ b/drivers/edgetpu/edgetpu-device-group.c @@ -6,7 +6,7 @@ */ #include <linux/atomic.h> -#include <linux/bits.h> +#include <linux/bitops.h> #include <linux/dma-direction.h> #include <linux/dma-mapping.h> #include <linux/eventfd.h> @@ -94,7 +94,7 @@ static int edgetpu_kci_join_group_worker(struct kci_worker_param *param) etdev_dbg(etdev, "%s: join group %u %u/%u", 
__func__, group->workload_id, i + 1, group->n_clients); - return edgetpu_kci_join_group(etdev->kci, etdev, group->n_clients, i); + return edgetpu_kci_join_group(etdev->kci, group->n_clients, i); } static int edgetpu_kci_leave_group_worker(struct kci_worker_param *param) @@ -111,7 +111,12 @@ static int edgetpu_kci_leave_group_worker(struct kci_worker_param *param) #endif /* EDGETPU_HAS_MCP */ -static int edgetpu_group_kci_open_device(struct edgetpu_device_group *group) +/* + * Activates the VII mailbox @group owns. + * + * Caller holds group->lock. + */ +static int edgetpu_group_activate(struct edgetpu_device_group *group) { u8 mailbox_id; int ret; @@ -119,14 +124,22 @@ static int edgetpu_group_kci_open_device(struct edgetpu_device_group *group) if (edgetpu_group_mailbox_detached_locked(group)) return 0; mailbox_id = edgetpu_group_context_id_locked(group); - ret = edgetpu_mailbox_activate(group->etdev, BIT(mailbox_id)); + ret = edgetpu_mailbox_activate(group->etdev, mailbox_id, group->vcid, !group->activated); if (ret) - etdev_err(group->etdev, "activate mailbox failed with %d", ret); + etdev_err(group->etdev, "activate mailbox for VCID %d failed with %d", group->vcid, + ret); + else + group->activated = true; atomic_inc(&group->etdev->job_count); return ret; } -static void edgetpu_group_kci_close_device(struct edgetpu_device_group *group) +/* + * Deactivates the VII mailbox @group owns. + * + * Caller holds group->lock. 
+ */ +static void edgetpu_group_deactivate(struct edgetpu_device_group *group) { u8 mailbox_id; int ret; @@ -134,10 +147,10 @@ static void edgetpu_group_kci_close_device(struct edgetpu_device_group *group) if (edgetpu_group_mailbox_detached_locked(group)) return; mailbox_id = edgetpu_group_context_id_locked(group); - ret = edgetpu_mailbox_deactivate(group->etdev, BIT(mailbox_id)); + ret = edgetpu_mailbox_deactivate(group->etdev, mailbox_id); if (ret) - etdev_err(group->etdev, "deactivate mailbox failed with %d", - ret); + etdev_err(group->etdev, "deactivate mailbox for VCID %d failed with %d", + group->vcid, ret); return; } @@ -160,12 +173,12 @@ static void edgetpu_device_group_kci_leave(struct edgetpu_device_group *group) * Theoretically we don't need to check @dev_inaccessible here. * @dev_inaccessible is true implies the client has wakelock count zero, under such case * edgetpu_mailbox_deactivate() has been called on releasing the wakelock and therefore this - * edgetpu_group_kci_close_device() call won't send any KCI. + * edgetpu_group_deactivate() call won't send any KCI. * Still have a check here in case this function does CSR programming other than calling * edgetpu_mailbox_deactivate() someday. 
*/ if (!group->dev_inaccessible) - edgetpu_group_kci_close_device(group); + edgetpu_group_deactivate(group); #else /* !EDGETPU_HAS_MULTI_GROUPS */ struct kci_worker_param *params = kmalloc_array(group->n_clients, sizeof(*params), GFP_KERNEL); @@ -207,7 +220,7 @@ static int edgetpu_device_group_kci_finalized(struct edgetpu_device_group *group) { #ifdef EDGETPU_HAS_MULTI_GROUPS - return edgetpu_group_kci_open_device(group); + return edgetpu_group_activate(group); #else /* !EDGETPU_HAS_MULTI_GROUPS */ struct kci_worker_param *params = kmalloc_array(group->n_clients, sizeof(*params), GFP_KERNEL); @@ -462,6 +475,7 @@ static void edgetpu_device_group_release(struct edgetpu_device_group *group) #ifdef EDGETPU_HAS_P2P_MAILBOX edgetpu_p2p_mailbox_release(group); #endif + edgetpu_mailbox_external_disable_free_locked(group); edgetpu_mailbox_remove_vii(&group->vii); group_release_members(group); } @@ -546,6 +560,22 @@ static int edgetpu_dev_add_group(struct edgetpu_dev *etdev, goto error_unlock; } #endif /* !EDGETPU_HAS_MULTI_GROUPS */ + if (group->etdev == etdev) { + u32 vcid_pool = etdev->vcid_pool; + +#ifdef EDGETPU_VCID_EXTRA_PARTITION + if (group->mbox_attr.partition_type != EDGETPU_PARTITION_EXTRA) + vcid_pool &= ~BIT(EDGETPU_VCID_EXTRA_PARTITION); + else + vcid_pool &= BIT(EDGETPU_VCID_EXTRA_PARTITION); +#endif + if (!vcid_pool) { + ret = -EBUSY; + goto error_unlock; + } + group->vcid = ffs(vcid_pool) - 1; + etdev->vcid_pool &= ~BIT(group->vcid); + } l->grp = edgetpu_device_group_get(group); list_add_tail(&l->list, &etdev->groups); etdev->n_groups++; @@ -620,6 +650,8 @@ void edgetpu_device_group_leave(struct edgetpu_client *client) mutex_lock(&client->etdev->groups_lock); list_for_each_entry(l, &client->etdev->groups, list) { if (l->grp == group) { + if (group->etdev == client->etdev) + client->etdev->vcid_pool |= BIT(group->vcid); list_del(&l->list); edgetpu_device_group_put(l->grp); kfree(l); @@ -1236,8 +1268,13 @@ alloc_mapping_from_useraddr(struct 
edgetpu_device_group *group, u64 host_addr, return hmap; error_free_sgt: - while (i > 0) { - i--; + /* + * Starting from kernel version 5.10, the caller must call sg_free_table + * to clean up any leftover allocations if sg_alloc_table_from_pages + * returns non-0 for failures. Calling sg_free_table is also fine with + * older kernel versions since sg_free_table handles this properly. + */ + for (; i >= 0; i--) { if (i == 0) sgt = &hmap->map.sgt; else @@ -1456,8 +1493,8 @@ int edgetpu_device_group_unmap(struct edgetpu_device_group *group, int ret = 0; mutex_lock(&group->lock); - if (!edgetpu_device_group_is_finalized(group)) { - ret = edgetpu_group_errno(group); + if (!is_finalized_or_errored(group)) { + ret = -EINVAL; goto unlock_group; } @@ -1639,17 +1676,74 @@ out: return ret; } -void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev) +/* + * Set @group status as errored, set the error mask, and notify the runtime of + * the fatal error event on the group. + */ +void edgetpu_group_fatal_error_notify(struct edgetpu_device_group *group, + uint error_mask) { - struct edgetpu_list_group *l; + etdev_dbg(group->etdev, "notify group %u error 0x%x", + group->workload_id, error_mask); + mutex_lock(&group->lock); + /* + * Only finalized groups may have handshake with the FW, mark + * them as errored. + */ + if (edgetpu_device_group_is_finalized(group)) + group->status = EDGETPU_DEVICE_GROUP_ERRORED; + group->fatal_errors |= error_mask; + mutex_unlock(&group->lock); + edgetpu_group_notify(group, EDGETPU_EVENT_FATAL_ERROR); +} + +/* + * For each group active on @etdev: set the group status as errored, set the + * error mask, and notify the runtime of the fatal error event. 
+ */ +void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev, uint error_mask) +{ + size_t i, num_groups = 0; struct edgetpu_device_group *group; + struct edgetpu_device_group **groups; + struct edgetpu_list_group *g; mutex_lock(&etdev->groups_lock); + groups = kmalloc_array(etdev->n_groups, sizeof(*groups), GFP_KERNEL); + if (unlikely(!groups)) { + /* + * Just give up setting status in this case, this only happens + * when the system is OOM. + */ + mutex_unlock(&etdev->groups_lock); + return; + } + /* + * Fetch the groups into an array to set the group status without + * holding @etdev->groups_lock. To prevent the potential deadlock that + * edgetpu_device_group_add() holds group->lock then etdev->groups_lock. + */ + etdev_for_each_group(etdev, g, group) { + if (edgetpu_device_group_is_disbanded(group)) + continue; + groups[num_groups++] = edgetpu_device_group_get(group); + } + mutex_unlock(&etdev->groups_lock); + for (i = 0; i < num_groups; i++) { + edgetpu_group_fatal_error_notify(groups[i], error_mask); + edgetpu_device_group_put(groups[i]); + } + kfree(groups); +} - etdev_for_each_group(etdev, l, group) - edgetpu_group_notify(group, EDGETPU_EVENT_FATAL_ERROR); +uint edgetpu_group_get_fatal_errors(struct edgetpu_device_group *group) +{ + uint fatal_errors; - mutex_unlock(&etdev->groups_lock); + mutex_lock(&group->lock); + fatal_errors = group->fatal_errors; + mutex_unlock(&group->lock); + return fatal_errors; } void edgetpu_group_detach_mailbox_locked(struct edgetpu_device_group *group) @@ -1670,7 +1764,7 @@ void edgetpu_group_close_and_detach_mailbox(struct edgetpu_device_group *group) * Detaching mailbox for an errored group is also fine. 
*/ if (is_finalized_or_errored(group)) { - edgetpu_group_kci_close_device(group); + edgetpu_group_deactivate(group); edgetpu_group_detach_mailbox_locked(group); } mutex_unlock(&group->lock); @@ -1694,11 +1788,54 @@ int edgetpu_group_attach_and_open_mailbox(struct edgetpu_device_group *group) * Only attaching mailbox for finalized groups. * Don't attach mailbox for errored groups. */ - if (edgetpu_device_group_is_finalized(group)) { - ret = edgetpu_group_attach_mailbox_locked(group); - if (!ret) - ret = edgetpu_group_kci_open_device(group); - } + if (!edgetpu_device_group_is_finalized(group)) + goto out_unlock; + ret = edgetpu_group_attach_mailbox_locked(group); + if (ret) + goto out_unlock; + ret = edgetpu_group_activate(group); + if (ret) + edgetpu_group_detach_mailbox_locked(group); + +out_unlock: mutex_unlock(&group->lock); return ret; } + +/* + * Return the group with id @vcid for device @etdev, with a reference held + * on the group (must call edgetpu_device_group_put when done), or NULL if + * no group with that VCID is found. 
+ */ +static struct edgetpu_device_group *get_group_by_vcid( + struct edgetpu_dev *etdev, u16 vcid) +{ + struct edgetpu_device_group *group = NULL; + struct edgetpu_device_group *tgroup; + struct edgetpu_list_group *g; + + mutex_lock(&etdev->groups_lock); + etdev_for_each_group(etdev, g, tgroup) { + if (tgroup->vcid == vcid) { + group = edgetpu_device_group_get(tgroup); + break; + } + } + mutex_unlock(&etdev->groups_lock); + return group; +} + +void edgetpu_handle_job_lockup(struct edgetpu_dev *etdev, u16 vcid) +{ + struct edgetpu_device_group *group; + + etdev_err(etdev, "firmware-detected job lockup on VCID %u", + vcid); + group = get_group_by_vcid(etdev, vcid); + if (!group) { + etdev_warn(etdev, "VCID %u group not found", vcid); + return; + } + edgetpu_group_fatal_error_notify(group, EDGETPU_ERROR_RUNTIME_TIMEOUT); + edgetpu_device_group_put(group); +} diff --git a/drivers/edgetpu/edgetpu-device-group.h b/drivers/edgetpu/edgetpu-device-group.h index 545b4bd..7b20dd0 100644 --- a/drivers/edgetpu/edgetpu-device-group.h +++ b/drivers/edgetpu/edgetpu-device-group.h @@ -85,6 +85,8 @@ struct edgetpu_device_group { * leader of this group. */ bool dev_inaccessible; + /* Virtual context ID to be sent to the firmware. */ + u16 vcid; /* protects everything in the following comment block */ struct mutex lock; @@ -104,6 +106,7 @@ struct edgetpu_device_group { */ struct edgetpu_client **members; enum edgetpu_device_group_status status; + bool activated; /* whether this group's VII has ever been activated */ struct edgetpu_vii vii; /* VII mailbox */ /* * Context ID ranges from EDGETPU_CONTEXT_VII_BASE to @@ -117,6 +120,14 @@ struct edgetpu_device_group { struct edgetpu_iommu_domain *etdomain; /* matrix of P2P mailboxes */ struct edgetpu_p2p_mailbox **p2p_mailbox_matrix; + /* + * External mailboxes associated with this group, only valid if + * external mailbox allocated and enabled. + */ + struct edgetpu_external_mailbox *ext_mailbox; + + /* Mask of errors set for this group. 
*/ + uint fatal_errors; /* end of fields protected by @lock */ @@ -372,8 +383,14 @@ bool edgetpu_in_any_group(struct edgetpu_dev *etdev); */ bool edgetpu_set_group_join_lockout(struct edgetpu_dev *etdev, bool lockout); +/* Notify @group about a fatal error for that group. */ +void edgetpu_group_fatal_error_notify(struct edgetpu_device_group *group, + uint error_mask); /* Notify all device groups of @etdev about a failure on the die */ -void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev); +void edgetpu_fatal_error_notify(struct edgetpu_dev *etdev, uint error_mask); + +/* Return fatal error signaled bitmask for device group */ +uint edgetpu_group_get_fatal_errors(struct edgetpu_device_group *group); /* * Detach and release the mailbox resources of VII from @group. diff --git a/drivers/edgetpu/edgetpu-dmabuf.c b/drivers/edgetpu/edgetpu-dmabuf.c index 3d1c190..1c89178 100644 --- a/drivers/edgetpu/edgetpu-dmabuf.c +++ b/drivers/edgetpu/edgetpu-dmabuf.c @@ -36,10 +36,8 @@ struct dmabuf_map_entry { /* SG table returned by dma_buf_map_attachment() */ struct sg_table *sgt; /* - * The SG table that shrunk from @sgt with region [offset, offset+size], - * where @offset and @size are the arguments in edgetpu_dmabuf_map. - * If @offset for mapping is zero and @size equals the total length of - * @sgt, this table is a duplicate of @sgt. + * The SG table that shrunk and condensed from @sgt with region [0, size), where @size is + * the size field in edgetpu_dmabuf_map which owns this entry. 
*/ struct sg_table shrunk_sgt; }; @@ -50,7 +48,6 @@ struct dmabuf_map_entry { */ struct edgetpu_dmabuf_map { struct edgetpu_mapping map; - u64 offset; u64 size; /* size of this mapping in bytes */ u32 mmu_flags; /* @@ -285,7 +282,6 @@ static void dmabuf_map_callback_release(struct edgetpu_mapping *map) uint i; if (tpu_addr) { - tpu_addr -= dmap->offset; if (IS_MIRRORED(map->flags)) { group_unmap_dmabuf(group, dmap, tpu_addr); } else { @@ -636,7 +632,6 @@ int edgetpu_map_dmabuf(struct edgetpu_device_group *group, int ret = -EINVAL; struct dma_buf *dmabuf; edgetpu_map_flag_t flags = arg->flags; - const u64 offset = arg->offset; u64 size; const enum dma_data_direction dir = edgetpu_host_dma_dir(flags & EDGETPU_MAP_DIR_MASK); @@ -645,30 +640,16 @@ int edgetpu_map_dmabuf(struct edgetpu_device_group *group, tpu_addr_t tpu_addr; uint i; - /* invalid DMA direction or offset is not page-aligned */ - if (!valid_dma_direction(dir) || offset_in_page(offset)) { - etdev_dbg(group->etdev, - "%s: valid=%d offset_in_page=%lu offset=0x%llx\n", - __func__, valid_dma_direction(dir), - offset_in_page(offset), offset); + if (!valid_dma_direction(dir)) { + etdev_dbg(group->etdev, "%s: invalid direction %d\n", __func__, dir); return -EINVAL; } - /* TODO(b/189278468): entirely ignore @offset */ - if (offset != 0) - etdev_warn_ratelimited(group->etdev, - "Non-zero offset for dmabuf mapping is deprecated"); dmabuf = dma_buf_get(arg->dmabuf_fd); if (IS_ERR(dmabuf)) { etdev_dbg(group->etdev, "%s: dma_buf_get returns %ld\n", __func__, PTR_ERR(dmabuf)); return PTR_ERR(dmabuf); } - if (offset >= dmabuf->size) { - etdev_dbg(group->etdev, - "%s: offset=0x%llx > dmabuf size=%zx\n", - __func__, offset, dmabuf->size); - goto err_put; - } mutex_lock(&group->lock); if (!edgetpu_device_group_is_finalized(group)) { @@ -687,7 +668,6 @@ int edgetpu_map_dmabuf(struct edgetpu_device_group *group, get_dma_buf(dmabuf); dmap->dmabufs[0] = dmabuf; - dmap->offset = offset; dmap->size = size = dmabuf->size; if 
(IS_MIRRORED(flags)) { for (i = 0; i < group->n_clients; i++) { @@ -734,7 +714,7 @@ int edgetpu_map_dmabuf(struct edgetpu_device_group *group, } dmap->map.die_index = arg->die_index; } - dmap->map.device_address = tpu_addr + offset; + dmap->map.device_address = tpu_addr; ret = edgetpu_mapping_add(&group->dmabuf_mappings, &dmap->map); if (ret) { etdev_dbg(group->etdev, "%s: edgetpu_mapping_add returns %d\n", @@ -751,7 +731,6 @@ err_release_map: dmabuf_map_callback_release(&dmap->map); err_unlock_group: mutex_unlock(&group->lock); -err_put: dma_buf_put(dmabuf); return ret; @@ -765,8 +744,9 @@ int edgetpu_unmap_dmabuf(struct edgetpu_device_group *group, u32 die_index, int ret = -EINVAL; mutex_lock(&group->lock); - if (!edgetpu_device_group_is_finalized(group)) { - ret = edgetpu_group_errno(group); + /* allows unmapping on errored groups */ + if (!edgetpu_device_group_is_finalized(group) && !edgetpu_device_group_is_errored(group)) { + ret = -EINVAL; goto out_unlock; } edgetpu_mapping_lock(mappings); diff --git a/drivers/edgetpu/edgetpu-firmware.c b/drivers/edgetpu/edgetpu-firmware.c index 00da3c4..2a1e577 100644 --- a/drivers/edgetpu/edgetpu-firmware.c +++ b/drivers/edgetpu/edgetpu-firmware.c @@ -14,6 +14,7 @@ #include <linux/string.h> #include <linux/types.h> +#include "edgetpu.h" #include "edgetpu-device-group.h" #include "edgetpu-firmware.h" #include "edgetpu-firmware-util.h" @@ -698,54 +699,6 @@ static const struct attribute_group edgetpu_firmware_attr_group = { .attrs = dev_attrs, }; -/* - * Sets all groups related to @etdev as errored. 
- */ -static void edgetpu_set_groups_error(struct edgetpu_dev *etdev) -{ - size_t i, num_groups = 0; - struct edgetpu_device_group *group; - struct edgetpu_device_group **groups; - struct edgetpu_list_group *g; - - mutex_lock(&etdev->groups_lock); - groups = kmalloc_array(etdev->n_groups, sizeof(*groups), GFP_KERNEL); - if (unlikely(!groups)) { - /* - * Just give up setting status in this case, this only happens - * when the system is OOM. - */ - mutex_unlock(&etdev->groups_lock); - edgetpu_fatal_error_notify(etdev); - return; - } - /* - * Fetch the groups into an array to set the group status without - * holding @etdev->groups_lock. To prevent the potential deadlock that - * edgetpu_device_group_add() holds group->lock then etdev->groups_lock. - */ - etdev_for_each_group(etdev, g, group) { - if (edgetpu_device_group_is_disbanded(group)) - continue; - groups[num_groups++] = edgetpu_device_group_get(group); - } - mutex_unlock(&etdev->groups_lock); - for (i = 0; i < num_groups; i++) { - group = groups[i]; - mutex_lock(&group->lock); - /* - * Only finalized groups may have handshake with the FW, mark - * them as errored. - */ - if (edgetpu_device_group_is_finalized(group)) - group->status = EDGETPU_DEVICE_GROUP_ERRORED; - mutex_unlock(&group->lock); - edgetpu_device_group_put(group); - } - edgetpu_fatal_error_notify(etdev); - kfree(groups); -} - static void edgetpu_firmware_wdt_timeout_action(void *data) { int ret; @@ -762,7 +715,7 @@ static void edgetpu_firmware_wdt_timeout_action(void *data) * groups the CLOSE_DEVICE KCIs won't be sent. */ edgetpu_handshake_clear_fw_state(&etdev->mailbox_manager->open_devices); - edgetpu_set_groups_error(etdev); + edgetpu_fatal_error_notify(etdev, EDGETPU_ERROR_WATCHDOG_TIMEOUT); /* Another procedure is loading the firmware, let it do the work. 
*/ if (edgetpu_firmware_is_loading(etdev)) diff --git a/drivers/edgetpu/edgetpu-fs.c b/drivers/edgetpu/edgetpu-fs.c index 256f946..6fbd642 100644 --- a/drivers/edgetpu/edgetpu-fs.c +++ b/drivers/edgetpu/edgetpu-fs.c @@ -107,17 +107,20 @@ static int edgetpu_fs_release(struct inode *inode, struct file *file) wakelock_count = edgetpu_wakelock_lock(client->wakelock); mutex_lock(&client->group_lock); /* - * @wakelock_count = 0 means the device might be powered off. Mailbox is removed when the - * group is released, we need to ensure the device is powered to prevent kernel panic on - * programming VII mailbox CSRs. - * If the device is known to be not powered then simply set dev_inaccessible to true to - * prevent device interactions during group releasing. + * @wakelock_count = 0 means the device might be powered off. And for group with a + * non-detachable mailbox, its mailbox is removed when the group is released, in such case + * we need to ensure the device is powered to prevent kernel panic on programming VII + * mailbox CSRs. + * + * For mailbox-detachable groups the mailbox had been removed when the wakelock was + * released, edgetpu_device_group_release() doesn't need the device be powered in this case. */ - if (!wakelock_count && client->group) { + if (!wakelock_count && client->group && !client->group->mailbox_detachable) { /* assumes @group->etdev == @client->etdev, i.e. 
@client is the leader of @group */ - if (edgetpu_pm_get_if_powered(etdev->pm)) + if (!edgetpu_pm_get(etdev->pm)) wakelock_count = 1; else + /* failed to power on - prevent group releasing from accessing the device */ client->group->dev_inaccessible = true; } mutex_unlock(&client->group_lock); @@ -576,6 +579,7 @@ static int edgetpu_ioctl_acquire_wakelock(struct edgetpu_client *client) { int count; int ret; + struct edgetpu_thermal *thermal = client->etdev->thermal; edgetpu_wakelock_lock(client->wakelock); /* when NO_WAKELOCK: count should be 1 so here is a no-op */ @@ -585,7 +589,18 @@ static int edgetpu_ioctl_acquire_wakelock(struct edgetpu_client *client) return count; } if (!count) { - ret = edgetpu_pm_get(client->etdev->pm); + edgetpu_thermal_lock(thermal); + if (edgetpu_thermal_is_suspended(thermal)) { + /* TPU is thermal suspended, so fail acquiring wakelock */ + ret = -EAGAIN; + etdev_warn_ratelimited(client->etdev, + "wakelock acquire rejected due to thermal suspend"); + edgetpu_thermal_unlock(thermal); + goto error_release; + } else { + ret = edgetpu_pm_get(client->etdev->pm); + edgetpu_thermal_unlock(thermal); + } if (ret) { etdev_warn(client->etdev, "%s: pm_get failed (%d)", __func__, ret); @@ -628,9 +643,9 @@ edgetpu_ioctl_dram_usage(struct edgetpu_dev *etdev, static int edgetpu_ioctl_acquire_ext_mailbox(struct edgetpu_client *client, - struct edgetpu_ext_mailbox __user *argp) + struct edgetpu_ext_mailbox_ioctl __user *argp) { - struct edgetpu_ext_mailbox ext_mailbox; + struct edgetpu_ext_mailbox_ioctl ext_mailbox; if (copy_from_user(&ext_mailbox, argp, sizeof(ext_mailbox))) return -EFAULT; @@ -640,9 +655,9 @@ edgetpu_ioctl_acquire_ext_mailbox(struct edgetpu_client *client, static int edgetpu_ioctl_release_ext_mailbox(struct edgetpu_client *client, - struct edgetpu_ext_mailbox __user *argp) + struct edgetpu_ext_mailbox_ioctl __user *argp) { - struct edgetpu_ext_mailbox ext_mailbox; + struct edgetpu_ext_mailbox_ioctl ext_mailbox; if 
(copy_from_user(&ext_mailbox, argp, sizeof(ext_mailbox))) return -EFAULT; @@ -650,6 +665,21 @@ edgetpu_ioctl_release_ext_mailbox(struct edgetpu_client *client, return edgetpu_chip_release_ext_mailbox(client, &ext_mailbox); } +static int edgetpu_ioctl_get_fatal_errors(struct edgetpu_client *client, + __u32 __user *argp) +{ + u32 fatal_errors = 0; + int ret = 0; + + mutex_lock(&client->group_lock); + if (client->group) + fatal_errors = edgetpu_group_get_fatal_errors(client->group); + mutex_unlock(&client->group_lock); + if (copy_to_user(argp, &fatal_errors, sizeof(fatal_errors))) + ret = -EFAULT; + return ret; +} + long edgetpu_ioctl(struct file *file, uint cmd, ulong arg) { struct edgetpu_client *client = file->private_data; @@ -738,6 +768,9 @@ long edgetpu_ioctl(struct file *file, uint cmd, ulong arg) case EDGETPU_RELEASE_EXT_MAILBOX: ret = edgetpu_ioctl_release_ext_mailbox(client, argp); break; + case EDGETPU_GET_FATAL_ERRORS: + ret = edgetpu_ioctl_get_fatal_errors(client, argp); + break; default: return -ENOTTY; /* unknown command */ diff --git a/drivers/edgetpu/edgetpu-google-iommu.c b/drivers/edgetpu/edgetpu-google-iommu.c index 9d28949..851a326 100644 --- a/drivers/edgetpu/edgetpu-google-iommu.c +++ b/drivers/edgetpu/edgetpu-google-iommu.c @@ -101,21 +101,21 @@ static int edgetpu_iommu_dev_fault_handler(struct iommu_fault *fault, struct edgetpu_dev *etdev = (struct edgetpu_dev *)token; if (fault->type == IOMMU_FAULT_DMA_UNRECOV) { - etdev_err(etdev, "Unrecoverable IOMMU fault!\n"); - etdev_err(etdev, "Reason = %08X\n", fault->event.reason); - etdev_err(etdev, "flags = %08X\n", fault->event.flags); - etdev_err(etdev, "pasid = %08X\n", fault->event.pasid); - etdev_err(etdev, "perms = %08X\n", fault->event.perm); - etdev_err(etdev, "addr = %llX\n", fault->event.addr); - etdev_err(etdev, "fetch_addr = %llX\n", + etdev_warn(etdev, "Unrecoverable IOMMU fault!\n"); + etdev_warn(etdev, "Reason = %08X\n", fault->event.reason); + etdev_warn(etdev, "flags = %08X\n", 
fault->event.flags); + etdev_warn(etdev, "pasid = %08X\n", fault->event.pasid); + etdev_warn(etdev, "perms = %08X\n", fault->event.perm); + etdev_warn(etdev, "addr = %llX\n", fault->event.addr); + etdev_warn(etdev, "fetch_addr = %llX\n", fault->event.fetch_addr); } else if (fault->type == IOMMU_FAULT_PAGE_REQ) { - etdev_err(etdev, "IOMMU page request fault!\n"); - etdev_err(etdev, "flags = %08X\n", fault->prm.flags); - etdev_err(etdev, "pasid = %08X\n", fault->prm.pasid); - etdev_err(etdev, "grpid = %08X\n", fault->prm.grpid); - etdev_err(etdev, "perms = %08X\n", fault->prm.perm); - etdev_err(etdev, "addr = %llX\n", fault->prm.addr); + etdev_dbg(etdev, "IOMMU page request fault!\n"); + etdev_dbg(etdev, "flags = %08X\n", fault->prm.flags); + etdev_dbg(etdev, "pasid = %08X\n", fault->prm.pasid); + etdev_dbg(etdev, "grpid = %08X\n", fault->prm.grpid); + etdev_dbg(etdev, "perms = %08X\n", fault->prm.perm); + etdev_dbg(etdev, "addr = %llX\n", fault->prm.addr); } // Tell the IOMMU driver to carry on return -EAGAIN; @@ -168,7 +168,7 @@ static int edgetpu_iommu_fault_handler(struct iommu_domain *domain, struct edgetpu_iommu_domain *etdomain = (struct edgetpu_iommu_domain *)token; - dev_err(dev, "IOMMU fault on address %08lX. PASID = %u flags = %08X", + dev_dbg(dev, "IOMMU fault on address %08lX. 
PASID = %u flags = %08X", iova, etdomain->pasid, flags); // Tell the IOMMU driver we are OK with this fault return 0; diff --git a/drivers/edgetpu/edgetpu-internal.h b/drivers/edgetpu/edgetpu-internal.h index f2f2ff1..7c4966e 100644 --- a/drivers/edgetpu/edgetpu-internal.h +++ b/drivers/edgetpu/edgetpu-internal.h @@ -167,10 +167,16 @@ struct edgetpu_dev { struct dentry *d_entry; /* debugfs dir for this device */ struct mutex state_lock; /* protects state of this device */ enum edgetpu_dev_state state; - struct mutex groups_lock; /* protects groups, n_groups, and lockout */ + struct mutex groups_lock; + /* fields protected by @groups_lock */ + struct list_head groups; uint n_groups; /* number of entries in @groups */ bool group_join_lockout; /* disable group join while reinit */ + u32 vcid_pool; /* bitmask of VCID to be allocated */ + + /* end of fields protected by @groups_lock */ + void *mmu_cookie; /* mmu driver private data */ void *dram_cookie; /* on-device DRAM private data */ struct edgetpu_mailbox_manager *mailbox_manager; @@ -333,6 +339,9 @@ static inline bool edgetpu_is_external_wrapper_class_file(struct file *file) void edgetpu_handle_firmware_crash(struct edgetpu_dev *etdev, enum edgetpu_fw_crash_type crash_type); +/* Handle notification of job lockup from firmware */ +void edgetpu_handle_job_lockup(struct edgetpu_dev *etdev, u16 vcid); + /* Bus (Platform/PCI) <-> Core API */ int __init edgetpu_init(void); @@ -424,10 +433,10 @@ int edgetpu_get_state_errno_locked(struct edgetpu_dev *etdev); /* Chip-specific code to acquire external mailboxes */ int edgetpu_chip_acquire_ext_mailbox(struct edgetpu_client *client, - struct edgetpu_ext_mailbox *ext_mbox); + struct edgetpu_ext_mailbox_ioctl *args); /* Chip-specific code to release external mailboxes */ int edgetpu_chip_release_ext_mailbox(struct edgetpu_client *client, - struct edgetpu_ext_mailbox *ext_mbox); + struct edgetpu_ext_mailbox_ioctl *args); #endif /* __EDGETPU_INTERNAL_H__ */ diff --git 
a/drivers/edgetpu/edgetpu-kci.c b/drivers/edgetpu/edgetpu-kci.c index c62ac73..73a47cc 100644 --- a/drivers/edgetpu/edgetpu-kci.c +++ b/drivers/edgetpu/edgetpu-kci.c @@ -6,9 +6,9 @@ * Copyright (C) 2019 Google, Inc. */ +#include <linux/bits.h> #include <linux/circ_buf.h> #include <linux/device.h> -#include <linux/dma-mapping.h> /* dmam_alloc_coherent */ #include <linux/errno.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -93,6 +93,9 @@ edgetpu_reverse_kci_consume_response(struct edgetpu_dev *etdev, edgetpu_handle_firmware_crash( etdev, (enum edgetpu_fw_crash_type)resp->retval); break; + case RKCI_JOB_LOCKUP: + edgetpu_handle_job_lockup(etdev, resp->retval); + break; default: etdev_warn(etdev, "%s: Unrecognized KCI request: 0x%x\n", __func__, resp->code); @@ -690,6 +693,31 @@ static int edgetpu_kci_send_cmd_return_resp( return resp->code; } +static int edgetpu_kci_send_cmd_with_data(struct edgetpu_kci *kci, + struct edgetpu_command_element *cmd, const void *data, + size_t size) +{ + struct edgetpu_dev *etdev = kci->mailbox->etdev; + struct edgetpu_coherent_mem mem; + int ret; + + ret = edgetpu_iremap_alloc(etdev, size, &mem, EDGETPU_CONTEXT_KCI); + if (ret) + return ret; + memcpy(mem.vaddr, data, size); + + etdev_dbg(etdev, "%s: map kva=%pK iova=0x%llx dma=%pad", __func__, mem.vaddr, mem.tpu_addr, + &mem.dma_addr); + + cmd->dma.address = mem.tpu_addr; + cmd->dma.size = size; + ret = edgetpu_kci_send_cmd(kci, cmd); + edgetpu_iremap_free(etdev, &mem, EDGETPU_CONTEXT_KCI); + etdev_dbg(etdev, "%s: unmap kva=%pK iova=0x%llx dma=%pad", __func__, mem.vaddr, + mem.tpu_addr, &mem.dma_addr); + return ret; +} + int edgetpu_kci_send_cmd(struct edgetpu_kci *kci, struct edgetpu_command_element *cmd) { @@ -741,51 +769,19 @@ int edgetpu_kci_map_trace_buffer(struct edgetpu_kci *kci, tpu_addr_t tpu_addr, return edgetpu_kci_send_cmd(kci, &cmd); } -int edgetpu_kci_join_group(struct edgetpu_kci *kci, struct edgetpu_dev *etdev, - u8 n_dies, u8 vid) +int 
edgetpu_kci_join_group(struct edgetpu_kci *kci, u8 n_dies, u8 vid) { - struct edgetpu_kci_device_group_detail *detail; - const u32 size = sizeof(*detail); - dma_addr_t dma_addr; - tpu_addr_t tpu_addr; struct edgetpu_command_element cmd = { .code = KCI_CODE_JOIN_GROUP, - .dma = { - .size = size, - }, }; - const u32 flags = EDGETPU_MMU_DIE | EDGETPU_MMU_32 | EDGETPU_MMU_HOST; - int ret; + const struct edgetpu_kci_device_group_detail detail = { + .n_dies = n_dies, + .vid = vid, + }; if (!kci) return -ENODEV; - detail = dma_alloc_coherent(etdev->dev, sizeof(*detail), &dma_addr, - GFP_KERNEL); - if (!detail) - return -ENOMEM; - detail->n_dies = n_dies; - detail->vid = vid; - - tpu_addr = edgetpu_mmu_tpu_map(etdev, dma_addr, size, DMA_TO_DEVICE, - EDGETPU_CONTEXT_KCI, flags); - if (!tpu_addr) { - etdev_err(etdev, "%s: failed to map group detail to TPU", - __func__); - dma_free_coherent(etdev->dev, size, detail, dma_addr); - return -EINVAL; - } - - cmd.dma.address = tpu_addr; - etdev_dbg(etdev, "%s: map kva=%pK iova=0x%llx dma=%pad", __func__, - detail, tpu_addr, &dma_addr); - - ret = edgetpu_kci_send_cmd(kci, &cmd); - edgetpu_mmu_tpu_unmap(etdev, tpu_addr, size, EDGETPU_CONTEXT_KCI); - dma_free_coherent(etdev->dev, size, detail, dma_addr); - etdev_dbg(etdev, "%s: unmap kva=%pK iova=0x%llx dma=%pad", __func__, - detail, tpu_addr, &dma_addr); - - return ret; + return edgetpu_kci_send_cmd_with_data(kci, &cmd, &detail, sizeof(detail)); } int edgetpu_kci_leave_group(struct edgetpu_kci *kci) @@ -989,26 +985,33 @@ int edgetpu_kci_get_debug_dump(struct edgetpu_kci *kci, tpu_addr_t tpu_addr, return edgetpu_kci_send_cmd(kci, &cmd); } -int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_ids) +int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_id, s16 vcid, bool first_open) { + const struct edgetpu_kci_open_device_detail detail = { + .mailbox_id = mailbox_id, + .vcid = vcid, + .flags = first_open, + }; struct edgetpu_command_element cmd = { .code = 
KCI_CODE_OPEN_DEVICE, .dma = { - .flags = mailbox_ids, + .flags = BIT(mailbox_id), }, }; if (!kci) return -ENODEV; - return edgetpu_kci_send_cmd(kci, &cmd); + if (vcid < 0) + return edgetpu_kci_send_cmd(kci, &cmd); + return edgetpu_kci_send_cmd_with_data(kci, &cmd, &detail, sizeof(detail)); } -int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_ids) +int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_id) { struct edgetpu_command_element cmd = { .code = KCI_CODE_CLOSE_DEVICE, .dma = { - .flags = mailbox_ids, + .flags = BIT(mailbox_id), }, }; diff --git a/drivers/edgetpu/edgetpu-kci.h b/drivers/edgetpu/edgetpu-kci.h index 05f87c8..deb258d 100644 --- a/drivers/edgetpu/edgetpu-kci.h +++ b/drivers/edgetpu/edgetpu-kci.h @@ -122,6 +122,7 @@ enum edgetpu_reverse_kci_code { RKCI_CHIP_CODE_LAST = 0x7FFF, RKCI_GENERIC_CODE_FIRST = 0x8000, RKCI_FIRMWARE_CRASH = RKCI_GENERIC_CODE_FIRST + 0, + RKCI_JOB_LOCKUP = RKCI_GENERIC_CODE_FIRST + 1, RKCI_GENERIC_CODE_LAST = 0xFFFF, }; @@ -204,6 +205,29 @@ struct edgetpu_kci_device_group_detail { u8 reserved[6]; /* padding */ }; +struct edgetpu_kci_open_device_detail { + /* The ID of mailbox to be opened. */ + u16 mailbox_id; + /* + * Virtual context ID @mailbox_id is associated to. + * For device groups with @mailbox_detachable attribute the mailbox attached to the group + * can be different after wakelock re-acquired. Firmware uses this VCID to identify the + * device group. + */ + u16 vcid; + /* + * Extra flags for the attributes of this request. + * Set RESERVED bits to 0 to ensure backwards compatibility. + * + * Bitfields: + * [0:0] - first_open: Specifies if this is the first time we are calling mailbox open + * KCI for this VCID after it has been allocated to a device group. This allows + * firmware to clean up/reset the memory allocator for that partition. + * [31:1] - RESERVED + */ + u32 flags; +}; + /* * Initializes a KCI object. 
* @@ -328,8 +352,7 @@ int edgetpu_kci_map_trace_buffer(struct edgetpu_kci *kci, tpu_addr_t tpu_addr, * * Returns the code of response, or a negative errno on error. */ -int edgetpu_kci_join_group(struct edgetpu_kci *kci, struct edgetpu_dev *etdev, - u8 n_dies, u8 vid); +int edgetpu_kci_join_group(struct edgetpu_kci *kci, u8 n_dies, u8 vid); /* Informs the TPU to leave the group it currently belongs to. */ int edgetpu_kci_leave_group(struct edgetpu_kci *kci); @@ -344,20 +367,20 @@ int edgetpu_kci_get_debug_dump(struct edgetpu_kci *kci, tpu_addr_t tpu_addr, size_t size); /* - * Inform the firmware to prepare to serve the VII of @mailbox_ids. + * Inform the firmware to prepare to serve the VII of @mailbox_id. * * You usually shouldn't call this directly - consider using * edgetpu_mailbox_activate() instead. */ -int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_ids); +int edgetpu_kci_open_device(struct edgetpu_kci *kci, u32 mailbox_id, s16 vcid, bool first_open); /* - * Inform the firmware the VII with @mailbox_ids are closed. + * Inform the firmware the VII with @mailbox_id is closed. * * You usually shouldn't call this directly - consider using * edgetpu_mailbox_deactivate() instead. */ -int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_ids); +int edgetpu_kci_close_device(struct edgetpu_kci *kci, u32 mailbox_id); /* Cancel work queues or wait until they're done */ void edgetpu_kci_cancel_work_queues(struct edgetpu_kci *kci); diff --git a/drivers/edgetpu/edgetpu-mailbox.c b/drivers/edgetpu/edgetpu-mailbox.c index 1a19185..cf996f7 100644 --- a/drivers/edgetpu/edgetpu-mailbox.c +++ b/drivers/edgetpu/edgetpu-mailbox.c @@ -80,6 +80,24 @@ edgetpu_mailbox_create_locked(struct edgetpu_mailbox_manager *mgr, uint index) return mailbox; } +/* Caller must hold @mgr->mailboxes_lock. 
*/ +static int edgetpu_mailbox_remove_locked(struct edgetpu_mailbox_manager *mgr, + struct edgetpu_mailbox *mailbox) +{ + /* simple security checks */ + if (mailbox->mailbox_id >= mgr->num_mailbox || + mgr->mailboxes[mailbox->mailbox_id] != mailbox) { + return -EINVAL; + } + + mgr->mailboxes[mailbox->mailbox_id] = NULL; + /* KCI mailbox is a special case */ + if (mailbox->mailbox_id == KERNEL_MAILBOX_INDEX) + edgetpu_kci_release(mgr->etdev, mailbox->internal.kci); + kfree(mailbox); + return 0; +} + /* * Disables the @index-th mailbox via setting CSR. Doesn't need * @mgr->mailboxes[index] be allocated. @@ -342,23 +360,13 @@ out: int edgetpu_mailbox_remove(struct edgetpu_mailbox_manager *mgr, struct edgetpu_mailbox *mailbox) { unsigned long flags; + int ret; write_lock_irqsave(&mgr->mailboxes_lock, flags); - /* simple security checks */ - if (mailbox->mailbox_id >= mgr->num_mailbox || - mgr->mailboxes[mailbox->mailbox_id] != mailbox) { - write_unlock_irqrestore(&mgr->mailboxes_lock, flags); - return -EINVAL; - } - - mgr->mailboxes[mailbox->mailbox_id] = NULL; - /* KCI mailbox is a special case */ - if (mailbox->mailbox_id == KERNEL_MAILBOX_INDEX) - edgetpu_kci_release(mgr->etdev, mailbox->internal.kci); + ret = edgetpu_mailbox_remove_locked(mgr, mailbox); write_unlock_irqrestore(&mgr->mailboxes_lock, flags); - kfree(mailbox); - return 0; + return ret; } /* @@ -548,6 +556,7 @@ edgetpu_mailbox_create_mgr(struct edgetpu_dev *etdev, total += 1; /* KCI mailbox */ total += desc->num_vii_mailbox; total += desc->num_p2p_mailbox; + total += desc->num_ext_mailbox; if (total > desc->num_mailbox) return ERR_PTR(-EINVAL); mgr = devm_kzalloc(etdev->dev, sizeof(*mgr), GFP_KERNEL); @@ -561,6 +570,8 @@ edgetpu_mailbox_create_mgr(struct edgetpu_dev *etdev, mgr->vii_index_to = mgr->vii_index_from + desc->num_vii_mailbox; mgr->p2p_index_from = mgr->vii_index_to; mgr->p2p_index_to = mgr->p2p_index_from + desc->num_p2p_mailbox; + mgr->ext_index_from = mgr->p2p_index_to; + 
mgr->ext_index_to = mgr->ext_index_from + desc->num_ext_mailbox; mgr->get_context_csr_base = desc->get_context_csr_base; mgr->get_cmd_queue_csr_base = desc->get_cmd_queue_csr_base; @@ -769,87 +780,373 @@ void edgetpu_mailbox_restore_active_vii_queues(struct edgetpu_dev *etdev) kfree(groups); } -int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_ids) +static int edgetpu_mailbox_external_alloc_queue_batch(struct edgetpu_external_mailbox *ext_mailbox) +{ + int ret, i; + struct edgetpu_mailbox *mailbox; + struct edgetpu_mailbox_attr attr; + struct edgetpu_mailbox_descriptor *desc; + + attr = ext_mailbox->attr; + + for (i = 0; i < ext_mailbox->count; i++) { + desc = &ext_mailbox->descriptors[i]; + mailbox = desc->mailbox; + ret = edgetpu_mailbox_alloc_queue(ext_mailbox->etdev, mailbox, attr.cmd_queue_size, + attr.sizeof_cmd, MAILBOX_CMD_QUEUE, + &desc->cmd_queue_mem); + if (ret) + goto undo; + + ret = edgetpu_mailbox_alloc_queue(ext_mailbox->etdev, mailbox, attr.resp_queue_size, + attr.sizeof_resp, MAILBOX_RESP_QUEUE, + &desc->resp_queue_mem); + if (ret) { + edgetpu_mailbox_free_queue(ext_mailbox->etdev, mailbox, + &desc->cmd_queue_mem); + goto undo; + } + } + return 0; +undo: + while (i--) { + desc = &ext_mailbox->descriptors[i]; + mailbox = desc->mailbox; + edgetpu_mailbox_free_queue(ext_mailbox->etdev, mailbox, &desc->cmd_queue_mem); + edgetpu_mailbox_free_queue(ext_mailbox->etdev, mailbox, &desc->resp_queue_mem); + } + return ret; +} + +static void edgetpu_mailbox_external_free_queue_batch(struct edgetpu_external_mailbox *ext_mailbox) +{ + u32 i; + struct edgetpu_mailbox *mailbox; + struct edgetpu_mailbox_descriptor *desc; + + for (i = 0; i < ext_mailbox->count; i++) { + desc = &ext_mailbox->descriptors[i]; + mailbox = desc->mailbox; + edgetpu_mailbox_free_queue(ext_mailbox->etdev, mailbox, &desc->cmd_queue_mem); + edgetpu_mailbox_free_queue(ext_mailbox->etdev, mailbox, &desc->resp_queue_mem); + } +} + +/* + * Checks if the indexes given for 
external mailboxes are in range of mailbox + * manager(@mgr) managing the external mailboxes. + */ +static bool edgetpu_mailbox_external_check_range(struct edgetpu_mailbox_manager *mgr, + const int start, const int end) +{ + return (start <= end) && (mgr->ext_index_from <= start && mgr->ext_index_to > end); +} + +static int edgetpu_mailbox_external_alloc(struct edgetpu_device_group *group, + struct edgetpu_external_mailbox_req *ext_mailbox_req) +{ + u32 i, j = 0; + struct edgetpu_mailbox_manager *mgr = group->etdev->mailbox_manager; + struct edgetpu_mailbox *mailbox; + int ret = 0, c = 0, count; + struct edgetpu_external_mailbox *ext_mailbox; + struct edgetpu_mailbox_attr attr; + unsigned long flags; + + if (!ext_mailbox_req) + return -EINVAL; + + count = ext_mailbox_req->count; + attr = ext_mailbox_req->attr; + + if (!edgetpu_mailbox_external_check_range(mgr, ext_mailbox_req->start, + ext_mailbox_req->end)) + return -ERANGE; + + ext_mailbox = kzalloc(sizeof(*ext_mailbox), GFP_KERNEL); + if (!ext_mailbox) + return -ENOMEM; + + ext_mailbox->descriptors = + kcalloc(count, sizeof(struct edgetpu_mailbox_descriptor), GFP_KERNEL); + if (!ext_mailbox->descriptors) { + kfree(ext_mailbox); + return -ENOMEM; + } + + ext_mailbox->attr = attr; + ext_mailbox->count = count; + ext_mailbox->etdev = group->etdev; + + write_lock_irqsave(&mgr->mailboxes_lock, flags); + for (i = ext_mailbox_req->start; i <= ext_mailbox_req->end; i++) { + if (!mgr->mailboxes[i]) + c++; + } + if (c < count) { + ret = -EBUSY; + goto unlock; + } + + for (i = ext_mailbox_req->start; i <= ext_mailbox_req->end && j < count; i++) { + if (!mgr->mailboxes[i]) { + mailbox = edgetpu_mailbox_create_locked(mgr, i); + if (!IS_ERR(mailbox)) { + mgr->mailboxes[i] = mailbox; + ext_mailbox->descriptors[j++].mailbox = mailbox; + } else { + goto release; + } + } + } + + ret = edgetpu_mailbox_external_alloc_queue_batch(ext_mailbox); + if (ret) + goto release; + write_unlock_irqrestore(&mgr->mailboxes_lock, flags); + + for 
(i = 0; i < count; i++) { + mailbox = ext_mailbox->descriptors[i].mailbox; + edgetpu_mailbox_set_priority(mailbox, attr.priority); + EDGETPU_MAILBOX_CONTEXT_WRITE(mailbox, cmd_queue_tail_doorbell_enable, + attr.cmdq_tail_doorbell); + mailbox->internal.group = edgetpu_device_group_get(group); + edgetpu_mailbox_enable(mailbox); + } + group->ext_mailbox = ext_mailbox; + return 0; +release: + while (j--) + edgetpu_mailbox_remove_locked(mgr, ext_mailbox->descriptors[j].mailbox); +unlock: + write_unlock_irqrestore(&mgr->mailboxes_lock, flags); + kfree(ext_mailbox->descriptors); + kfree(ext_mailbox); + return ret; +} + +/* Caller must hold @group->lock. */ +static void edgetpu_mailbox_external_free(struct edgetpu_device_group *group) +{ + struct edgetpu_mailbox_manager *mgr; + struct edgetpu_mailbox *mailbox; + struct edgetpu_external_mailbox *ext_mailbox; + u32 i; + + ext_mailbox = group->ext_mailbox; + if (!ext_mailbox) + return; + + mgr = ext_mailbox->etdev->mailbox_manager; + + for (i = 0; i < ext_mailbox->count; i++) + edgetpu_mailbox_disable(ext_mailbox->descriptors[i].mailbox); + + edgetpu_mailbox_external_free_queue_batch(ext_mailbox); + + for (i = 0; i < ext_mailbox->count; i++) { + mailbox = ext_mailbox->descriptors[i].mailbox; + edgetpu_device_group_put(mailbox->internal.group); + edgetpu_mailbox_remove(mgr, mailbox); + } + + kfree(ext_mailbox->descriptors); + kfree(ext_mailbox); + group->ext_mailbox = NULL; +} + +static int edgetpu_mailbox_external_alloc_enable(struct edgetpu_client *client, + struct edgetpu_external_mailbox_req *req) +{ + int ret = 0, i, id; + struct edgetpu_external_mailbox *ext_mailbox = NULL; + struct edgetpu_device_group *group; + int vcid; + + mutex_lock(&client->group_lock); + if (!client->group || !edgetpu_device_group_is_leader(client->group, client)) { + mutex_unlock(&client->group_lock); + return -EINVAL; + } + group = edgetpu_device_group_get(client->group); + mutex_unlock(&client->group_lock); + + mutex_lock(&group->lock); + if 
(!edgetpu_device_group_is_finalized(group)) { + ret = -EINVAL; + goto unlock; + } + + if (group->ext_mailbox) { + ret = -EEXIST; + goto unlock; + } + + ret = edgetpu_mailbox_external_alloc(group, req); + if (ret) + goto unlock; + + ext_mailbox = group->ext_mailbox; + vcid = group->vcid; + + for (i = 0; i < ext_mailbox->count; i++) { + id = ext_mailbox->descriptors[i].mailbox->mailbox_id; + etdev_dbg(group->etdev, "Enabling mailbox: %d\n", id); + ret = edgetpu_mailbox_activate(group->etdev, id, vcid, false); + if (ret) { + etdev_err(group->etdev, "Activate mailbox %d failed: %d", id, ret); + break; + } + } + + if (ret) { + while (i--) { + id = ext_mailbox->descriptors[i].mailbox->mailbox_id; + if (edgetpu_mailbox_deactivate(group->etdev, id)) + etdev_err(group->etdev, "Deactivate mailbox %d failed", id); + } + /* + * Deactivate only fails if f/w is unresponsive which will put group + * in errored state or mailbox physically disabled before requesting + * deactivate which will never be the case. 
+ */ + edgetpu_mailbox_external_free(group); + } +unlock: + mutex_unlock(&group->lock); + edgetpu_device_group_put(group); + return ret; +} + +static int edgetpu_mailbox_external_disable_free(struct edgetpu_client *client) +{ + struct edgetpu_device_group *group; + + mutex_lock(&client->group_lock); + if (!client->group || !edgetpu_device_group_is_leader(client->group, client)) { + mutex_unlock(&client->group_lock); + return -EINVAL; + } + group = edgetpu_device_group_get(client->group); + mutex_unlock(&client->group_lock); + + mutex_lock(&group->lock); + edgetpu_mailbox_external_disable_free_locked(group); + mutex_unlock(&group->lock); + edgetpu_device_group_put(group); + return 0; +} + +void edgetpu_mailbox_external_disable_free_locked(struct edgetpu_device_group *group) +{ + u32 i, id; + struct edgetpu_external_mailbox *ext_mailbox; + + ext_mailbox = group->ext_mailbox; + if (!ext_mailbox) + return; + + for (i = 0; i < ext_mailbox->count; i++) { + id = ext_mailbox->descriptors[i].mailbox->mailbox_id; + etdev_dbg(group->etdev, "Disabling mailbox: %d\n", id); + if (edgetpu_mailbox_deactivate(group->etdev, id)) + etdev_err(group->etdev, "Deactivate mailbox %d failed", id); + } + /* + * Deactivate only fails if f/w is unresponsive which will put group + * in errored state or mailbox physically disabled before requesting + * deactivate which will never be the case. 
+ */ + edgetpu_mailbox_external_free(group); +} + +int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, int mailbox_id, + struct edgetpu_external_mailbox_req *ext_mailbox_req) { int ret; if (!edgetpu_wakelock_lock(client->wakelock)) { - etdev_err(client->etdev, - "Enabling mailboxes %08x needs wakelock acquired\n", - mailbox_ids); + etdev_err(client->etdev, "Enabling mailbox %d needs wakelock acquired\n", + mailbox_id); edgetpu_wakelock_unlock(client->wakelock); return -EAGAIN; } - edgetpu_wakelock_inc_event_locked(client->wakelock, - EDGETPU_WAKELOCK_EVENT_EXT_MAILBOX); - - etdev_dbg(client->etdev, "Enabling mailboxes: %08X\n", mailbox_ids); + if (mailbox_id == EDGETPU_MAILBOX_ID_USE_ASSOC) { + ret = edgetpu_mailbox_external_alloc_enable(client, ext_mailbox_req); + goto out; + } + etdev_dbg(client->etdev, "Enabling mailbox: %d\n", mailbox_id); - ret = edgetpu_mailbox_activate(client->etdev, mailbox_ids); + ret = edgetpu_mailbox_activate(client->etdev, mailbox_id, -1, false); if (ret) - etdev_err(client->etdev, "Activate mailboxes %08x failed: %d", - mailbox_ids, ret); + etdev_err(client->etdev, "Activate mailbox %d failed: %d", mailbox_id, ret); +out: + if (!ret) + edgetpu_wakelock_inc_event_locked(client->wakelock, + EDGETPU_WAKELOCK_EVENT_EXT_MAILBOX); edgetpu_wakelock_unlock(client->wakelock); return ret; } -int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_ids) +int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, int mailbox_id) { int ret; if (!edgetpu_wakelock_lock(client->wakelock)) { - etdev_err(client->etdev, - "Disabling mailboxes %08x needs wakelock acquired\n", - mailbox_ids); + etdev_err(client->etdev, "Disabling mailbox %d needs wakelock acquired\n", + mailbox_id); edgetpu_wakelock_unlock(client->wakelock); return -EAGAIN; } - edgetpu_wakelock_dec_event_locked(client->wakelock, - EDGETPU_WAKELOCK_EVENT_EXT_MAILBOX); - - etdev_dbg(client->etdev, "Disabling mailbox: %08X\n", mailbox_ids); - ret = 
edgetpu_mailbox_deactivate(client->etdev, mailbox_ids); + if (mailbox_id == EDGETPU_MAILBOX_ID_USE_ASSOC) { + ret = edgetpu_mailbox_external_disable_free(client); + goto out; + } + etdev_dbg(client->etdev, "Disabling mailbox: %d\n", mailbox_id); + ret = edgetpu_mailbox_deactivate(client->etdev, mailbox_id); if (ret) - etdev_err(client->etdev, "Deactivate mailboxes %08x failed: %d", - mailbox_ids, ret); + etdev_err(client->etdev, "Deactivate mailbox %d failed: %d", mailbox_id, ret); + +out: + if (!ret) + edgetpu_wakelock_dec_event_locked(client->wakelock, + EDGETPU_WAKELOCK_EVENT_EXT_MAILBOX); edgetpu_wakelock_unlock(client->wakelock); return ret; } -int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_ids) +int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid, bool first_open) { struct edgetpu_handshake *eh = &etdev->mailbox_manager->open_devices; - u32 to_send; + const u32 bit = BIT(mailbox_id); int ret = 0; mutex_lock(&eh->lock); - to_send = mailbox_ids & ~eh->fw_state; - if (to_send) - ret = edgetpu_kci_open_device(etdev->kci, to_send); + if (bit & ~eh->fw_state) + ret = edgetpu_kci_open_device(etdev->kci, mailbox_id, vcid, first_open); if (!ret) { - eh->state |= mailbox_ids; - eh->fw_state |= mailbox_ids; + eh->state |= bit; + eh->fw_state |= bit; } mutex_unlock(&eh->lock); return ret; } -int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_ids) +int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id) { struct edgetpu_handshake *eh = &etdev->mailbox_manager->open_devices; - u32 to_send; + const u32 bit = BIT(mailbox_id); int ret = 0; mutex_lock(&eh->lock); - to_send = mailbox_ids & eh->fw_state; - if (to_send) - ret = edgetpu_kci_close_device(etdev->kci, to_send); + if (bit & eh->fw_state) + ret = edgetpu_kci_close_device(etdev->kci, mailbox_id); if (!ret) { - eh->state &= ~mailbox_ids; - eh->fw_state &= ~mailbox_ids; + eh->state &= ~bit; + eh->fw_state &= ~bit; } 
mutex_unlock(&eh->lock); return ret; diff --git a/drivers/edgetpu/edgetpu-mailbox.h b/drivers/edgetpu/edgetpu-mailbox.h index 8425807..1ae4889 100644 --- a/drivers/edgetpu/edgetpu-mailbox.h +++ b/drivers/edgetpu/edgetpu-mailbox.h @@ -31,6 +31,9 @@ /* Size of CSRs start from cmd_queue_csr_base can be mmap-ed to userspace. */ #define USERSPACE_CSR_SIZE 0x1000ul +/* Mailbox ID to indicate external mailboxes */ +#define EDGETPU_MAILBOX_ID_USE_ASSOC -1 + struct edgetpu_device_group; struct edgetpu_mailbox { @@ -78,6 +81,34 @@ struct edgetpu_vii { edgetpu_queue_mem resp_queue_mem; }; +/* Structure to hold info about mailbox and its queues. */ +struct edgetpu_mailbox_descriptor { + struct edgetpu_mailbox *mailbox; + edgetpu_queue_mem cmd_queue_mem; + edgetpu_queue_mem resp_queue_mem; +}; + +/* Structure to hold multiple external mailboxes allocated for a device group. */ +struct edgetpu_external_mailbox { + /* Number of external mailboxes allocated for a device group. */ + int count; + /* Leader of device group. */ + struct edgetpu_dev *etdev; + /* Array of external mailboxes info with length @count. */ + struct edgetpu_mailbox_descriptor *descriptors; + /* Mailbox attribute for allocated external mailboxes. */ + struct edgetpu_mailbox_attr attr; +}; + +/* Structure used for requesting to allocate external mailboxes. */ +struct edgetpu_external_mailbox_req { + uint start; /* starting index of external mailbox in mailbox_manager */ + uint end; /* end index of external mailbox in mailbox_manager */ + /* number of mailboxes to be allocated, should be less or equal to (end - start + 1) */ + uint count; + struct edgetpu_mailbox_attr attr; /* mailbox attribute for allocation */ +}; + /* * Structure for recording the driver state vs FW state. 
* @@ -107,6 +138,8 @@ struct edgetpu_mailbox_manager { u8 vii_index_from, vii_index_to; /* indices reserved for P2P, the range is [from, to) */ u8 p2p_index_from, p2p_index_to; + /* indices reserved for external mailboxes */ + u8 ext_index_from, ext_index_to; rwlock_t mailboxes_lock; /* protects mailboxes */ struct edgetpu_mailbox **mailboxes; /* converts index (0 ~ num_mailbox - 1) of mailbox to CSR offset */ @@ -121,6 +154,7 @@ struct edgetpu_mailbox_manager_desc { u8 num_mailbox; u8 num_vii_mailbox; u8 num_p2p_mailbox; + u8 num_ext_mailbox; get_csr_base_t get_context_csr_base; get_csr_base_t get_cmd_queue_csr_base; get_csr_base_t get_resp_queue_csr_base; @@ -302,28 +336,41 @@ void edgetpu_mailbox_restore_active_vii_queues(struct edgetpu_dev *etdev); int edgetpu_mailbox_p2p_batch(struct edgetpu_mailbox_manager *mgr, uint n, uint skip_i, struct edgetpu_mailbox **mailboxes); -/* Notify firmware of external mailboxes becoming active */ -int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, u32 mailbox_ids); +/* + * If @mailbox_id is EDGETPU_MAILBOX_ID_USE_ASSOC, use @ext_mailbox_req to + * allocate external mailboxes and activate the allocated mailboxes. + * Otherwise, activate the external mailbox with id @mailbox_id. + */ +int edgetpu_mailbox_enable_ext(struct edgetpu_client *client, int mailbox_id, + struct edgetpu_external_mailbox_req *ext_mailbox_req); -/* Notify firmware of external mailboxes becoming inactive */ -int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, u32 mailbox_ids); +/* + * Notify firmware of an external mailboxes becoming inactive. + */ +int edgetpu_mailbox_disable_ext(struct edgetpu_client *client, int mailbox_id); /* - * Activates @mailbox_ids, OPEN_DEVICE KCI will be sent. + * Activates @mailbox_id, OPEN_DEVICE KCI will be sent. * - * If @mailbox_ids are known to be activated, KCI is not sent and this function + * If @mailbox_id is known to be activated, KCI is not sent and this function * returns 0. 
* * Returns what edgetpu_kci_open_device() returned. * Caller ensures device is powered on. */ -int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_ids); +int edgetpu_mailbox_activate(struct edgetpu_dev *etdev, u32 mailbox_id, s16 vcid, bool first_open); /* * Similar to edgetpu_mailbox_activate() but sends CLOSE_DEVICE KCI instead. */ -int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_ids); +int edgetpu_mailbox_deactivate(struct edgetpu_dev *etdev, u32 mailbox_id); /* Sets @eh->fw_state to 0. */ void edgetpu_handshake_clear_fw_state(struct edgetpu_handshake *eh); +/* + * Disables and frees any external mailboxes allocated for @group. + * + * Caller must hold @group->lock. + */ +void edgetpu_mailbox_external_disable_free_locked(struct edgetpu_device_group *group); /* Utilities of circular queue operations */ diff --git a/drivers/edgetpu/edgetpu-mapping.c b/drivers/edgetpu/edgetpu-mapping.c index 6b21db3..136cf29 100644 --- a/drivers/edgetpu/edgetpu-mapping.c +++ b/drivers/edgetpu/edgetpu-mapping.c @@ -51,7 +51,7 @@ int edgetpu_mapping_add(struct edgetpu_mapping_root *mappings, struct rb_node *parent = NULL; int ret = -EBUSY; - if (WARN_ON(!map->release)) + if (!map->release) return -EINVAL; edgetpu_mapping_lock(mappings); new = &mappings->rb.rb_node; diff --git a/drivers/edgetpu/edgetpu-mmu.h b/drivers/edgetpu/edgetpu-mmu.h index 094f14d..7cc9ffa 100644 --- a/drivers/edgetpu/edgetpu-mmu.h +++ b/drivers/edgetpu/edgetpu-mmu.h @@ -16,11 +16,6 @@ #include "edgetpu-internal.h" #include "edgetpu.h" -/* TODO(b/153947157): remove this */ -#if IS_ENABLED(CONFIG_EDGETPU_TEST) -#include <linux/iommu-ext.h> -#endif - #if LINUX_VERSION_CODE < KERNEL_VERSION(5, 2, 0) #ifndef IOMMU_PASID_INVALID #define IOMMU_PASID_INVALID (-1U) diff --git a/drivers/edgetpu/edgetpu-thermal.h b/drivers/edgetpu/edgetpu-thermal.h index 63fc91c..2b4dd09 100644 --- a/drivers/edgetpu/edgetpu-thermal.h +++ b/drivers/edgetpu/edgetpu-thermal.h @@ -25,6 +25,7 @@ struct 
+/*
+ * Acquires thermal->lock.
+ *
+ * Does nothing if the thermal management is not supported.
+ */
*/ +#define EDGETPU_PARTITION_NORMAL 0 +#define EDGETPU_PARTITION_EXTRA 1 struct edgetpu_mailbox_attr { /* * There are limitations on these size fields, see the error cases in @@ -166,6 +169,8 @@ struct edgetpu_mailbox_attr { __u32 sizeof_resp; /* size of response element in bytes */ __u32 priority : 4; /* mailbox service priority */ __u32 cmdq_tail_doorbell: 1; /* auto doorbell on cmd queue tail move */ + /* Type of memory partitions to be used for this group, exact meaning is chip-dependent. */ + __u32 partition_type : 1; }; /* @@ -273,7 +278,7 @@ struct edgetpu_sync_ioctl { _IOW(EDGETPU_IOCTL_BASE, 16, struct edgetpu_sync_ioctl) struct edgetpu_map_dmabuf_ioctl { - /* Deprecated; pass 0 to keep compatibility. */ + /* Ignored. */ __u64 offset; /* Ignored; the entire dma-buf is mapped. */ __u64 size; @@ -517,14 +522,14 @@ struct edgetpu_device_dram_usage { _IOR(EDGETPU_IOCTL_BASE, 29, struct edgetpu_device_dram_usage) /* - * struct edgetpu_ext_mailbox + * struct edgetpu_ext_mailbox_ioctl * @client_id: Client identifier (may not be needed depending on type) * @attrs: Array of mailbox attributes (pointer to * edgetpu_mailbox_attr, may be NULL depending on type) * @type: One of the EDGETPU_EXT_MAILBOX_xxx values * @count: Number of mailboxes to acquire */ -struct edgetpu_ext_mailbox { +struct edgetpu_ext_mailbox_ioctl { __u64 client_id; __u64 attrs; __u32 type; @@ -536,13 +541,35 @@ struct edgetpu_ext_mailbox { * runtime. This can be a secure mailbox or a device-to-device mailbox. */ #define EDGETPU_ACQUIRE_EXT_MAILBOX \ - _IOW(EDGETPU_IOCTL_BASE, 30, struct edgetpu_ext_mailbox) + _IOW(EDGETPU_IOCTL_BASE, 30, struct edgetpu_ext_mailbox_ioctl) /* * Release a chip-specific mailbox that is not directly managed by the TPU * runtime. This can be a secure mailbox or a device-to-device mailbox. 
*/ #define EDGETPU_RELEASE_EXT_MAILBOX \ - _IOW(EDGETPU_IOCTL_BASE, 31, struct edgetpu_ext_mailbox) + _IOW(EDGETPU_IOCTL_BASE, 31, struct edgetpu_ext_mailbox_ioctl) + +/* Fatal error event bitmasks... */ +/* Firmware crash in non-restartable thread */ +#define EDGETPU_ERROR_FW_CRASH 0x1 +/* Host or device watchdog timeout */ +#define EDGETPU_ERROR_WATCHDOG_TIMEOUT 0x2 +/* Thermal shutdown */ +#define EDGETPU_ERROR_THERMAL_STOP 0x4 +/* TPU hardware inaccessible: link fail, memory protection unit blocking... */ +#define EDGETPU_ERROR_HW_NO_ACCESS 0x8 +/* Various hardware failures */ +#define EDGETPU_ERROR_HW_FAIL 0x10 +/* Firmware-reported timeout on runtime processing of workload */ +#define EDGETPU_ERROR_RUNTIME_TIMEOUT 0x20 + +/* + * Return fatal errors raised for the client's device group, as a bitmask of + * the above fatal error event codes, or zero if no errors encountered or + * client is not part of a device group. + */ +#define EDGETPU_GET_FATAL_ERRORS \ + _IOR(EDGETPU_IOCTL_BASE, 32, __u32) #endif /* __EDGETPU_H__ */ diff --git a/drivers/edgetpu/janeiro-device.c b/drivers/edgetpu/janeiro-device.c index 00d480a..5fdf792 100644 --- a/drivers/edgetpu/janeiro-device.c +++ b/drivers/edgetpu/janeiro-device.c @@ -6,6 +6,7 @@ */ #include <linux/irqreturn.h> +#include <linux/uaccess.h> #include "edgetpu-config.h" #include "edgetpu-internal.h" @@ -109,18 +110,29 @@ void edgetpu_chip_handle_reverse_kci(struct edgetpu_dev *etdev, } } - -/* TODO: This would be a good place to handle AoC/DSP mailboxes */ - int edgetpu_chip_acquire_ext_mailbox(struct edgetpu_client *client, - struct edgetpu_ext_mailbox *ext_mbox) + struct edgetpu_ext_mailbox_ioctl *args) { + struct edgetpu_external_mailbox_req req; + + if (args->type == EDGETPU_EXT_MAILBOX_TYPE_DSP) { + if (!args->count || args->count > EDGETPU_NUM_EXT_MAILBOXES) + return -EINVAL; + if (copy_from_user(&req.attr, (void __user *)args->attrs, sizeof(req.attr))) + return -EFAULT; + req.count = args->count; + req.start = 
JANEIRO_EXT_DSP_MAILBOX_START; + req.end = JANEIRO_EXT_DSP_MAILBOX_END; + return edgetpu_mailbox_enable_ext(client, -1, &req); + } return -ENODEV; } int edgetpu_chip_release_ext_mailbox(struct edgetpu_client *client, - struct edgetpu_ext_mailbox *ext_mbox) + struct edgetpu_ext_mailbox_ioctl *args) { + if (args->type == EDGETPU_EXT_MAILBOX_TYPE_DSP) + return edgetpu_mailbox_disable_ext(client, -1); return -ENODEV; } diff --git a/drivers/edgetpu/janeiro-platform.c b/drivers/edgetpu/janeiro-platform.c index d330f48..81241f1 100644 --- a/drivers/edgetpu/janeiro-platform.c +++ b/drivers/edgetpu/janeiro-platform.c @@ -24,11 +24,12 @@ #include "mobile-firmware.h" static const struct of_device_id edgetpu_of_match[] = { - { - .compatible = "google,darwinn", - }, + /* TODO(b/190677977): remove */ + { .compatible = "google,darwinn", }, + { .compatible = "google,edgetpu-gs201", }, { /* end of list */ }, }; + MODULE_DEVICE_TABLE(of, edgetpu_of_match); #define EDGETPU_POOL_MEM_OFFSET (EDGETPU_TELEMETRY_BUFFER_SIZE * 2) diff --git a/drivers/edgetpu/janeiro-pm.c b/drivers/edgetpu/janeiro-pm.c index 7967d16..ae1938f 100644 --- a/drivers/edgetpu/janeiro-pm.c +++ b/drivers/edgetpu/janeiro-pm.c @@ -18,7 +18,6 @@ #include "edgetpu-kci.h" #include "edgetpu-mailbox.h" #include "edgetpu-pm.h" -#include "edgetpu-telemetry.h" #include "janeiro-platform.h" #include "janeiro-pm.h" @@ -234,7 +233,6 @@ static int janeiro_set_lpm(struct edgetpu_dev *etdev) static int janeiro_power_up(struct edgetpu_pm *etpm) { struct edgetpu_dev *etdev = etpm->etdev; - struct janeiro_platform_dev *edgetpu_pdev = to_janeiro_dev(etdev); int ret = 0; ret = janeiro_pwr_state_set( @@ -247,12 +245,6 @@ static int janeiro_power_up(struct edgetpu_pm *etpm) janeiro_set_lpm(etdev); - /* Clear out log / trace buffers */ - memset(edgetpu_pdev->log_mem.vaddr, 0, EDGETPU_TELEMETRY_BUFFER_SIZE); -#if IS_ENABLED(CONFIG_EDGETPU_TELEMETRY_TRACE) - memset(edgetpu_pdev->trace_mem.vaddr, 0, EDGETPU_TELEMETRY_BUFFER_SIZE); -#endif - 
edgetpu_chip_init(etdev); if (etdev->kci) { diff --git a/drivers/edgetpu/janeiro/config-mailbox.h b/drivers/edgetpu/janeiro/config-mailbox.h index e54dad8..e2a07cf 100644 --- a/drivers/edgetpu/janeiro/config-mailbox.h +++ b/drivers/edgetpu/janeiro/config-mailbox.h @@ -10,15 +10,25 @@ #include <linux/types.h> /* u32 */ -#define EDGETPU_NUM_MAILBOXES 8 -#define EDGETPU_NUM_VII_MAILBOXES (EDGETPU_NUM_MAILBOXES - 1) +#define EDGETPU_NUM_VII_MAILBOXES 7 #define EDGETPU_NUM_P2P_MAILBOXES 0 +#define EDGETPU_NUM_EXT_MAILBOXES 4 +#define EDGETPU_NUM_MAILBOXES (EDGETPU_NUM_VII_MAILBOXES + EDGETPU_NUM_EXT_MAILBOXES + 1) +/* + * Mailbox index layout in mailbox manager is like: + * --------------------------------------------- + * | KCI X 1 | VII(s) X 7 | EXT_DSP(s) X 4 | + * --------------------------------------------- + */ +#define JANEIRO_EXT_DSP_MAILBOX_START (EDGETPU_NUM_VII_MAILBOXES + 1) +#define JANEIRO_EXT_DSP_MAILBOX_END (EDGETPU_NUM_EXT_MAILBOXES + JANEIRO_EXT_DSP_MAILBOX_START - 1) -#define JANEIRO_CSR_MBOX2_CONTEXT_ENABLE 0xa0000 /* starting kernel mb*/ -#define JANEIRO_CSR_MBOX2_CMD_QUEUE_DOORBELL_SET 0xa1000 -#define JANEIRO_CSR_MBOX2_RESP_QUEUE_DOORBELL_SET 0xa1800 -#define EDGETPU_MBOX_CSRS_SIZE 0x2000 /* CSR size of each mailbox */ +#define JANEIRO_CSR_MBOX2_CONTEXT_ENABLE 0xa0000 /* starting kernel mb*/ +#define JANEIRO_CSR_MBOX11_CONTEXT_ENABLE 0xc0000 /* DSP mailbox */ +#define EDGETPU_MBOX_CSRS_SIZE 0x2000 /* CSR size of each mailbox */ +#define JANEIRO_CSR_MBOX_CMD_QUEUE_DOORBELL_SET_OFFSET 0x1000 +#define JANEIRO_CSR_MBOX_RESP_QUEUE_DOORBELL_SET_OFFSET 0x1800 #define EDGETPU_MBOX_BASE JANEIRO_CSR_MBOX2_CONTEXT_ENABLE // TODO: check correct values /* CSR storing mailbox response queue doorbell status */ @@ -27,19 +37,37 @@ static inline u32 edgetpu_mailbox_get_context_csr_base(u32 index) { - return JANEIRO_CSR_MBOX2_CONTEXT_ENABLE + - index * EDGETPU_MBOX_CSRS_SIZE; + u32 base; + + if (index >= 0 && index <= EDGETPU_NUM_VII_MAILBOXES) + base = 
JANEIRO_CSR_MBOX2_CONTEXT_ENABLE; + else + base = JANEIRO_CSR_MBOX11_CONTEXT_ENABLE; + return base + (index % JANEIRO_EXT_DSP_MAILBOX_START) * EDGETPU_MBOX_CSRS_SIZE; } static inline u32 edgetpu_mailbox_get_cmd_queue_csr_base(u32 index) { - return JANEIRO_CSR_MBOX2_CMD_QUEUE_DOORBELL_SET + - index * EDGETPU_MBOX_CSRS_SIZE; + u32 base; + + if (index >= 0 && index <= EDGETPU_NUM_VII_MAILBOXES) + base = JANEIRO_CSR_MBOX2_CONTEXT_ENABLE; + else + base = JANEIRO_CSR_MBOX11_CONTEXT_ENABLE; + return base + JANEIRO_CSR_MBOX_CMD_QUEUE_DOORBELL_SET_OFFSET + + ((index % JANEIRO_EXT_DSP_MAILBOX_START) * EDGETPU_MBOX_CSRS_SIZE); } static inline u32 edgetpu_mailbox_get_resp_queue_csr_base(u32 index) { - return JANEIRO_CSR_MBOX2_RESP_QUEUE_DOORBELL_SET + - index * EDGETPU_MBOX_CSRS_SIZE; + u32 base; + + if (index >= 0 && index <= EDGETPU_NUM_VII_MAILBOXES) + base = JANEIRO_CSR_MBOX2_CONTEXT_ENABLE; + else + base = JANEIRO_CSR_MBOX11_CONTEXT_ENABLE; + return base + JANEIRO_CSR_MBOX_RESP_QUEUE_DOORBELL_SET_OFFSET + + ((index % JANEIRO_EXT_DSP_MAILBOX_START) * EDGETPU_MBOX_CSRS_SIZE); } + #endif /* __JANEIRO_CONFIG_MAILBOX_H__ */ diff --git a/drivers/edgetpu/janeiro/config.h b/drivers/edgetpu/janeiro/config.h index 0ed255b..4dc759f 100644 --- a/drivers/edgetpu/janeiro/config.h +++ b/drivers/edgetpu/janeiro/config.h @@ -13,6 +13,10 @@ #define EDGETPU_DEV_MAX 1 #define EDGETPU_HAS_MULTI_GROUPS +/* Max number of virtual context IDs that can be allocated for one device. */ +#define EDGETPU_NUM_VCIDS 16 +/* Reserved VCID that uses the extra partition. */ +#define EDGETPU_VCID_EXTRA_PARTITION 0 #define EDGETPU_HAS_WAKELOCK |