@@ -463,6 +463,8 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
 	for (i = 0; i < exec->bo_count; i++) {
 		bo = to_vc4_bo(&exec->bo[i]->base);
 		bo->seqno = seqno;
+
+		reservation_object_add_shared_fence(bo->resv, exec->fence);
 	}
 
 	list_for_each_entry(bo, &exec->unref_list, unref_head) {
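Each BO the job references now picks up a shared (read) fence on its reservation object, so a dma-buf importer on another device can tell when V3D stops reading the buffer. For illustration only (none of this is in the patch; the helper name and the one-second timeout are made up), a consumer would typically block on those fences with the 4.12-era reservation API before touching the memory:

#include <linux/jiffies.h>
#include <linux/reservation.h>

/* Hypothetical consumer: wait for all shared (read) fences on a
 * dma-buf-shared BO before reusing its backing storage.  "resv" would
 * come from the imported buffer; the timeout is arbitrary.
 */
static int example_wait_for_readers(struct reservation_object *resv)
{
	long ret;

	/* wait_all = true: wait on the shared fences too, not just the
	 * exclusive one; intr = true: allow signal interruption.
	 */
	ret = reservation_object_wait_timeout_rcu(resv, true, true,
						  msecs_to_jiffies(1000));
	if (ret == 0)
		return -ETIME;	/* timed out */
	if (ret < 0)
		return ret;	/* interrupted or other error */
	return 0;
}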
@@ -472,7 +474,103 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
 	for (i = 0; i < exec->rcl_write_bo_count; i++) {
 		bo = to_vc4_bo(&exec->rcl_write_bo[i]->base);
 		bo->write_seqno = seqno;
+
+		reservation_object_add_excl_fence(bo->resv, exec->fence);
+	}
+}
+
+static void
+vc4_unlock_bo_reservations(struct drm_device *dev,
+			   struct vc4_exec_info *exec,
+			   struct ww_acquire_ctx *acquire_ctx)
+{
+	int i;
+
+	for (i = 0; i < exec->bo_count; i++) {
+		struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
+
+		ww_mutex_unlock(&bo->resv->lock);
 	}
+
+	ww_acquire_fini(acquire_ctx);
+}
+
+/* Takes the reservation lock on all the BOs being referenced, so that
+ * at queue submit time we can update the reservations.
+ *
+ * We don't lock the RCL, the tile alloc/state BOs, or overflow memory
+ * (all of which are on exec->unref_list). They're entirely private
+ * to vc4, so we don't attach dma-buf fences to them.
+ */
+static int
+vc4_lock_bo_reservations(struct drm_device *dev,
+			 struct vc4_exec_info *exec,
+			 struct ww_acquire_ctx *acquire_ctx)
+{
+	int contended_lock = -1;
+	int i, ret;
+	struct vc4_bo *bo;
+
+	ww_acquire_init(acquire_ctx, &reservation_ww_class);
+
+retry:
+	if (contended_lock != -1) {
+		bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+		ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock,
+						       acquire_ctx);
+		if (ret) {
+			ww_acquire_done(acquire_ctx);
+			return ret;
+		}
+	}
+
+	for (i = 0; i < exec->bo_count; i++) {
+		if (i == contended_lock)
+			continue;
+
+		bo = to_vc4_bo(&exec->bo[i]->base);
+
+		ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx);
+		if (ret) {
+			int j;
+
+			for (j = 0; j < i; j++) {
+				bo = to_vc4_bo(&exec->bo[j]->base);
+				ww_mutex_unlock(&bo->resv->lock);
+			}
+
+			if (contended_lock != -1 && contended_lock >= i) {
+				bo = to_vc4_bo(&exec->bo[contended_lock]->base);
+
+				ww_mutex_unlock(&bo->resv->lock);
+			}
+
+			if (ret == -EDEADLK) {
+				contended_lock = i;
+				goto retry;
+			}
+
+			ww_acquire_done(acquire_ctx);
+			return ret;
+		}
+	}
+
+	ww_acquire_done(acquire_ctx);
+
+	/* Reserve space for our shared (read-only) fence references,
+	 * before we commit the CL to the hardware.
+	 */
+	for (i = 0; i < exec->bo_count; i++) {
+		bo = to_vc4_bo(&exec->bo[i]->base);
+
+		ret = reservation_object_reserve_shared(bo->resv);
+		if (ret) {
+			vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+			return ret;
+		}
+	}
+
+	return 0;
 }
 
 /* Queues a struct vc4_exec_info for execution. If no job is
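vc4_lock_bo_reservations() is an instance of the canonical wound/wait locking pattern from Documentation/locking/ww-mutex-design.txt: take every object's lock within one ww_acquire_ctx, and when a lock returns -EDEADLK, drop everything, sleep on the contended lock with the _slow variant, then retry with that lock taken first. A condensed generic version of the same shape, using illustrative names rather than vc4 identifiers:

#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(example_ww_class);

struct example_obj {
	struct ww_mutex lock;
};

/* Lock an array of objects with deadlock backoff; the structure
 * mirrors vc4_lock_bo_reservations() above.
 */
static int example_lock_all(struct example_obj **objs, int count,
			    struct ww_acquire_ctx *ctx)
{
	int contended = -1;
	int i, ret;

	ww_acquire_init(ctx, &example_ww_class);
retry:
	/* If a previous pass deadlocked, sleep on the lock we lost to
	 * and take it before anything else; an older transaction will
	 * eventually let us through, so this cannot livelock.
	 */
	if (contended != -1) {
		ret = ww_mutex_lock_slow_interruptible(&objs[contended]->lock,
						       ctx);
		if (ret)
			goto err_fini;
	}

	for (i = 0; i < count; i++) {
		if (i == contended)
			continue;

		ret = ww_mutex_lock_interruptible(&objs[i]->lock, ctx);
		if (ret) {
			int j;

			/* Drop everything taken so far, including the
			 * previously contended lock if we still hold it.
			 */
			for (j = 0; j < i; j++)
				ww_mutex_unlock(&objs[j]->lock);
			if (contended > i)
				ww_mutex_unlock(&objs[contended]->lock);

			if (ret == -EDEADLK) {
				contended = i;
				goto retry;
			}
			goto err_fini;
		}
	}

	ww_acquire_done(ctx);
	return 0;

err_fini:
	ww_acquire_fini(ctx);
	return ret;
}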
@@ -484,19 +582,34 @@ vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
  * then bump the end address. That's a change for a later date,
  * though.
  */
-static void
-vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
+static int
+vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec,
+		 struct ww_acquire_ctx *acquire_ctx)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	uint64_t seqno;
 	unsigned long irqflags;
+	struct vc4_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return -ENOMEM;
+	fence->dev = dev;
 
 	spin_lock_irqsave(&vc4->job_lock, irqflags);
 
 	seqno = ++vc4->emit_seqno;
 	exec->seqno = seqno;
+
+	dma_fence_init(&fence->base, &vc4_fence_ops, &vc4->job_lock,
+		       vc4->dma_fence_context, exec->seqno);
+	fence->seqno = exec->seqno;
+	exec->fence = &fence->base;
+
 	vc4_update_bo_seqnos(exec, seqno);
 
+	vc4_unlock_bo_reservations(dev, exec, acquire_ctx);
+
 	list_add_tail(&exec->head, &vc4->bin_job_list);
 
 	/* If no job was executing, kick ours off. Otherwise, it'll
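The fence is allocated outside job_lock but initialized under it, and vc4->job_lock doubles as the dma_fence lock, which is why it is passed to dma_fence_init(). The vc4_fence_ops table itself lives in vc4_fence.c (part of this series, not shown in this hunk); for a single seqno timeline like this, it essentially reduces to comparing the fence's seqno against the last seqno the hardware retired. An approximate sketch, assuming the to_vc4_fence() cast helper and the vc4->finished_seqno counter used elsewhere in the series:

#include <linux/dma-fence.h>

/* Approximate shape of vc4_fence_ops; see vc4_fence.c for the real
 * table.  The string names are illustrative.
 */
static const char *vc4_fence_get_driver_name(struct dma_fence *fence)
{
	return "vc4";
}

static const char *vc4_fence_get_timeline_name(struct dma_fence *fence)
{
	return "vc4-v3d";
}

static bool vc4_fence_enable_signaling(struct dma_fence *fence)
{
	/* The V3D job-done interrupt is always enabled, so there is
	 * nothing extra to arm here.
	 */
	return true;
}

static bool vc4_fence_signaled(struct dma_fence *fence)
{
	struct vc4_fence *f = to_vc4_fence(fence);
	struct vc4_dev *vc4 = to_vc4_dev(f->dev);

	/* Signaled once the hardware has retired this seqno. */
	return vc4->finished_seqno >= f->seqno;
}

const struct dma_fence_ops vc4_fence_ops = {
	.get_driver_name = vc4_fence_get_driver_name,
	.get_timeline_name = vc4_fence_get_timeline_name,
	.enable_signaling = vc4_fence_enable_signaling,
	.signaled = vc4_fence_signaled,
	.wait = dma_fence_default_wait,
	.release = dma_fence_free,
};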
@@ -509,6 +622,8 @@ vc4_queue_submit(struct drm_device *dev, struct vc4_exec_info *exec)
 	}
 
 	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
+
+	return 0;
 }
 
 /**
@@ -707,6 +822,12 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	unsigned i;
 
+	/* If we got force-completed because of GPU reset rather than
+	 * through our IRQ handler, signal the fence now.
+	 */
+	if (exec->fence)
+		dma_fence_signal(exec->fence);
+
 	if (exec->bo) {
 		for (i = 0; i < exec->bo_count; i++)
 			drm_gem_object_unreference_unlocked(&exec->bo[i]->base);
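In the normal case the fence is signaled from the V3D interrupt handler when the render job retires; the hunk above only covers jobs force-completed by a GPU reset. Schematically, the interrupt-side counterpart (the real code is the vc4_irq.c half of this patch, not shown here) runs under vc4->job_lock, which is also the fence's lock, so the _locked variant applies; this is a sketch, not the verbatim change:

/* Sketch of the IRQ-path signaling; field names follow the rest of
 * the series.  Called with vc4->job_lock held.
 */
static void example_render_job_done(struct vc4_dev *vc4,
				    struct vc4_exec_info *exec)
{
	vc4->finished_seqno++;
	if (exec->fence)
		dma_fence_signal_locked(exec->fence);
}

Signaling an already-signaled fence a second time is harmless (dma_fence_signal() just returns -EINVAL with no side effects), so the reset path above and the IRQ path cannot step on each other.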
@@ -874,6 +995,7 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 	struct drm_vc4_submit_cl *args = data;
 	struct vc4_exec_info *exec;
+	struct ww_acquire_ctx acquire_ctx;
 	int ret = 0;
 
 	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
@@ -916,12 +1038,18 @@ vc4_submit_cl_ioctl(struct drm_device *dev, void *data,
 	if (ret)
 		goto fail;
 
+	ret = vc4_lock_bo_reservations(dev, exec, &acquire_ctx);
+	if (ret)
+		goto fail;
+
 	/* Clear this out of the struct we'll be putting in the queue,
 	 * since it's part of our stack.
 	 */
 	exec->args = NULL;
 
-	vc4_queue_submit(dev, exec);
+	ret = vc4_queue_submit(dev, exec, &acquire_ctx);
+	if (ret)
+		goto fail;
 
 	/* Return the seqno for our job. */
 	args->seqno = vc4->emit_seqno;
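Nothing changes for userspace: the job's seqno still comes back in args->seqno and the existing wait ioctl still works on it; the new fence is only visible to other kernel drivers through the BOs' reservation objects. A minimal submit-and-wait sketch against the existing uapi (error handling trimmed; "args" is assumed to be a fully built-up job, and the one-second timeout is arbitrary):

#include <errno.h>
#include <sys/ioctl.h>
#include "vc4_drm.h"

/* Sketch: submit a CL and wait for it to retire by seqno. */
static int example_submit_and_wait(int fd, struct drm_vc4_submit_cl *args)
{
	struct drm_vc4_wait_seqno wait = { .timeout_ns = 1000000000ull };

	if (ioctl(fd, DRM_IOCTL_VC4_SUBMIT_CL, args))
		return -errno;

	/* The kernel filled in args->seqno on successful submit. */
	wait.seqno = args->seqno;
	return ioctl(fd, DRM_IOCTL_VC4_WAIT_SEQNO, &wait) ? -errno : 0;
}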
@@ -939,6 +1067,8 @@ vc4_gem_init(struct drm_device *dev)
 {
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
 
+	vc4->dma_fence_context = dma_fence_context_alloc(1);
+
 	INIT_LIST_HEAD(&vc4->bin_job_list);
 	INIT_LIST_HEAD(&vc4->render_job_list);
 	INIT_LIST_HEAD(&vc4->job_done_list);
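dma_fence_context_alloc(1) hands the driver a single timeline ID covering all V3D fences; dma_fence_context_alloc(n) would return the first of n consecutive IDs. Fences are only ordered relative to one another when they share a context, which is what lets dma_fence_is_later() compare plain seqnos. A small illustration (not vc4 code; the helper name is made up):

#include <linux/dma-fence.h>

/* Pick the later of two fences; only meaningful when both were
 * created on the same context, e.g. the vc4 timeline allocated above.
 */
static struct dma_fence *example_pick_later(struct dma_fence *a,
					    struct dma_fence *b)
{
	if (a->context != b->context)
		return NULL;	/* different timelines: not ordered */

	/* dma_fence_is_later() compares seqnos on one timeline. */
	return dma_fence_is_later(a, b) ? a : b;
}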