|
@@ -156,6 +156,144 @@ static void vc4_v3d_init_hw(struct drm_device *dev)
|
|
V3D_WRITE(V3D_VPMBASE, 0);
|
|
V3D_WRITE(V3D_VPMBASE, 0);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+int vc4_v3d_get_bin_slot(struct vc4_dev *vc4)
|
|
|
|
+{
|
|
|
|
+ struct drm_device *dev = vc4->dev;
|
|
|
|
+ unsigned long irqflags;
|
|
|
|
+ int slot;
|
|
|
|
+ uint64_t seqno = 0;
|
|
|
|
+ struct vc4_exec_info *exec;
|
|
|
|
+
|
|
|
|
+try_again:
|
|
|
|
+ spin_lock_irqsave(&vc4->job_lock, irqflags);
|
|
|
|
+ slot = ffs(~vc4->bin_alloc_used);
|
|
|
|
+ if (slot != 0) {
|
|
|
|
+ /* Switch from ffs() bit index to a 0-based index. */
|
|
|
|
+ slot--;
|
|
|
|
+ vc4->bin_alloc_used |= BIT(slot);
|
|
|
|
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
|
|
|
|
+ return slot;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /* Couldn't find an open slot. Wait for render to complete
|
|
|
|
+ * and try again.
|
|
|
|
+ */
|
|
|
|
+ exec = vc4_last_render_job(vc4);
|
|
|
|
+ if (exec)
|
|
|
|
+ seqno = exec->seqno;
|
|
|
|
+ spin_unlock_irqrestore(&vc4->job_lock, irqflags);
|
|
|
|
+
|
|
|
|
+ if (seqno) {
|
|
|
|
+ int ret = vc4_wait_for_seqno(dev, seqno, ~0ull, true);
|
|
|
|
+
|
|
|
|
+ if (ret == 0)
|
|
|
|
+ goto try_again;
|
|
|
|
+
|
|
|
|
+ return ret;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return -ENOMEM;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/**
|
|
|
|
+ * vc4_allocate_bin_bo() - allocates the memory that will be used for
|
|
|
|
+ * tile binning.
|
|
|
|
+ *
|
|
|
|
+ * The binner has a limitation that the addresses in the tile state
|
|
|
|
+ * buffer that point into the tile alloc buffer or binner overflow
|
|
|
|
+ * memory only have 28 bits (256MB), and the top 4 on the bus for
|
|
|
|
+ * tile alloc references end up coming from the tile state buffer's
|
|
|
|
+ * address.
|
|
|
|
+ *
|
|
|
|
+ * To work around this, we allocate a single large buffer while V3D is
|
|
|
|
+ * in use, make sure that it has the top 4 bits constant across its
|
|
|
|
+ * entire extent, and then put the tile state, tile alloc, and binner
|
|
|
|
+ * overflow memory inside that buffer.
|
|
|
|
+ *
|
|
|
|
+ * This creates a limitation where we may not be able to execute a job
|
|
|
|
+ * if it doesn't fit within the buffer that we allocated up front.
|
|
|
|
+ * However, it turns out that 16MB is "enough for anybody", and
|
|
|
|
+ * real-world applications run into allocation failures from the
|
|
|
|
+ * overall CMA pool before they make scenes complicated enough to run
|
|
|
|
+ * out of bin space.
|
|
|
|
+ */
|
|
|
|
+int
|
|
|
|
+vc4_allocate_bin_bo(struct drm_device *drm)
|
|
|
|
+{
|
|
|
|
+ struct vc4_dev *vc4 = to_vc4_dev(drm);
|
|
|
|
+ struct vc4_v3d *v3d = vc4->v3d;
|
|
|
|
+ uint32_t size = 16 * 1024 * 1024;
|
|
|
|
+ int ret = 0;
|
|
|
|
+ struct list_head list;
|
|
|
|
+
|
|
|
|
+ /* We may need to try allocating more than once to get a BO
|
|
|
|
+ * that doesn't cross 256MB. Track the ones we've allocated
|
|
|
|
+ * that failed so far, so that we can free them when we've got
|
|
|
|
+ * one that succeeded (if we freed them right away, our next
|
|
|
|
+ * allocation would probably be the same chunk of memory).
|
|
|
|
+ */
|
|
|
|
+ INIT_LIST_HEAD(&list);
|
|
|
|
+
|
|
|
|
+ while (true) {
|
|
|
|
+ struct vc4_bo *bo = vc4_bo_create(drm, size, true);
|
|
|
|
+
|
|
|
|
+ if (IS_ERR(bo)) {
|
|
|
|
+ ret = PTR_ERR(bo);
|
|
|
|
+
|
|
|
|
+ dev_err(&v3d->pdev->dev,
|
|
|
|
+ "Failed to allocate memory for tile binning: "
|
|
|
|
+ "%d. You may need to enable CMA or give it "
|
|
|
|
+ "more memory.",
|
|
|
|
+ ret);
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /* Check if this BO won't trigger the addressing bug. */
|
|
|
|
+ if ((bo->base.paddr & 0xf0000000) ==
|
|
|
|
+ ((bo->base.paddr + bo->base.base.size - 1) & 0xf0000000)) {
|
|
|
|
+ vc4->bin_bo = bo;
|
|
|
|
+
|
|
|
|
+ /* Set up for allocating 512KB chunks of
|
|
|
|
+ * binner memory. The biggest allocation we
|
|
|
|
+ * need to do is for the initial tile alloc +
|
|
|
|
+ * tile state buffer. We can render to a
|
|
|
|
+ * maximum of ((2048*2048) / (32*32) = 4096
|
|
|
|
+ * tiles in a frame (until we do floating
|
|
|
|
+ * point rendering, at which point it would be
|
|
|
|
+ * 8192). Tile state is 48b/tile (rounded to
|
|
|
|
+ * a page), and tile alloc is 32b/tile
|
|
|
|
+ * (rounded to a page), plus a page of extra,
|
|
|
|
+ * for a total of 320kb for our worst-case.
|
|
|
|
+ * We choose 512kb so that it divides evenly
|
|
|
|
+ * into our 16MB, and the rest of the 512kb
|
|
|
|
+ * will be used as storage for the overflow
|
|
|
|
+ * from the initial 32b CL per bin.
|
|
|
|
+ */
|
|
|
|
+ vc4->bin_alloc_size = 512 * 1024;
|
|
|
|
+ vc4->bin_alloc_used = 0;
|
|
|
|
+ vc4->bin_alloc_overflow = 0;
|
|
|
|
+ WARN_ON_ONCE(sizeof(vc4->bin_alloc_used) * 8 !=
|
|
|
|
+ bo->base.base.size / vc4->bin_alloc_size);
|
|
|
|
+
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /* Put it on the list to free later, and try again. */
|
|
|
|
+ list_add(&bo->unref_head, &list);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /* Free all the BOs we allocated but didn't choose. */
|
|
|
|
+ while (!list_empty(&list)) {
|
|
|
|
+ struct vc4_bo *bo = list_last_entry(&list,
|
|
|
|
+ struct vc4_bo, unref_head);
|
|
|
|
+
|
|
|
|
+ list_del(&bo->unref_head);
|
|
|
|
+ drm_gem_object_put_unlocked(&bo->base.base);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ return ret;
|
|
|
|
+}
|
|
|
|
+
|
|
#ifdef CONFIG_PM
|
|
#ifdef CONFIG_PM
|
|
static int vc4_v3d_runtime_suspend(struct device *dev)
|
|
static int vc4_v3d_runtime_suspend(struct device *dev)
|
|
{
|
|
{
|
|
@@ -164,6 +302,9 @@ static int vc4_v3d_runtime_suspend(struct device *dev)
|
|
|
|
|
|
vc4_irq_uninstall(vc4->dev);
|
|
vc4_irq_uninstall(vc4->dev);
|
|
|
|
|
|
|
|
+ drm_gem_object_put_unlocked(&vc4->bin_bo->base.base);
|
|
|
|
+ vc4->bin_bo = NULL;
|
|
|
|
+
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -171,6 +312,11 @@ static int vc4_v3d_runtime_resume(struct device *dev)
|
|
{
|
|
{
|
|
struct vc4_v3d *v3d = dev_get_drvdata(dev);
|
|
struct vc4_v3d *v3d = dev_get_drvdata(dev);
|
|
struct vc4_dev *vc4 = v3d->vc4;
|
|
struct vc4_dev *vc4 = v3d->vc4;
|
|
|
|
+ int ret;
|
|
|
|
+
|
|
|
|
+ ret = vc4_allocate_bin_bo(vc4->dev);
|
|
|
|
+ if (ret)
|
|
|
|
+ return ret;
|
|
|
|
|
|
vc4_v3d_init_hw(vc4->dev);
|
|
vc4_v3d_init_hw(vc4->dev);
|
|
vc4_irq_postinstall(vc4->dev);
|
|
vc4_irq_postinstall(vc4->dev);
|
|
@@ -208,6 +354,10 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
|
|
return -EINVAL;
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ ret = vc4_allocate_bin_bo(drm);
|
|
|
|
+ if (ret)
|
|
|
|
+ return ret;
|
|
|
|
+
|
|
/* Reset the binner overflow address/size at setup, to be sure
|
|
/* Reset the binner overflow address/size at setup, to be sure
|
|
* we don't reuse an old one.
|
|
* we don't reuse an old one.
|
|
*/
|
|
*/
|