
drm/nouveau/fifo/gk104: fix race condition when updating engine runlists

The CPU-side tracking of engine runlists was not protected by a lock,
leading to list corruption that eventually caused runlist_update() to
overrun the GPU-side runlist and trigger an OOPS.

Fixes some of the issues noticed during parallel piglit runs.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Ben Skeggs, 9 years ago
commit 386ffd5e80
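For context, the bug class described above is the classic unlocked-list race: two threads interleave the pointer writes of a list insert or delete and leave the list inconsistent. A minimal userspace sketch of the pattern (pthreads and a toy circular list standing in for list_head; an illustration, not the nvkm code):

/* toy_race.c - illustrates the unlocked-list race; not the nouveau code. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *prev, *next; };

static struct node head = { &head, &head };	/* circular, like list_head */
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* The four pointer writes of a tail insert; racy if two threads interleave. */
static void add_tail(struct node *n)
{
	n->prev = head.prev;
	n->next = &head;
	head.prev->next = n;
	head.prev = n;
}

static void *writer(void *unused)
{
	(void)unused;
	for (int i = 0; i < 100000; i++) {
		struct node *n = malloc(sizeof(*n));
		pthread_mutex_lock(&list_lock);		/* delete the two lock  */
		add_tail(n);				/* calls to reproduce   */
		pthread_mutex_unlock(&list_lock);	/* the corruption       */
	}
	return NULL;
}

int main(void)
{
	pthread_t a, b;
	long count = 0;

	pthread_create(&a, NULL, writer, NULL);
	pthread_create(&b, NULL, writer, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);

	for (struct node *n = head.next; n != &head; n = n->next)
		count++;
	printf("nodes: %ld (expect 200000)\n", count);
	return 0;
}

Without the lock, a subsequent walk of the list can miss nodes, crash, or never terminate; in the driver, a corrupted channel list is what let runlist construction overrun the GPU-side buffer.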

+ 18 - 2
drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c

@@ -47,7 +47,7 @@ gk104_fifo_uevent_init(struct nvkm_fifo *fifo)
 }
 
 void
-gk104_fifo_runlist_update(struct gk104_fifo *fifo, u32 engine)
+gk104_fifo_runlist_commit(struct gk104_fifo *fifo, u32 engine)
 {
 	struct gk104_fifo_engn *engn = &fifo->engine[engine];
 	struct gk104_fifo_chan *chan;
@@ -78,6 +78,22 @@ gk104_fifo_runlist_update(struct gk104_fifo *fifo, u32 engine)
 	mutex_unlock(&subdev->mutex);
 }
 
+void
+gk104_fifo_runlist_remove(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan)
+{
+	mutex_lock(&fifo->base.engine.subdev.mutex);
+	list_del_init(&chan->head);
+	mutex_unlock(&fifo->base.engine.subdev.mutex);
+}
+
+void
+gk104_fifo_runlist_insert(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan)
+{
+	mutex_lock(&fifo->base.engine.subdev.mutex);
+	list_add_tail(&chan->head, &fifo->engine[chan->engine].chan);
+	mutex_unlock(&fifo->base.engine.subdev.mutex);
+}
+
 static inline struct nvkm_engine *
 gk104_fifo_engine(struct gk104_fifo *fifo, u32 engn)
 {
@@ -112,7 +128,7 @@ gk104_fifo_recover_work(struct work_struct *work)
 			nvkm_subdev_fini(&engine->subdev, false);
 			WARN_ON(nvkm_subdev_init(&engine->subdev));
 		}
-		gk104_fifo_runlist_update(fifo, gk104_fifo_subdev_engine(engn));
+		gk104_fifo_runlist_commit(fifo, gk104_fifo_subdev_engine(engn));
 	}
 
 	nvkm_wr32(device, 0x00262c, engm);

+ 4 - 1
drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h

@@ -5,6 +5,7 @@
 
 #include <subdev/mmu.h>
 
+struct gk104_fifo_chan;
 struct gk104_fifo_engn {
 	struct nvkm_memory *runlist[2];
 	int cur_runlist;
@@ -35,7 +36,9 @@ void gk104_fifo_fini(struct nvkm_fifo *);
 void gk104_fifo_intr(struct nvkm_fifo *);
 void gk104_fifo_uevent_init(struct nvkm_fifo *);
 void gk104_fifo_uevent_fini(struct nvkm_fifo *);
-void gk104_fifo_runlist_update(struct gk104_fifo *, u32 engine);
+void gk104_fifo_runlist_insert(struct gk104_fifo *, struct gk104_fifo_chan *);
+void gk104_fifo_runlist_remove(struct gk104_fifo *, struct gk104_fifo_chan *);
+void gk104_fifo_runlist_commit(struct gk104_fifo *, u32 engine);
 
 static inline u64
 gk104_fifo_engine_subdev(int engine)

+ 4 - 4
drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c

@@ -151,9 +151,9 @@ gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *base)
 	u32 coff = chan->base.chid * 8;
 
 	if (!list_empty(&chan->head)) {
-		list_del_init(&chan->head);
+		gk104_fifo_runlist_remove(fifo, chan);
 		nvkm_mask(device, 0x800004 + coff, 0x00000800, 0x00000800);
-		gk104_fifo_runlist_update(fifo, chan->engine);
+		gk104_fifo_runlist_commit(fifo, chan->engine);
 	}
 
 	nvkm_wr32(device, 0x800000 + coff, 0x00000000);
@@ -172,9 +172,9 @@ gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base)
 	nvkm_wr32(device, 0x800000 + coff, 0x80000000 | addr);
 
 	if (list_empty(&chan->head) && !chan->killed) {
-		list_add_tail(&chan->head, &fifo->engine[chan->engine].chan);
+		gk104_fifo_runlist_insert(fifo, chan);
 		nvkm_mask(device, 0x800004 + coff, 0x00000400, 0x00000400);
-		gk104_fifo_runlist_update(fifo, chan->engine);
+		gk104_fifo_runlist_commit(fifo, chan->engine);
 		nvkm_mask(device, 0x800004 + coff, 0x00000400, 0x00000400);
 	}
 }
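Taken together, the new helpers define the calling convention the two gpfifo paths above now follow: mutate the CPU-side channel list only through the locked insert/remove helpers, then commit the rebuilt runlist to the GPU. A sketch of that pattern (the surrounding function is hypothetical; only the gk104_fifo_runlist_* helpers come from this commit):

/* Hypothetical caller, for illustration; the helper names are from this
 * commit, everything else is assumed context. */
static void
example_schedule_chan(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan)
{
	/* CPU-side bookkeeping, serialized internally by the subdev mutex. */
	gk104_fifo_runlist_insert(fifo, chan);
	/* GPU-side update: rebuild and submit this engine's runlist. */
	gk104_fifo_runlist_commit(fifo, chan->engine);
}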