浏览代码

drm/msm: dump submits which triggered gpu hang

Note we need to move update_fences() to after msm_rd_dump_submit(),
otherwise the bo's referenced by the submit may no longer be valid.

Signed-off-by: Rob Clark <robdclark@gmail.com>
Rob Clark 8 年之前
父节点
当前提交
96169f4e75
共有 1 个文件被更改,包括 27 次插入20 次删除
  1. 27 20
      drivers/gpu/drm/msm/msm_gpu.c

+ 27 - 20
drivers/gpu/drm/msm/msm_gpu.c

@@ -257,34 +257,16 @@ static void recover_worker(struct work_struct *work)
 {
 	struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work);
 	struct drm_device *dev = gpu->dev;
+	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_gem_submit *submit;
 	struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu);
-	uint64_t fence;
 	int i;
 
-	/* Update all the rings with the latest and greatest fence */
-	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
-		struct msm_ringbuffer *ring = gpu->rb[i];
-
-		fence = ring->memptrs->fence;
-
-		/*
-		 * For the current (faulting?) ring/submit advance the fence by
-		 * one more to clear the faulting submit
-		 */
-		if (ring == cur_ring)
-			fence = fence + 1;
-
-		update_fences(gpu, ring, fence);
-	}
-
 	mutex_lock(&dev->struct_mutex);
 
-
 	dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name);
-	fence = cur_ring->memptrs->fence + 1;
 
-	submit = find_submit(cur_ring, fence);
+	submit = find_submit(cur_ring, cur_ring->memptrs->fence + 1);
 	if (submit) {
 		struct task_struct *task;
 
@@ -309,9 +291,34 @@ static void recover_worker(struct work_struct *work)
 
 			dev_err(dev->dev, "%s: offending task: %s (%s)\n",
 				gpu->name, task->comm, cmd);
+
+			msm_rd_dump_submit(priv->hangrd, submit,
+				"offending task: %s (%s)", task->comm, cmd);
+		} else {
+			msm_rd_dump_submit(priv->hangrd, submit, NULL);
 		}
 		rcu_read_unlock();
+	}
+
+
+	/*
+	 * Update all the rings with the latest and greatest fence.. this
+	 * needs to happen after msm_rd_dump_submit() to ensure that the
+	 * bo's referenced by the offending submit are still around.
+	 */
+	for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) {
+		struct msm_ringbuffer *ring = gpu->rb[i];
+
+		uint32_t fence = ring->memptrs->fence;
 
+		/*
+		 * For the current (faulting?) ring/submit advance the fence by
+		 * one more to clear the faulting submit
+		 */
+		if (ring == cur_ring)
+			fence++;
+
+		update_fences(gpu, ring, fence);
 	}
 
 	if (msm_gpu_active(gpu)) {