Browse Source

drm/amdgpu: add command submission workflow tracepoint

OGL needs these tracepoints to investigate performance issue.

Change-Id: I5e58187d061253f7d665dfce8e4e163ba91d3e2b
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Chunming Zhou 9 years ago
parent
commit
7034decf6a

+ 1 - 1
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c

@@ -888,7 +888,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		ttm_eu_fence_buffer_objects(&parser.ticket,
 		ttm_eu_fence_buffer_objects(&parser.ticket,
 				&parser.validated,
 				&parser.validated,
 				&job->base.s_fence->base);
 				&job->base.s_fence->base);
-
+		trace_amdgpu_cs_ioctl(job);
 		mutex_unlock(&job->job_lock);
 		mutex_unlock(&job->job_lock);
 		amdgpu_cs_parser_fini_late(&parser);
 		amdgpu_cs_parser_fini_late(&parser);
 		mutex_unlock(&vm->mutex);
 		mutex_unlock(&vm->mutex);

+ 2 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c

@@ -26,6 +26,7 @@
 #include <linux/sched.h>
 #include <linux/sched.h>
 #include <drm/drmP.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 
 
 static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
 static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
 {
 {
@@ -45,6 +46,7 @@ static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
 	}
 	}
 	job = to_amdgpu_job(sched_job);
 	job = to_amdgpu_job(sched_job);
 	mutex_lock(&job->job_lock);
 	mutex_lock(&job->job_lock);
+	trace_amdgpu_sched_run_job(job);
 	r = amdgpu_ib_schedule(job->adev,
 	r = amdgpu_ib_schedule(job->adev,
 			       job->num_ibs,
 			       job->num_ibs,
 			       job->ibs,
 			       job->ibs,

+ 51 - 0
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h

@@ -48,6 +48,57 @@ TRACE_EVENT(amdgpu_cs,
 		      __entry->fences)
 		      __entry->fences)
 );
 );
 
 
+TRACE_EVENT(amdgpu_cs_ioctl,
+	    TP_PROTO(struct amdgpu_job *job),
+	    TP_ARGS(job),
+	    TP_STRUCT__entry(
+			     __field(struct amdgpu_device *, adev)
+			     __field(struct amd_sched_job *, sched_job)
+			     __field(struct amdgpu_ib *, ib)
+			     __field(struct fence *, fence)
+			     __field(char *, ring_name)
+			     __field(u32, num_ibs)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->adev = job->adev;
+			   __entry->sched_job = &job->base;
+			   __entry->ib = job->ibs;
+			   __entry->fence = &job->base.s_fence->base;
+			   __entry->ring_name = job->ibs[0].ring->name;
+			   __entry->num_ibs = job->num_ibs;
+			   ),
+	    TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
+		      __entry->adev, __entry->sched_job, __entry->ib,
+		      __entry->fence, __entry->ring_name, __entry->num_ibs)
+);
+
+TRACE_EVENT(amdgpu_sched_run_job,
+	    TP_PROTO(struct amdgpu_job *job),
+	    TP_ARGS(job),
+	    TP_STRUCT__entry(
+			     __field(struct amdgpu_device *, adev)
+			     __field(struct amd_sched_job *, sched_job)
+			     __field(struct amdgpu_ib *, ib)
+			     __field(struct fence *, fence)
+			     __field(char *, ring_name)
+			     __field(u32, num_ibs)
+			     ),
+
+	    TP_fast_assign(
+			   __entry->adev = job->adev;
+			   __entry->sched_job = &job->base;
+			   __entry->ib = job->ibs;
+			   __entry->fence = &job->base.s_fence->base;
+			   __entry->ring_name = job->ibs[0].ring->name;
+			   __entry->num_ibs = job->num_ibs;
+			   ),
+	    TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
+		      __entry->adev, __entry->sched_job, __entry->ib,
+		      __entry->fence, __entry->ring_name, __entry->num_ibs)
+);
+
+
 TRACE_EVENT(amdgpu_vm_grab_id,
 TRACE_EVENT(amdgpu_vm_grab_id,
 	    TP_PROTO(unsigned vmid, int ring),
 	    TP_PROTO(unsigned vmid, int ring),
 	    TP_ARGS(vmid, ring),
 	    TP_ARGS(vmid, ring),

+ 21 - 3
drivers/gpu/drm/amd/scheduler/gpu_sched_trace.h

@@ -16,6 +16,8 @@ TRACE_EVENT(amd_sched_job,
 	    TP_ARGS(sched_job),
 	    TP_ARGS(sched_job),
 	    TP_STRUCT__entry(
 	    TP_STRUCT__entry(
 			     __field(struct amd_sched_entity *, entity)
 			     __field(struct amd_sched_entity *, entity)
+			     __field(struct amd_sched_job *, sched_job)
+			     __field(struct fence *, fence)
 			     __field(const char *, name)
 			     __field(const char *, name)
 			     __field(u32, job_count)
 			     __field(u32, job_count)
 			     __field(int, hw_job_count)
 			     __field(int, hw_job_count)
@@ -23,16 +25,32 @@ TRACE_EVENT(amd_sched_job,
 
 
 	    TP_fast_assign(
 	    TP_fast_assign(
 			   __entry->entity = sched_job->s_entity;
 			   __entry->entity = sched_job->s_entity;
+			   __entry->sched_job = sched_job;
+			   __entry->fence = &sched_job->s_fence->base;
 			   __entry->name = sched_job->sched->name;
 			   __entry->name = sched_job->sched->name;
 			   __entry->job_count = kfifo_len(
 			   __entry->job_count = kfifo_len(
 				   &sched_job->s_entity->job_queue) / sizeof(sched_job);
 				   &sched_job->s_entity->job_queue) / sizeof(sched_job);
 			   __entry->hw_job_count = atomic_read(
 			   __entry->hw_job_count = atomic_read(
 				   &sched_job->sched->hw_rq_count);
 				   &sched_job->sched->hw_rq_count);
 			   ),
 			   ),
-	    TP_printk("entity=%p, ring=%s, job count:%u, hw job count:%d",
-		      __entry->entity, __entry->name, __entry->job_count,
-		      __entry->hw_job_count)
+	    TP_printk("entity=%p, sched job=%p, fence=%p, ring=%s, job count:%u, hw job count:%d",
+		      __entry->entity, __entry->sched_job, __entry->fence, __entry->name,
+		      __entry->job_count, __entry->hw_job_count)
 );
 );
+
+TRACE_EVENT(amd_sched_process_job,
+	    TP_PROTO(struct amd_sched_fence *fence),
+	    TP_ARGS(fence),
+	    TP_STRUCT__entry(
+		    __field(struct fence *, fence)
+		    ),
+
+	    TP_fast_assign(
+		    __entry->fence = &fence->base;
+		    ),
+	    TP_printk("fence=%p signaled", __entry->fence)
+);
+
 #endif
 #endif
 
 
 /* This part must be outside protection */
 /* This part must be outside protection */

+ 1 - 0
drivers/gpu/drm/amd/scheduler/gpu_scheduler.c

@@ -346,6 +346,7 @@ static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
 		list_del_init(&s_fence->list);
 		list_del_init(&s_fence->list);
 		spin_unlock_irqrestore(&sched->fence_list_lock, flags);
 		spin_unlock_irqrestore(&sched->fence_list_lock, flags);
 	}
 	}
+	trace_amd_sched_process_job(s_fence);
 	fence_put(&s_fence->base);
 	fence_put(&s_fence->base);
 	wake_up_interruptible(&sched->wake_up_worker);
 	wake_up_interruptible(&sched->wake_up_worker);
 }
 }