Parcourir la source

drm/radeon: validate relocations in the order determined by userspace v3

Userspace should set the first 4 bits of drm_radeon_cs_reloc::flags to
a number from 0 to 15. The higher the number, the higher the priority,
which means a buffer with a higher number will be validated sooner.

The old behavior is preserved: Buffers used for write are prioritized over
read-only buffers if the userspace doesn't set the number.

v2: add buffers to buckets directly, then concatenate them
v3: use a stable sort

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Marek Olšák il y a 11 ans
Parent
commit
c9b7654889

+ 0 - 1
drivers/gpu/drm/radeon/radeon.h

@@ -483,7 +483,6 @@ struct radeon_bo_list {
 	struct ttm_validate_buffer tv;
 	struct ttm_validate_buffer tv;
 	struct radeon_bo	*bo;
 	struct radeon_bo	*bo;
 	uint64_t		gpu_offset;
 	uint64_t		gpu_offset;
-	bool			written;
 	unsigned		domain;
 	unsigned		domain;
 	unsigned		alt_domain;
 	unsigned		alt_domain;
 	u32			tiling_flags;
 	u32			tiling_flags;

+ 61 - 3
drivers/gpu/drm/radeon/radeon_cs.c

@@ -31,10 +31,52 @@
 #include "radeon.h"
 #include "radeon.h"
 #include "radeon_trace.h"
 #include "radeon_trace.h"
 
 
+#define RADEON_CS_MAX_PRIORITY		32u	/* reserved for the kernel driver */
+#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)
+
+/* Bucket sort with O(n) time complexity: an item with priority "i"
+ * (0..RADEON_CS_MAX_PRIORITY) is added to bucket[i], and the buckets are
+ * then concatenated in descending order of priority.
+ */
+struct radeon_cs_buckets {
+	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
+};
+
+static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
+{
+	unsigned i;
+
+	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
+		INIT_LIST_HEAD(&b->bucket[i]); /* bucket starts out empty */
+}
+
+static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
+				  struct list_head *item, unsigned priority)
+{
+	/* Append at the tail so that the sort is stable: buffers which
+	 * appear sooner in the relocation list are likely to be used more
+	 * often, so buffers with the same priority must keep their order.
+	 * Priorities above RADEON_CS_MAX_PRIORITY land in the top bucket.
+	 */
+	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
+}
+
+static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
+				       struct list_head *out_list)
+{
+	unsigned i;
+
+	/* list_splice() adds at the head, so the top bucket ends up first. */
+	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
+		list_splice(&b->bucket[i], out_list);
+	}
+}
+
 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 {
 {
 	struct drm_device *ddev = p->rdev->ddev;
 	struct drm_device *ddev = p->rdev->ddev;
 	struct radeon_cs_chunk *chunk;
 	struct radeon_cs_chunk *chunk;
+	struct radeon_cs_buckets buckets;
 	unsigned i, j;
 	unsigned i, j;
 	bool duplicate;
 	bool duplicate;
 
 
@@ -53,8 +95,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 	if (p->relocs == NULL) {
 	if (p->relocs == NULL) {
 		return -ENOMEM;
 		return -ENOMEM;
 	}
 	}
+
+	radeon_cs_buckets_init(&buckets);
+
 	for (i = 0; i < p->nrelocs; i++) {
 	for (i = 0; i < p->nrelocs; i++) {
 		struct drm_radeon_cs_reloc *r;
 		struct drm_radeon_cs_reloc *r;
+		unsigned priority;
 
 
 		duplicate = false;
 		duplicate = false;
 		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
 		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
@@ -80,7 +126,14 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		p->relocs_ptr[i] = &p->relocs[i];
 		p->relocs_ptr[i] = &p->relocs[i];
 		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
 		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
 		p->relocs[i].lobj.bo = p->relocs[i].robj;
 		p->relocs[i].lobj.bo = p->relocs[i].robj;
-		p->relocs[i].lobj.written = !!r->write_domain;
+
+		/* The userspace buffer priorities are from 0 to 15. A higher
+		 * number means the buffer is more important.
+		 * Also, the buffers used for write have a higher priority than
+		 * the buffers used for read only, which doubles the range
+		 * to 0 to 31. 32 is reserved for the kernel driver.
+		 */
+		priority = (r->flags & 0xf) * 2 + !!r->write_domain;
 
 
 		/* the first reloc of an UVD job is the msg and that must be in
 		/* the first reloc of an UVD job is the msg and that must be in
 		   VRAM, also but everything into VRAM on AGP cards to avoid
 		   VRAM, also but everything into VRAM on AGP cards to avoid
@@ -94,6 +147,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			p->relocs[i].lobj.alt_domain =
 			p->relocs[i].lobj.alt_domain =
 				RADEON_GEM_DOMAIN_VRAM;
 				RADEON_GEM_DOMAIN_VRAM;
 
 
+			/* prioritize this over any other relocation */
+			priority = RADEON_CS_MAX_PRIORITY;
 		} else {
 		} else {
 			uint32_t domain = r->write_domain ?
 			uint32_t domain = r->write_domain ?
 				r->write_domain : r->read_domains;
 				r->write_domain : r->read_domains;
@@ -107,9 +162,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
 		p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
 		p->relocs[i].handle = r->handle;
 		p->relocs[i].handle = r->handle;
 
 
-		radeon_bo_list_add_object(&p->relocs[i].lobj,
-					  &p->validated);
+		radeon_cs_buckets_add(&buckets, &p->relocs[i].lobj.tv.head,
+				      priority);
 	}
 	}
+
+	radeon_cs_buckets_get_list(&buckets, &p->validated);
+
 	return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
 	return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
 }
 }
 
 

+ 0 - 10
drivers/gpu/drm/radeon/radeon_object.c

@@ -366,16 +366,6 @@ void radeon_bo_fini(struct radeon_device *rdev)
 	arch_phys_wc_del(rdev->mc.vram_mtrr);
 	arch_phys_wc_del(rdev->mc.vram_mtrr);
 }
 }
 
 
-void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
-				struct list_head *head)
-{
-	if (lobj->written) {
-		list_add(&lobj->tv.head, head);
-	} else {
-		list_add_tail(&lobj->tv.head, head);
-	}
-}
-
 int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
 int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
 			    struct list_head *head, int ring)
 			    struct list_head *head, int ring)
 {
 {

+ 0 - 2
drivers/gpu/drm/radeon/radeon_object.h

@@ -138,8 +138,6 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev);
 extern void radeon_bo_force_delete(struct radeon_device *rdev);
 extern void radeon_bo_force_delete(struct radeon_device *rdev);
 extern int radeon_bo_init(struct radeon_device *rdev);
 extern int radeon_bo_init(struct radeon_device *rdev);
 extern void radeon_bo_fini(struct radeon_device *rdev);
 extern void radeon_bo_fini(struct radeon_device *rdev);
-extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
-				struct list_head *head);
 extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
 extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
 				   struct list_head *head, int ring);
 				   struct list_head *head, int ring);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,