Эх сурвалжийг харах

drm/radeon: use an interval tree to manage the VMA v2

Scales much better than scanning the address range linearly.

v2: store pfn instead of address

Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Michel Dänzer <michel.daenzer@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Alex Deucher 11 жил өмнө
parent
commit
0aea5e4aa2

+ 1 - 0
drivers/gpu/drm/Kconfig

@@ -114,6 +114,7 @@ config DRM_RADEON
 	select POWER_SUPPLY
 	select POWER_SUPPLY
 	select HWMON
 	select HWMON
 	select BACKLIGHT_CLASS_DEVICE
 	select BACKLIGHT_CLASS_DEVICE
+	select INTERVAL_TREE
 	help
 	help
 	  Choose this option if you have an ATI Radeon graphics card.  There
 	  Choose this option if you have an ATI Radeon graphics card.  There
 	  are both PCI and AGP versions.  You don't need to choose this to
 	  are both PCI and AGP versions.  You don't need to choose this to

+ 3 - 4
drivers/gpu/drm/radeon/radeon.h

@@ -64,6 +64,7 @@
 #include <linux/wait.h>
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/list.h>
 #include <linux/kref.h>
 #include <linux/kref.h>
+#include <linux/interval_tree.h>
 
 
 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_api.h>
 #include <ttm/ttm_bo_driver.h>
 #include <ttm/ttm_bo_driver.h>
@@ -447,14 +448,12 @@ struct radeon_mman {
 struct radeon_bo_va {
 struct radeon_bo_va {
 	/* protected by bo being reserved */
 	/* protected by bo being reserved */
 	struct list_head		bo_list;
 	struct list_head		bo_list;
-	uint64_t			soffset;
-	uint64_t			eoffset;
 	uint32_t			flags;
 	uint32_t			flags;
 	uint64_t			addr;
 	uint64_t			addr;
 	unsigned			ref_count;
 	unsigned			ref_count;
 
 
 	/* protected by vm mutex */
 	/* protected by vm mutex */
-	struct list_head		vm_list;
+	struct interval_tree_node	it;
 	struct list_head		vm_status;
 	struct list_head		vm_status;
 
 
 	/* constant after initialization */
 	/* constant after initialization */
@@ -877,7 +876,7 @@ struct radeon_vm_pt {
 };
 };
 
 
 struct radeon_vm {
 struct radeon_vm {
-	struct list_head		va;
+	struct rb_root			va;
 	unsigned			id;
 	unsigned			id;
 
 
 	/* BOs moved, but not yet updated in the PT */
 	/* BOs moved, but not yet updated in the PT */

+ 2 - 2
drivers/gpu/drm/radeon/radeon_gem.c

@@ -496,9 +496,9 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
 
 
 	switch (args->operation) {
 	switch (args->operation) {
 	case RADEON_VA_MAP:
 	case RADEON_VA_MAP:
-		if (bo_va->soffset) {
+		if (bo_va->it.start) {
 			args->operation = RADEON_VA_RESULT_VA_EXIST;
 			args->operation = RADEON_VA_RESULT_VA_EXIST;
-			args->offset = bo_va->soffset;
+			args->offset = bo_va->it.start * RADEON_GPU_PAGE_SIZE;
 			goto out;
 			goto out;
 		}
 		}
 		r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);
 		r = radeon_vm_bo_set_addr(rdev, bo_va, args->offset, args->flags);

+ 2 - 2
drivers/gpu/drm/radeon/radeon_trace.h

@@ -72,8 +72,8 @@ TRACE_EVENT(radeon_vm_bo_update,
 			     ),
 			     ),
 
 
 	    TP_fast_assign(
 	    TP_fast_assign(
-			   __entry->soffset = bo_va->soffset;
-			   __entry->eoffset = bo_va->eoffset;
+			   __entry->soffset = bo_va->it.start;
+			   __entry->eoffset = bo_va->it.last + 1;
 			   __entry->flags = bo_va->flags;
 			   __entry->flags = bo_va->flags;
 			   ),
 			   ),
 	    TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
 	    TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",

+ 44 - 53
drivers/gpu/drm/radeon/radeon_vm.c

@@ -326,17 +326,15 @@ struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
 	}
 	}
 	bo_va->vm = vm;
 	bo_va->vm = vm;
 	bo_va->bo = bo;
 	bo_va->bo = bo;
-	bo_va->soffset = 0;
-	bo_va->eoffset = 0;
+	bo_va->it.start = 0;
+	bo_va->it.last = 0;
 	bo_va->flags = 0;
 	bo_va->flags = 0;
 	bo_va->addr = 0;
 	bo_va->addr = 0;
 	bo_va->ref_count = 1;
 	bo_va->ref_count = 1;
 	INIT_LIST_HEAD(&bo_va->bo_list);
 	INIT_LIST_HEAD(&bo_va->bo_list);
-	INIT_LIST_HEAD(&bo_va->vm_list);
 	INIT_LIST_HEAD(&bo_va->vm_status);
 	INIT_LIST_HEAD(&bo_va->vm_status);
 
 
 	mutex_lock(&vm->mutex);
 	mutex_lock(&vm->mutex);
-	list_add(&bo_va->vm_list, &vm->va);
 	list_add_tail(&bo_va->bo_list, &bo->va);
 	list_add_tail(&bo_va->bo_list, &bo->va);
 	mutex_unlock(&vm->mutex);
 	mutex_unlock(&vm->mutex);
 
 
@@ -420,11 +418,9 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 			  uint32_t flags)
 			  uint32_t flags)
 {
 {
 	uint64_t size = radeon_bo_size(bo_va->bo);
 	uint64_t size = radeon_bo_size(bo_va->bo);
-	uint64_t eoffset, last_offset = 0;
 	struct radeon_vm *vm = bo_va->vm;
 	struct radeon_vm *vm = bo_va->vm;
-	struct radeon_bo_va *tmp;
-	struct list_head *head;
 	unsigned last_pfn, pt_idx;
 	unsigned last_pfn, pt_idx;
+	uint64_t eoffset;
 	int r;
 	int r;
 
 
 	if (soffset) {
 	if (soffset) {
@@ -446,51 +442,48 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev,
 	}
 	}
 
 
 	mutex_lock(&vm->mutex);
 	mutex_lock(&vm->mutex);
-	head = &vm->va;
-	last_offset = 0;
-	list_for_each_entry(tmp, &vm->va, vm_list) {
-		if (bo_va == tmp) {
-			/* skip over currently modified bo */
-			continue;
+	if (bo_va->it.start || bo_va->it.last) {
+		if (bo_va->addr) {
+			/* add a clone of the bo_va to clear the old address */
+			struct radeon_bo_va *tmp;
+			tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
+			tmp->it.start = bo_va->it.start;
+			tmp->it.last = bo_va->it.last;
+			tmp->vm = vm;
+			tmp->addr = bo_va->addr;
+			list_add(&tmp->vm_status, &vm->freed);
 		}
 		}
 
 
-		if (soffset >= last_offset && eoffset <= tmp->soffset) {
-			/* bo can be added before this one */
-			break;
-		}
-		if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
-			/* bo and tmp overlap, invalid offset */
-			dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
-				bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
-				(unsigned)tmp->soffset, (unsigned)tmp->eoffset);
-			mutex_unlock(&vm->mutex);
-			return -EINVAL;
-		}
-		last_offset = tmp->eoffset;
-		head = &tmp->vm_list;
+		interval_tree_remove(&bo_va->it, &vm->va);
+		bo_va->it.start = 0;
+		bo_va->it.last = 0;
 	}
 	}
 
 
-	if (bo_va->soffset) {
-		/* add a clone of the bo_va to clear the old address */
-		tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
-		if (!tmp) {
+	soffset /= RADEON_GPU_PAGE_SIZE;
+	eoffset /= RADEON_GPU_PAGE_SIZE;
+	if (soffset || eoffset) {
+		struct interval_tree_node *it;
+		it = interval_tree_iter_first(&vm->va, soffset, eoffset - 1);
+		if (it) {
+			struct radeon_bo_va *tmp;
+			tmp = container_of(it, struct radeon_bo_va, it);
+			/* bo and tmp overlap, invalid offset */
+			dev_err(rdev->dev, "bo %p va 0x%010Lx conflict with "
+				"(bo %p 0x%010lx 0x%010lx)\n", bo_va->bo,
+				soffset, tmp->bo, tmp->it.start, tmp->it.last);
 			mutex_unlock(&vm->mutex);
 			mutex_unlock(&vm->mutex);
-			return -ENOMEM;
+			return -EINVAL;
 		}
 		}
-		tmp->soffset = bo_va->soffset;
-		tmp->eoffset = bo_va->eoffset;
-		tmp->vm = vm;
-		list_add(&tmp->vm_status, &vm->freed);
+		bo_va->it.start = soffset;
+		bo_va->it.last = eoffset - 1;
+		interval_tree_insert(&bo_va->it, &vm->va);
 	}
 	}
 
 
-	bo_va->soffset = soffset;
-	bo_va->eoffset = eoffset;
 	bo_va->flags = flags;
 	bo_va->flags = flags;
 	bo_va->addr = 0;
 	bo_va->addr = 0;
-	list_move(&bo_va->vm_list, head);
 
 
-	soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
-	eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> radeon_vm_block_size;
+	soffset >>= radeon_vm_block_size;
+	eoffset >>= radeon_vm_block_size;
 
 
 	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));
 	BUG_ON(eoffset >= radeon_vm_num_pdes(rdev));
 
 
@@ -778,9 +771,6 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
 	unsigned count = 0;
 	unsigned count = 0;
 	uint64_t addr;
 	uint64_t addr;
 
 
-	start = start / RADEON_GPU_PAGE_SIZE;
-	end = end / RADEON_GPU_PAGE_SIZE;
-
 	/* walk over the address space and update the page tables */
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; ) {
 	for (addr = start; addr < end; ) {
 		uint64_t pt_idx = addr >> radeon_vm_block_size;
 		uint64_t pt_idx = addr >> radeon_vm_block_size;
@@ -847,7 +837,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 	uint64_t addr;
 	uint64_t addr;
 	int r;
 	int r;
 
 
-	if (!bo_va->soffset) {
+	if (!bo_va->it.start) {
 		dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
 		dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
 			bo_va->bo, vm);
 			bo_va->bo, vm);
 		return -EINVAL;
 		return -EINVAL;
@@ -881,7 +871,7 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 
 
 	trace_radeon_vm_bo_update(bo_va);
 	trace_radeon_vm_bo_update(bo_va);
 
 
-	nptes = (bo_va->eoffset - bo_va->soffset) / RADEON_GPU_PAGE_SIZE;
+	nptes = bo_va->it.last - bo_va->it.start + 1;
 
 
 	/* padding, etc. */
 	/* padding, etc. */
 	ndw = 64;
 	ndw = 64;
@@ -906,8 +896,9 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
 		return r;
 		return r;
 	ib.length_dw = 0;
 	ib.length_dw = 0;
 
 
-	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
-			      addr, radeon_vm_page_flags(bo_va->flags));
+	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->it.start,
+			      bo_va->it.last + 1, addr,
+			      radeon_vm_page_flags(bo_va->flags));
 
 
 	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
 	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
 	r = radeon_ib_schedule(rdev, &ib, NULL);
 	r = radeon_ib_schedule(rdev, &ib, NULL);
@@ -993,7 +984,7 @@ void radeon_vm_bo_rmv(struct radeon_device *rdev,
 	list_del(&bo_va->bo_list);
 	list_del(&bo_va->bo_list);
 
 
 	mutex_lock(&vm->mutex);
 	mutex_lock(&vm->mutex);
-	list_del(&bo_va->vm_list);
+	interval_tree_remove(&bo_va->it, &vm->va);
 	list_del(&bo_va->vm_status);
 	list_del(&bo_va->vm_status);
 
 
 	if (bo_va->addr) {
 	if (bo_va->addr) {
@@ -1051,7 +1042,7 @@ int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
 	vm->last_flush = NULL;
 	vm->last_flush = NULL;
 	vm->last_id_use = NULL;
 	vm->last_id_use = NULL;
 	mutex_init(&vm->mutex);
 	mutex_init(&vm->mutex);
-	INIT_LIST_HEAD(&vm->va);
+	vm->va = RB_ROOT;
 	INIT_LIST_HEAD(&vm->invalidated);
 	INIT_LIST_HEAD(&vm->invalidated);
 	INIT_LIST_HEAD(&vm->freed);
 	INIT_LIST_HEAD(&vm->freed);
 
 
@@ -1096,11 +1087,11 @@ void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
 	struct radeon_bo_va *bo_va, *tmp;
 	struct radeon_bo_va *bo_va, *tmp;
 	int i, r;
 	int i, r;
 
 
-	if (!list_empty(&vm->va)) {
+	if (!RB_EMPTY_ROOT(&vm->va)) {
 		dev_err(rdev->dev, "still active bo inside vm\n");
 		dev_err(rdev->dev, "still active bo inside vm\n");
 	}
 	}
-	list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
-		list_del_init(&bo_va->vm_list);
+	rbtree_postorder_for_each_entry_safe(bo_va, tmp, &vm->va, it.rb) {
+		interval_tree_remove(&bo_va->it, &vm->va);
 		r = radeon_bo_reserve(bo_va->bo, false);
 		r = radeon_bo_reserve(bo_va->bo, false);
 		if (!r) {
 		if (!r) {
 			list_del_init(&bo_va->bo_list);
 			list_del_init(&bo_va->bo_list);