@@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
  * ticks. The accumulated microseconds (us) are converted to bytes and
  * returned.
  */
-static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+					      u64 *max_bytes,
+					      u64 *max_vis_bytes)
 {
 	s64 time_us, increment_us;
-	u64 max_bytes;
 	u64 free_vram, total_vram, used_vram;
 
 	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
@@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 	 */
 	const s64 us_upper_bound = 200000;
 
-	if (!adev->mm_stats.log2_max_MBps)
-		return 0;
+	if (!adev->mm_stats.log2_max_MBps) {
+		*max_bytes = 0;
+		*max_vis_bytes = 0;
+		return;
+	}
 
 	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
 	used_vram = atomic64_read(&adev->vram_usage);
@@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
 	}
 
-	/* This returns 0 if the driver is in debt to disallow (optional)
+	/* This is set to 0 if the driver is in debt to disallow (optional)
 	 * buffer moves.
 	 */
-	max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+	/* Do the same for visible VRAM if half of it is free */
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+		u64 total_vis_vram = adev->mc.visible_vram_size;
+		u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+
+		if (used_vis_vram < total_vis_vram) {
+			u64 free_vis_vram = total_vis_vram - used_vis_vram;
+			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+							  increment_us, us_upper_bound);
+
+			if (free_vis_vram >= total_vis_vram / 2)
+				adev->mm_stats.accum_us_vis =
+					max(bytes_to_us(adev, free_vis_vram / 2),
+					    adev->mm_stats.accum_us_vis);
+		}
+
+		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+	} else {
+		*max_vis_bytes = 0;
+	}
 
 	spin_unlock(&adev->mm_stats.lock);
-	return max_bytes;
 }
 
 /* Report how many bytes have really been moved for the last command
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+				  u64 num_vis_bytes)
 {
 	spin_lock(&adev->mm_stats.lock);
 	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
 	spin_unlock(&adev->mm_stats.lock);
 }
 
@@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 				 struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
 	uint32_t domain;
 	int r;
 
@@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved < p->bytes_moved_threshold)
-		domain = bo->prefered_domains;
-	else
+	if (p->bytes_moved < p->bytes_moved_threshold) {
+		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
+			 * visible VRAM if we've depleted our allowance to do
+			 * that.
+			 */
+			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+				domain = bo->prefered_domains;
+			else
+				domain = bo->allowed_domains;
+		} else {
+			domain = bo->prefered_domains;
+		}
+	} else {
 		domain = bo->allowed_domains;
+	}
 
 retry:
 	amdgpu_ttm_placement_from_domain(bo, domain);
 	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-		initial_bytes_moved;
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	p->bytes_moved += bytes_moved;
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+		p->bytes_moved_vis += bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 		domain = bo->allowed_domains;
@@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
 		struct amdgpu_bo *bo = candidate->robj;
 		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		u64 initial_bytes_moved;
+		u64 initial_bytes_moved, bytes_moved;
+		bool update_bytes_moved_vis;
 		uint32_t other;
 
 		/* If we reached our current BO we can forget it */
@@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
 		/* Good we can try to move this BO somewhere else */
 		amdgpu_ttm_placement_from_domain(bo, other);
+		update_bytes_moved_vis =
+			adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+			bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
 		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-		p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
+		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
 			initial_bytes_moved;
+		p->bytes_moved += bytes_moved;
+		if (update_bytes_moved_vis)
+			p->bytes_moved_vis += bytes_moved;
 
 		if (unlikely(r))
 			break;
@@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}
 
-	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
+	p->bytes_moved_vis = 0;
 	p->evictable = list_last_entry(&p->validated,
 				       struct amdgpu_bo_list_entry,
 				       tv.head);
@@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto error_validate;
 	}
 
-	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
-
+	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+				     p->bytes_moved_vis);
 	fpriv->vm.last_eviction_counter =
 		atomic64_read(&p->adev->num_evictions);
 