@@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
  * ticks. The accumulated microseconds (us) are converted to bytes and
  * returned.
  */
-static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+					      u64 *max_bytes,
+					      u64 *max_vis_bytes)
 {
 	s64 time_us, increment_us;
-	u64 max_bytes;
 	u64 free_vram, total_vram, used_vram;
 
 	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
@@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 	 */
 	const s64 us_upper_bound = 200000;
 
-	if (!adev->mm_stats.log2_max_MBps)
-		return 0;
+	if (!adev->mm_stats.log2_max_MBps) {
+		*max_bytes = 0;
+		*max_vis_bytes = 0;
+		return;
+	}
 
 	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
 	used_vram = atomic64_read(&adev->vram_usage);
@@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
 	}
 
-	/* This returns 0 if the driver is in debt to disallow (optional)
+	/* This is set to 0 if the driver is in debt to disallow (optional)
 	 * buffer moves.
 	 */
-	max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+	/* Do the same for visible VRAM if half of it is free */
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+		u64 total_vis_vram = adev->mc.visible_vram_size;
+		u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+
+		if (used_vis_vram < total_vis_vram) {
+			u64 free_vis_vram = total_vis_vram - used_vis_vram;
+			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+							  increment_us, us_upper_bound);
+
+			if (free_vis_vram >= total_vis_vram / 2)
+				adev->mm_stats.accum_us_vis =
+					max(bytes_to_us(adev, free_vis_vram / 2),
+					    adev->mm_stats.accum_us_vis);
+		}
+
+		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+	} else {
+		*max_vis_bytes = 0;
+	}
 
 	spin_unlock(&adev->mm_stats.lock);
-	return max_bytes;
 }
 
 /* Report how many bytes have really been moved for the last command
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+				  u64 num_vis_bytes)
 {
 	spin_lock(&adev->mm_stats.lock);
 	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
 	spin_unlock(&adev->mm_stats.lock);
 }
 
@@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 				 struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
 	uint32_t domain;
 	int r;
 
@@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved < p->bytes_moved_threshold)
-		domain = bo->prefered_domains;
-	else
+	if (p->bytes_moved < p->bytes_moved_threshold) {
+		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
+			 * visible VRAM if we've depleted our allowance to do
+			 * that.
+			 */
+			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+				domain = bo->prefered_domains;
+			else
+				domain = bo->allowed_domains;
+		} else {
+			domain = bo->prefered_domains;
+		}
+	} else {
 		domain = bo->allowed_domains;
+	}
 
 retry:
 	amdgpu_ttm_placement_from_domain(bo, domain);
 	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-		initial_bytes_moved;
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	p->bytes_moved += bytes_moved;
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+		p->bytes_moved_vis += bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 		domain = bo->allowed_domains;
@@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
 		struct amdgpu_bo *bo = candidate->robj;
 		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		u64 initial_bytes_moved;
+		u64 initial_bytes_moved, bytes_moved;
+		bool update_bytes_moved_vis;
 		uint32_t other;
 
 		/* If we reached our current BO we can forget it */
@@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
 		/* Good we can try to move this BO somewhere else */
 		amdgpu_ttm_placement_from_domain(bo, other);
+		update_bytes_moved_vis =
+			adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+			bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
 		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-		p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
+		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
 			initial_bytes_moved;
+		p->bytes_moved += bytes_moved;
+		if (update_bytes_moved_vis)
+			p->bytes_moved_vis += bytes_moved;
 
 		if (unlikely(r))
 			break;
@@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}
 
-	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
+	p->bytes_moved_vis = 0;
 	p->evictable = list_last_entry(&p->validated,
 				       struct amdgpu_bo_list_entry,
 				       tv.head);
@@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto error_validate;
 	}
 
-	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
-
+	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+				     p->bytes_moved_vis);
 	fpriv->vm.last_eviction_counter =
 		atomic64_read(&p->adev->num_evictions);
 