@@ -290,97 +290,172 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 	return addr;
 }
 
-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
-			    bool evict, bool no_wait_gpu,
-			    struct ttm_mem_reg *new_mem,
-			    struct ttm_mem_reg *old_mem)
+/**
+ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
+ * @adev: amdgpu device
+ * @src: buffer/address where to read from
+ * @dst: buffer/address where to write to
+ * @size: number of bytes to copy
+ * @resv: reservation object to sync to
+ * @f: Returns the last fence if multiple jobs are submitted.
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo can be the same BO for a
+ * move, or different BOs for a BO-to-BO copy.
+ */
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+			       struct amdgpu_copy_mem *src,
+			       struct amdgpu_copy_mem *dst,
+			       uint64_t size,
+			       struct reservation_object *resv,
+			       struct dma_fence **f)
 {
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-
-	struct drm_mm_node *old_mm, *new_mm;
-	uint64_t old_start, old_size, new_start, new_size;
-	unsigned long num_pages;
+	struct drm_mm_node *src_mm, *dst_mm;
+	uint64_t src_node_start, dst_node_start, src_node_size,
+		 dst_node_size, src_page_offset, dst_page_offset;
 	struct dma_fence *fence = NULL;
-	int r;
-
-	BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+	int r = 0;
+	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
+					AMDGPU_GPU_PAGE_SIZE);
 
 	if (!ring->ready) {
 		DRM_ERROR("Trying to move memory with ring turned off.\n");
 		return -EINVAL;
 	}
 
-	old_mm = old_mem->mm_node;
-	old_size = old_mm->size;
-	old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
+	src_mm = src->mem->mm_node;
+	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
+		src->offset -= (src_mm->size << PAGE_SHIFT);
+		++src_mm;
+	}
+	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
+			 src->offset;
+	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
+	src_page_offset = src_node_start & (PAGE_SIZE - 1);
 
-	new_mm = new_mem->mm_node;
-	new_size = new_mm->size;
-	new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
+	dst_mm = dst->mem->mm_node;
+	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
+		dst->offset -= (dst_mm->size << PAGE_SHIFT);
+		++dst_mm;
+	}
+	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
+			 dst->offset;
+	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
+	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 
-	num_pages = new_mem->num_pages;
 	mutex_lock(&adev->mman.gtt_window_lock);
-	while (num_pages) {
-		unsigned long cur_pages = min(min(old_size, new_size),
-					      (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
-		uint64_t from = old_start, to = new_start;
+
+	while (size) {
+		unsigned long cur_size;
+		uint64_t from = src_node_start, to = dst_node_start;
 		struct dma_fence *next;
 
-		if (old_mem->mem_type == TTM_PL_TT &&
-		    !amdgpu_gtt_mgr_is_allocated(old_mem)) {
-			r = amdgpu_map_buffer(bo, old_mem, cur_pages,
-					      old_start, 0, ring, &from);
+		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
+		 * begins at an offset, then adjust the size accordingly
+		 */
+		cur_size = min3(min(src_node_size, dst_node_size), size,
+				GTT_MAX_BYTES);
+		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
+		    cur_size + dst_page_offset > GTT_MAX_BYTES)
+			cur_size -= max(src_page_offset, dst_page_offset);
+
+		/* Map only what needs to be accessed. Map src to window 0 and
+		 * dst to window 1
+		 */
+		if (src->mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(src->mem)) {
+			r = amdgpu_map_buffer(src->bo, src->mem,
+					PFN_UP(cur_size + src_page_offset),
+					src_node_start, 0, ring,
+					&from);
 			if (r)
 				goto error;
+			/* Adjust the offset because amdgpu_map_buffer returns
+			 * start of mapped page
+			 */
+			from += src_page_offset;
 		}
 
-		if (new_mem->mem_type == TTM_PL_TT &&
-		    !amdgpu_gtt_mgr_is_allocated(new_mem)) {
-			r = amdgpu_map_buffer(bo, new_mem, cur_pages,
-					      new_start, 1, ring, &to);
+		if (dst->mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
+			r = amdgpu_map_buffer(dst->bo, dst->mem,
+					PFN_UP(cur_size + dst_page_offset),
+					dst_node_start, 1, ring,
+					&to);
 			if (r)
 				goto error;
+			to += dst_page_offset;
 		}
 
-		r = amdgpu_copy_buffer(ring, from, to,
-				       cur_pages * PAGE_SIZE,
-				       bo->resv, &next, false, true);
+		r = amdgpu_copy_buffer(ring, from, to, cur_size,
+				       resv, &next, false, true);
 		if (r)
 			goto error;
 
 		dma_fence_put(fence);
 		fence = next;
 
-		num_pages -= cur_pages;
-		if (!num_pages)
+		size -= cur_size;
+		if (!size)
 			break;
 
-		old_size -= cur_pages;
-		if (!old_size) {
-			old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
-			old_size = old_mm->size;
+		src_node_size -= cur_size;
+		if (!src_node_size) {
+			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
+							     src->mem);
+			src_node_size = (src_mm->size << PAGE_SHIFT);
 		} else {
-			old_start += cur_pages * PAGE_SIZE;
+			src_node_start += cur_size;
+			src_page_offset = src_node_start & (PAGE_SIZE - 1);
 		}
-
-		new_size -= cur_pages;
-		if (!new_size) {
-			new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
-			new_size = new_mm->size;
+		dst_node_size -= cur_size;
+		if (!dst_node_size) {
+			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
+							     dst->mem);
+			dst_node_size = (dst_mm->size << PAGE_SHIFT);
 		} else {
-			new_start += cur_pages * PAGE_SIZE;
+			dst_node_start += cur_size;
+			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 		}
 	}
+error:
 	mutex_unlock(&adev->mman.gtt_window_lock);
+	if (f)
+		*f = dma_fence_get(fence);
+	dma_fence_put(fence);
+	return r;
+}
+
+
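The refactoring above turns the old move path into a general copy primitive: each side is described by an explicit {bo, mem, offset} tuple, so callers can copy arbitrary byte ranges between two different BOs instead of only whole-BO moves. As a rough illustration of the calling convention, here is a minimal sketch of a hypothetical synchronous caller (the function name and its assumptions are mine, not part of the patch); it assumes both BOs are reserved and their backing storage is populated:

/* Illustrative only -- not part of the patch. Copies @size bytes from the
 * start of @src_bo to the start of @dst_bo and waits for the SDMA job(s).
 */
static int example_bo_to_bo_copy(struct amdgpu_device *adev,
				 struct ttm_buffer_object *src_bo,
				 struct ttm_buffer_object *dst_bo,
				 uint64_t size)
{
	struct amdgpu_copy_mem src = { .bo = src_bo, .mem = &src_bo->mem, .offset = 0 };
	struct amdgpu_copy_mem dst = { .bo = dst_bo, .mem = &dst_bo->mem, .offset = 0 };
	struct dma_fence *fence = NULL;
	int r;

	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, size,
				       dst_bo->resv, &fence);
	if (r)
		return r;

	/* Synchronous caller: block until the copy completes. */
	if (fence) {
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}
	return 0;
}

Note that @f is optional: amdgpu_move_blit below passes a fence pointer through to ttm_bo_pipeline_move, while a caller that only needs completion can wait and drop the fence as above.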
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+			    bool evict, bool no_wait_gpu,
+			    struct ttm_mem_reg *new_mem,
+			    struct ttm_mem_reg *old_mem)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+	struct amdgpu_copy_mem src, dst;
+	struct dma_fence *fence = NULL;
+	int r;
+
+	src.bo = bo;
+	dst.bo = bo;
+	src.mem = old_mem;
+	dst.mem = new_mem;
+	src.offset = 0;
+	dst.offset = 0;
+
+	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+				       new_mem->num_pages << PAGE_SHIFT,
+				       bo->resv, &fence);
+	if (r)
+		goto error;
 
 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 	dma_fence_put(fence);
 	return r;
 
 error:
-	mutex_unlock(&adev->mman.gtt_window_lock);
-
 	if (fence)
 		dma_fence_wait(fence, false);
 	dma_fence_put(fence);
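A note on the address arithmetic in amdgpu_ttm_copy_mem_to_mem: a ttm_mem_reg backed by multiple drm_mm_nodes describes a series of discontiguous, page-aligned extents, so a linear byte offset must first be resolved to the node containing it, and each copy step is clamped to whichever runs out first: the source node, the destination node, the remaining size, or the GTT transfer window. The self-contained userspace sketch below (hypothetical names and constants; it models the logic, it is not kernel code) mirrors that node walk and clamping:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1ULL << PAGE_SHIFT)
#define GTT_MAX_BYTES	(512 * PAGE_SIZE)	/* stand-in for the GTT window size */

struct mm_node { uint64_t start, size; };	/* start and size in pages */

/* Walk past whole nodes until @offset falls inside one, then return the
 * GPU address of the first byte and the bytes remaining in that node.
 */
static void resolve(const struct mm_node *node, uint64_t offset,
		    uint64_t *node_start, uint64_t *node_size)
{
	while (offset >= (node->size << PAGE_SHIFT)) {
		offset -= (node->size << PAGE_SHIFT);
		++node;
	}
	*node_start = (node->start << PAGE_SHIFT) + offset;
	*node_size = (node->size << PAGE_SHIFT) - offset;
}

/* One iteration's copy size: bounded by both nodes, the remaining total,
 * and the window; shrunk further if a page offset would overflow the window.
 */
static uint64_t clamp_copy(uint64_t src_node_size, uint64_t dst_node_size,
			   uint64_t size, uint64_t src_off, uint64_t dst_off)
{
	uint64_t cur = src_node_size < dst_node_size ? src_node_size : dst_node_size;

	if (cur > size)
		cur = size;
	if (cur > GTT_MAX_BYTES)
		cur = GTT_MAX_BYTES;
	if (cur + src_off > GTT_MAX_BYTES || cur + dst_off > GTT_MAX_BYTES)
		cur -= src_off > dst_off ? src_off : dst_off;
	return cur;
}

int main(void)
{
	/* Three discontiguous extents of 2, 1 and 4 pages. */
	const struct mm_node nodes[] = { { 10, 2 }, { 100, 1 }, { 200, 4 } };
	uint64_t start, avail;

	/* Offset 3 pages + 0x40 skips the first two nodes and lands
	 * 0x40 bytes into the third one.
	 */
	resolve(nodes, 3 * PAGE_SIZE + 0x40, &start, &avail);
	printf("start=0x%llx avail=0x%llx cur_size=0x%llx\n",
	       (unsigned long long)start, (unsigned long long)avail,
	       (unsigned long long)clamp_copy(avail, avail, 1ULL << 20,
					      start & (PAGE_SIZE - 1),
					      start & (PAGE_SIZE - 1)));
	return 0;
}

The page offsets matter in the kernel version because amdgpu_map_buffer maps whole pages into GTT windows 0 and 1, so a copy that starts mid-page must shrink to stay within the window.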