@@ -701,83 +701,6 @@ error_free:
 	return r;
 }
 
-/**
- * amdgpu_vm_frag_ptes - add fragment information to PTEs
- *
- * @params: see amdgpu_pte_update_params definition
- * @pe_start: first PTE to handle
- * @pe_end: last PTE to handle
- * @addr: addr those PTEs should point to
- * @flags: hw mapping flags
- */
-static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
-				uint64_t pe_start, uint64_t pe_end,
-				uint64_t addr, uint32_t flags)
-{
-	/**
-	 * The MC L1 TLB supports variable sized pages, based on a fragment
-	 * field in the PTE. When this field is set to a non-zero value, page
-	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
-	 * flags are considered valid for all PTEs within the fragment range
-	 * and corresponding mappings are assumed to be physically contiguous.
-	 *
-	 * The L1 TLB can store a single PTE for the whole fragment,
-	 * significantly increasing the space available for translation
-	 * caching. This leads to large improvements in throughput when the
-	 * TLB is under pressure.
-	 *
-	 * The L2 TLB distributes small and large fragments into two
-	 * asymmetric partitions. The large fragment cache is significantly
-	 * larger. Thus, we try to use large fragments wherever possible.
-	 * Userspace can support this by aligning virtual base address and
-	 * allocation size to the fragment size.
-	 */
-
-	/* SI and newer are optimized for 64KB */
-	uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
-	uint64_t frag_align = 0x80;
-
-	uint64_t frag_start = ALIGN(pe_start, frag_align);
-	uint64_t frag_end = pe_end & ~(frag_align - 1);
-
-	unsigned count;
-
-	/* Abort early if there isn't anything to do */
-	if (pe_start == pe_end)
-		return;
-
-	/* system pages are non continuously */
-	if (params->src || params->pages_addr ||
-	    !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
-
-		count = (pe_end - pe_start) / 8;
-		amdgpu_vm_update_pages(params, pe_start, addr, count,
-				       AMDGPU_GPU_PAGE_SIZE, flags);
-		return;
-	}
-
-	/* handle the 4K area at the beginning */
-	if (pe_start != frag_start) {
-		count = (frag_start - pe_start) / 8;
-		amdgpu_vm_update_pages(params, pe_start, addr, count,
-				       AMDGPU_GPU_PAGE_SIZE, flags);
-		addr += AMDGPU_GPU_PAGE_SIZE * count;
-	}
-
-	/* handle the area in the middle */
-	count = (frag_end - frag_start) / 8;
-	amdgpu_vm_update_pages(params, frag_start, addr, count,
-			       AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
-
-	/* handle the 4K area at the end */
-	if (frag_end != pe_end) {
-		addr += AMDGPU_GPU_PAGE_SIZE * count;
-		count = (pe_end - frag_end) / 8;
-		amdgpu_vm_update_pages(params, frag_end, addr, count,
-				       AMDGPU_GPU_PAGE_SIZE, flags);
-	}
-}
-
 /**
  * amdgpu_vm_update_ptes - make sure that page tables are valid
  *
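(Aside: the constants in the removed block are easy to sanity-check. With AMDGPU_LOG2_PAGES_PER_FRAG = 4, a fragment covers 1 << (12 + 4) = 64KB, i.e. 16 pages whose PTEs are 8 bytes each, which is why the old byte-addressed code aligned to 0x80 while the reworked function later in this patch aligns to 16 pages. A minimal standalone C sketch, not driver code, with the two driver constants copied in:

#include <stdint.h>
#include <stdio.h>

#define AMDGPU_LOG2_PAGES_PER_FRAG 4	/* 64KB fragments, per the patch */
#define AMDGPU_GPU_PAGE_SIZE 4096

int main(void)
{
	/* Page granularity grows from 4KB to (1 << (12 + frag)). */
	uint64_t frag_size = 1ULL << (12 + AMDGPU_LOG2_PAGES_PER_FRAG);

	/* Old code: pe_start/pe_end are byte offsets of 8-byte PTEs, so a
	 * 16-page fragment spans 16 * 8 = 0x80 bytes of PTEs. */
	uint64_t frag_align_old = (frag_size / AMDGPU_GPU_PAGE_SIZE) * 8;

	/* New code: start/end count pages, so the alignment is 16 pages. */
	uint64_t frag_align_new = 1ULL << AMDGPU_LOG2_PAGES_PER_FRAG;

	printf("fragment: %llu KB, old align 0x%llx, new align %llu pages\n",
	       (unsigned long long)(frag_size / 1024),
	       (unsigned long long)frag_align_old,
	       (unsigned long long)frag_align_new);
	return 0;
}

This prints "fragment: 64 KB, old align 0x80, new align 16 pages".)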
@@ -797,7 +720,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 {
 	const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 
-	uint64_t cur_pe_start, cur_pe_end, cur_dst;
+	uint64_t cur_pe_start, cur_nptes, cur_dst;
 	uint64_t addr; /* next GPU address to be updated */
 	uint64_t pt_idx;
 	struct amdgpu_bo *pt;
@@ -816,7 +739,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 
 	cur_pe_start = amdgpu_bo_gpu_offset(pt);
 	cur_pe_start += (addr & mask) * 8;
-	cur_pe_end = cur_pe_start + 8 * nptes;
+	cur_nptes = nptes;
 	cur_dst = dst;
 
 	/* for next ptb*/
@@ -836,18 +759,19 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		next_pe_start = amdgpu_bo_gpu_offset(pt);
 		next_pe_start += (addr & mask) * 8;
 
-		if (cur_pe_end == next_pe_start) {
+		if ((cur_pe_start + 8 * cur_nptes) == next_pe_start) {
 			/* The next ptb is consecutive to current ptb.
-			 * Don't call amdgpu_vm_frag_ptes now.
+			 * Don't call amdgpu_vm_update_pages now.
 			 * Will update two ptbs together in future.
 			*/
-			cur_pe_end += 8 * nptes;
+			cur_nptes += nptes;
 		} else {
-			amdgpu_vm_frag_ptes(params, cur_pe_start, cur_pe_end,
-					    cur_dst, flags);
+			amdgpu_vm_update_pages(params, cur_pe_start, cur_dst,
+					       cur_nptes, AMDGPU_GPU_PAGE_SIZE,
+					       flags);
 
 			cur_pe_start = next_pe_start;
-			cur_pe_end = next_pe_start + 8 * nptes;
+			cur_nptes = nptes;
 			cur_dst = dst;
 		}
 
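(Aside: the hunk above only changes the bookkeeping from an end pointer to a PTE count; the batching idea is unchanged. The loop defers the write while the next page table's PTE window is physically consecutive and flushes otherwise. A standalone sketch of that merge pattern, where flush() is a hypothetical stand-in for amdgpu_vm_update_pages():

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for amdgpu_vm_update_pages(). */
static void flush(uint64_t pe_start, uint64_t nptes)
{
	printf("write %llu PTEs at 0x%llx\n", (unsigned long long)nptes,
	       (unsigned long long)pe_start);
}

int main(void)
{
	/* GPU addresses of the PTE windows to write, and the number of
	 * 8-byte PTEs in each chunk (illustrative values only). */
	uint64_t starts[] = { 0x1000, 0x1000 + 8 * 4, 0x9000 };
	uint64_t counts[] = { 4, 8, 16 };

	uint64_t cur_pe_start = starts[0];
	uint64_t cur_nptes = counts[0];

	for (int i = 1; i < 3; i++) {
		if (cur_pe_start + 8 * cur_nptes == starts[i]) {
			/* consecutive: defer, update both chunks at once */
			cur_nptes += counts[i];
		} else {
			flush(cur_pe_start, cur_nptes);
			cur_pe_start = starts[i];
			cur_nptes = counts[i];
		}
	}
	flush(cur_pe_start, cur_nptes);	/* final flush, as after the loop */
	return 0;
}

Here the first two chunks merge into one 12-PTE write at 0x1000, and the third is flushed separately.)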
@@ -856,7 +780,75 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	amdgpu_vm_frag_ptes(params, cur_pe_start, cur_pe_end, cur_dst, flags);
+	amdgpu_vm_update_pages(params, cur_pe_start, cur_dst, cur_nptes,
+			       AMDGPU_GPU_PAGE_SIZE, flags);
+}
+
+/*
+ * amdgpu_vm_frag_ptes - add fragment information to PTEs
+ *
+ * @params: see amdgpu_pte_update_params definition
+ * @vm: requested vm
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @dst: addr those PTEs should point to
+ * @flags: hw mapping flags
+ */
+static void amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
+				struct amdgpu_vm *vm,
+				uint64_t start, uint64_t end,
+				uint64_t dst, uint32_t flags)
+{
+	/**
+	 * The MC L1 TLB supports variable sized pages, based on a fragment
+	 * field in the PTE. When this field is set to a non-zero value, page
+	 * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
+	 * flags are considered valid for all PTEs within the fragment range
+	 * and corresponding mappings are assumed to be physically contiguous.
+	 *
+	 * The L1 TLB can store a single PTE for the whole fragment,
+	 * significantly increasing the space available for translation
+	 * caching. This leads to large improvements in throughput when the
+	 * TLB is under pressure.
+	 *
+	 * The L2 TLB distributes small and large fragments into two
+	 * asymmetric partitions. The large fragment cache is significantly
+	 * larger. Thus, we try to use large fragments wherever possible.
+	 * Userspace can support this by aligning virtual base address and
+	 * allocation size to the fragment size.
+	 */
+
+	/* SI and newer are optimized for 64KB */
+	uint64_t frag_flags = AMDGPU_PTE_FRAG(AMDGPU_LOG2_PAGES_PER_FRAG);
+	uint64_t frag_align = 1 << AMDGPU_LOG2_PAGES_PER_FRAG;
+
+	uint64_t frag_start = ALIGN(start, frag_align);
+	uint64_t frag_end = end & ~(frag_align - 1);
+
+	/* system pages are non-contiguous */
+	if (params->src || params->pages_addr || !(flags & AMDGPU_PTE_VALID) ||
+	    (frag_start >= frag_end)) {
+
+		amdgpu_vm_update_ptes(params, vm, start, end, dst, flags);
+		return;
+	}
+
+	/* handle the 4K area at the beginning */
+	if (start != frag_start) {
+		amdgpu_vm_update_ptes(params, vm, start, frag_start,
+				      dst, flags);
+		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
+	}
+
+	/* handle the area in the middle */
+	amdgpu_vm_update_ptes(params, vm, frag_start, frag_end, dst,
+			      flags | frag_flags);
+
+	/* handle the 4K area at the end */
+	if (frag_end != end) {
+		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
+		amdgpu_vm_update_ptes(params, vm, frag_end, end, dst, flags);
+	}
 }
 
 /**
@@ -953,7 +945,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	amdgpu_vm_update_ptes(&params, vm, start, last + 1, addr, flags);
+	amdgpu_vm_frag_ptes(&params, vm, start, last + 1, addr, flags);
 
 	amdgpu_ring_pad_ib(ring, params.ib);
 	WARN_ON(params.ib->length_dw > ndw);
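(Aside: after this patch the call chain is amdgpu_vm_bo_update_mapping() -> amdgpu_vm_frag_ptes() -> amdgpu_vm_update_ptes() -> amdgpu_vm_update_pages(), so the fragment split is decided once on the page range before the page tables are walked, instead of per PTE window. A standalone sketch of the head/middle/tail split done by the new amdgpu_vm_frag_ptes(), where update_range() is a hypothetical stand-in for amdgpu_vm_update_ptes():

#include <stdint.h>
#include <stdio.h>

#define LOG2_PAGES_PER_FRAG 4	/* 64KB fragments, per the patch */

/* Hypothetical stand-in for amdgpu_vm_update_ptes(); just reports ranges. */
static void update_range(uint64_t start, uint64_t end, int frag)
{
	printf("[%llu, %llu) %s\n", (unsigned long long)start,
	       (unsigned long long)end, frag ? "fragment" : "4K");
}

/* Mirrors the head/middle/tail split in the new amdgpu_vm_frag_ptes();
 * start/end are page indices. */
static void split_range(uint64_t start, uint64_t end)
{
	uint64_t align = 1ULL << LOG2_PAGES_PER_FRAG;
	uint64_t frag_start = (start + align - 1) & ~(align - 1); /* ALIGN() */
	uint64_t frag_end = end & ~(align - 1);

	if (frag_start >= frag_end) {	/* too small for any fragment */
		update_range(start, end, 0);
		return;
	}
	if (start != frag_start)	/* unaligned head */
		update_range(start, frag_start, 0);
	update_range(frag_start, frag_end, 1);	/* aligned middle */
	if (frag_end != end)		/* unaligned tail */
		update_range(frag_end, end, 0);
}

int main(void)
{
	split_range(3, 70);	/* head [3,16), middle [16,64), tail [64,70) */
	return 0;
}

Only the middle range carries the fragment flags; the unaligned edges fall back to plain 4K PTEs, matching the three amdgpu_vm_update_ptes() calls in the new function.)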