|
@@ -657,6 +657,84 @@ int radeon_vm_update_page_directory(struct radeon_device *rdev,
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * radeon_vm_frag_ptes - add fragment information to PTEs
|
|
|
+ *
|
|
|
+ * @rdev: radeon_device pointer
|
|
|
+ * @ib: IB for the update
|
|
|
+ * @pe_start: first PTE to handle
|
|
|
+ * @pe_end: end of the PTE range to handle (exclusive)
|
|
|
+ * @addr: address the PTEs should point to
|
|
|
+ * @flags: hw mapping flags
|
|
|
+ *
|
|
|
+ * Global and local mutex must be locked!
|
|
|
+ */
|
|
|
+static void radeon_vm_frag_ptes(struct radeon_device *rdev,
|
|
|
+ struct radeon_ib *ib,
|
|
|
+ uint64_t pe_start, uint64_t pe_end,
|
|
|
+ uint64_t addr, uint32_t flags)
|
|
|
+{
|
|
|
+ /*
|
|
|
+ * The MC L1 TLB supports variable sized pages, based on a fragment
|
|
|
+ * field in the PTE. When this field is set to a non-zero value, page
|
|
|
+ * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
|
|
|
+ * flags are considered valid for all PTEs within the fragment range
|
|
|
+ * and corresponding mappings are assumed to be physically contiguous.
|
|
|
+ *
|
|
|
+ * The L1 TLB can store a single PTE for the whole fragment,
|
|
|
+ * significantly increasing the space available for translation
|
|
|
+ * caching. This leads to large improvements in throughput when the
|
|
|
+ * TLB is under pressure.
|
|
|
+ *
|
|
|
+ * The L2 TLB distributes small and large fragments into two
|
|
|
+ * asymmetric partitions. The large fragment cache is significantly
|
|
|
+ * larger. Thus, we try to use large fragments wherever possible.
|
|
|
+ * Userspace can support this by aligning virtual base address and
|
|
|
+ * allocation size to the fragment size.
|
|
|
+ */
|
|
|
+
|
|
|
+ /* NI is optimized for 256KB fragments, SI and newer for 64KB */
|
|
|
+ uint64_t frag_flags = rdev->family == CHIP_CAYMAN ?
|
|
|
+ R600_PTE_FRAG_256KB : R600_PTE_FRAG_64KB;
|
|
|
+ uint64_t frag_align = rdev->family == CHIP_CAYMAN ? 0x200 : 0x80;
|
|
|
+
|
|
|
+ uint64_t frag_start = ALIGN(pe_start, frag_align);
|
|
|
+ uint64_t frag_end = pe_end & ~(frag_align - 1);
|
|
|
+
|
|
|
+ unsigned count;
|
|
|
+
|
|
|
+ /* system pages are not contiguous, so fragments cannot be used */
|
|
|
+ if ((flags & R600_PTE_SYSTEM) || !(flags & R600_PTE_VALID) ||
|
|
|
+ (frag_start >= frag_end)) {
|
|
|
+
|
|
|
+ count = (pe_end - pe_start) / 8;
|
|
|
+ radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count,
|
|
|
+ RADEON_GPU_PAGE_SIZE, flags);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* handle the 4K area at the beginning */
|
|
|
+ if (pe_start != frag_start) {
|
|
|
+ count = (frag_start - pe_start) / 8;
|
|
|
+ radeon_asic_vm_set_page(rdev, ib, pe_start, addr, count,
|
|
|
+ RADEON_GPU_PAGE_SIZE, flags);
|
|
|
+ addr += RADEON_GPU_PAGE_SIZE * count;
|
|
|
+ }
|
|
|
+
|
|
|
+ /* handle the area in the middle */
|
|
|
+ count = (frag_end - frag_start) / 8;
|
|
|
+ radeon_asic_vm_set_page(rdev, ib, frag_start, addr, count,
|
|
|
+ RADEON_GPU_PAGE_SIZE, flags | frag_flags);
|
|
|
+
|
|
|
+ /* handle the 4K area at the end */
|
|
|
+ if (frag_end != pe_end) {
|
|
|
+ addr += RADEON_GPU_PAGE_SIZE * count;
|
|
|
+ count = (pe_end - frag_end) / 8;
|
|
|
+ radeon_asic_vm_set_page(rdev, ib, frag_end, addr, count,
|
|
|
+ RADEON_GPU_PAGE_SIZE, flags);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/**
|
|
|
* radeon_vm_update_ptes - make sure that page tables are valid
|
|
|
*
|
|
@@ -703,10 +781,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
|
|
|
if ((last_pte + 8 * count) != pte) {
|
|
|
|
|
|
if (count) {
|
|
|
- radeon_asic_vm_set_page(rdev, ib, last_pte,
|
|
|
- last_dst, count,
|
|
|
- RADEON_GPU_PAGE_SIZE,
|
|
|
- flags);
|
|
|
+ radeon_vm_frag_ptes(rdev, ib, last_pte,
|
|
|
+ last_pte + 8 * count,
|
|
|
+ last_dst, flags);
|
|
|
}
|
|
|
|
|
|
count = nptes;
|
|
@@ -721,9 +798,9 @@ static void radeon_vm_update_ptes(struct radeon_device *rdev,
|
|
|
}
|
|
|
|
|
|
if (count) {
|
|
|
- radeon_asic_vm_set_page(rdev, ib, last_pte,
|
|
|
- last_dst, count,
|
|
|
- RADEON_GPU_PAGE_SIZE, flags);
|
|
|
+ radeon_vm_frag_ptes(rdev, ib, last_pte,
|
|
|
+ last_pte + 8 * count,
|
|
|
+ last_dst, flags);
|
|
|
}
|
|
|
}
|
|
|
|