|
@@ -133,48 +133,6 @@ struct amdgpu_prt_cb {
|
|
|
struct dma_fence_cb cb;
|
|
|
};
|
|
|
|
|
|
-/**
|
|
|
- * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
|
|
|
- *
|
|
|
- * @base: base structure for tracking BO usage in a VM
|
|
|
- * @vm: vm to which bo is to be added
|
|
|
- * @bo: amdgpu buffer object
|
|
|
- *
|
|
|
- * Initialize a bo_va_base structure and add it to the appropriate lists
|
|
|
- *
|
|
|
- */
|
|
|
-static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
|
|
|
- struct amdgpu_vm *vm,
|
|
|
- struct amdgpu_bo *bo)
|
|
|
-{
|
|
|
- base->vm = vm;
|
|
|
- base->bo = bo;
|
|
|
- INIT_LIST_HEAD(&base->bo_list);
|
|
|
- INIT_LIST_HEAD(&base->vm_status);
|
|
|
-
|
|
|
- if (!bo)
|
|
|
- return;
|
|
|
- list_add_tail(&base->bo_list, &bo->va);
|
|
|
-
|
|
|
- if (bo->tbo.type == ttm_bo_type_kernel)
|
|
|
- list_move(&base->vm_status, &vm->relocated);
|
|
|
-
|
|
|
- if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
|
|
|
- return;
|
|
|
-
|
|
|
- if (bo->preferred_domains &
|
|
|
- amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
|
|
|
- return;
|
|
|
-
|
|
|
- /*
|
|
|
- * we checked all the prerequisites, but it looks like this per vm bo
|
|
|
- * is currently evicted. add the bo to the evicted list to make sure it
|
|
|
- * is validated on next vm use to avoid fault.
|
|
|
- * */
|
|
|
- list_move_tail(&base->vm_status, &vm->evicted);
|
|
|
- base->moved = true;
|
|
|
-}
|
|
|
-
|
|
|
/**
|
|
|
* amdgpu_vm_level_shift - return the addr shift for each level
|
|
|
*
|
|
@@ -232,6 +190,26 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
|
|
|
return AMDGPU_VM_PTE_COUNT(adev);
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT
|
|
|
+ *
|
|
|
+ * @adev: amdgpu_device pointer
|
|
|
+ * @level: VMPT level
|
|
|
+ *
|
|
|
+ * Returns:
|
|
|
+ * The mask to extract the entry number of a PD/PT from an address.
|
|
|
+ */
|
|
|
+static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev,
|
|
|
+ unsigned int level)
|
|
|
+{
|
|
|
+ if (level <= adev->vm_manager.root_level)
|
|
|
+ return 0xffffffff;
|
|
|
+ else if (level != AMDGPU_VM_PTB)
|
|
|
+ return 0x1ff;
|
|
|
+ else
|
|
|
+ return AMDGPU_VM_PTE_COUNT(adev) - 1;
|
|
|
+}
|
|
|
+
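
/*
 * Illustrative sketch: how the mask returned above combines with
 * amdgpu_vm_level_shift() to select the entry index for a page-frame number
 * at a given level, mirroring what amdgpu_vm_pt_descendant() does further
 * down. The helper name is hypothetical.
 */
static unsigned int example_vm_entry_index(struct amdgpu_device *adev,
					   unsigned int level, uint64_t pfn)
{
	uint32_t mask = amdgpu_vm_entries_mask(adev, level);
	unsigned int shift = amdgpu_vm_level_shift(adev, level);

	/* e.g. for an intermediate 9-bit level this picks pfn[shift + 8:shift] */
	return (pfn >> shift) & mask;
}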
|
|
|
/**
|
|
|
* amdgpu_vm_bo_size - returns the size of the BOs in bytes
|
|
|
*
|
|
@@ -246,6 +224,382 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
|
|
|
return AMDGPU_GPU_PAGE_ALIGN(amdgpu_vm_num_entries(adev, level) * 8);
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * amdgpu_vm_bo_evicted - vm_bo is evicted
|
|
|
+ *
|
|
|
+ * @vm_bo: vm_bo which is evicted
|
|
|
+ *
|
|
|
+ * State for PDs/PTs and per VM BOs which are not at the location they should
|
|
|
+ * be.
|
|
|
+ */
|
|
|
+static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
|
|
|
+{
|
|
|
+ struct amdgpu_vm *vm = vm_bo->vm;
|
|
|
+ struct amdgpu_bo *bo = vm_bo->bo;
|
|
|
+
|
|
|
+ vm_bo->moved = true;
|
|
|
+ if (bo->tbo.type == ttm_bo_type_kernel)
|
|
|
+ list_move(&vm_bo->vm_status, &vm->evicted);
|
|
|
+ else
|
|
|
+ list_move_tail(&vm_bo->vm_status, &vm->evicted);
|
|
|
+}
|
|
|
+
|
|
|
+/**
+ * amdgpu_vm_bo_relocated - vm_bo is relocated
+ *
+ * @vm_bo: vm_bo which is relocated
+ *
+ * State for PDs/PTs which need to update their parent PD.
+ */
|
|
|
+static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
|
|
|
+{
|
|
|
+ list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * amdgpu_vm_bo_moved - vm_bo is moved
|
|
|
+ *
|
|
|
+ * @vm_bo: vm_bo which is moved
|
|
|
+ *
|
|
|
+ * State for per VM BOs which are moved, but that change is not yet reflected
|
|
|
+ * in the page tables.
|
|
|
+ */
|
|
|
+static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
|
|
|
+{
|
|
|
+ list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * amdgpu_vm_bo_idle - vm_bo is idle
|
|
|
+ *
|
|
|
+ * @vm_bo: vm_bo which is now idle
|
|
|
+ *
|
|
|
+ * State for PDs/PTs and per VM BOs which have gone through the state machine
|
|
|
+ * and are now idle.
|
|
|
+ */
|
|
|
+static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
|
|
|
+{
|
|
|
+ list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
|
|
|
+ vm_bo->moved = false;
|
|
|
+}
|
|
|
+
|
|
|
+/**
+ * amdgpu_vm_bo_invalidated - vm_bo is invalidated
+ *
+ * @vm_bo: vm_bo which is now invalidated
+ *
+ * State for normal BOs which are invalidated and whose change is not yet
+ * reflected in the PTs.
+ */
|
|
|
+static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
|
|
|
+{
|
|
|
+ spin_lock(&vm_bo->vm->invalidated_lock);
|
|
|
+ list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
|
|
|
+ spin_unlock(&vm_bo->vm->invalidated_lock);
|
|
|
+}
|
|
|
+
|
|
|
+/**
+ * amdgpu_vm_bo_done - vm_bo is done
+ *
+ * @vm_bo: vm_bo which is now done
+ *
+ * State for normal BOs which are invalidated and whose change has been
+ * updated in the PTs.
+ */
|
|
|
+static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
|
|
|
+{
|
|
|
+ spin_lock(&vm_bo->vm->invalidated_lock);
|
|
|
+ list_del_init(&vm_bo->vm_status);
|
|
|
+ spin_unlock(&vm_bo->vm->invalidated_lock);
|
|
|
+}
|
|
|
+
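
/*
 * Minimal sketch of how the state helpers above are combined when a BO
 * changes placement; it mirrors the non-evicted branch of
 * amdgpu_vm_bo_invalidate() later in this patch. The function name is
 * hypothetical and only illustrates the intended transitions:
 * evicted -> (validate) -> moved/relocated -> (PT update) -> idle/done.
 */
static void example_vm_bo_mark_moved(struct amdgpu_vm_bo_base *vm_bo)
{
	struct amdgpu_vm *vm = vm_bo->vm;
	struct amdgpu_bo *bo = vm_bo->bo;

	if (bo->tbo.type == ttm_bo_type_kernel)
		amdgpu_vm_bo_relocated(vm_bo);		/* PD/PT: parent PD needs an update */
	else if (bo->tbo.resv == vm->root.base.bo->tbo.resv)
		amdgpu_vm_bo_moved(vm_bo);		/* per VM BO */
	else
		amdgpu_vm_bo_invalidated(vm_bo);	/* normal, individually reserved BO */
}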
|
|
|
+/**
|
|
|
+ * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
|
|
|
+ *
|
|
|
+ * @base: base structure for tracking BO usage in a VM
|
|
|
+ * @vm: vm to which bo is to be added
|
|
|
+ * @bo: amdgpu buffer object
|
|
|
+ *
|
|
|
+ * Initialize a bo_va_base structure and add it to the appropriate lists
|
|
|
+ *
|
|
|
+ */
|
|
|
+static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
|
|
|
+ struct amdgpu_vm *vm,
|
|
|
+ struct amdgpu_bo *bo)
|
|
|
+{
|
|
|
+ base->vm = vm;
|
|
|
+ base->bo = bo;
|
|
|
+ base->next = NULL;
|
|
|
+ INIT_LIST_HEAD(&base->vm_status);
|
|
|
+
|
|
|
+ if (!bo)
|
|
|
+ return;
|
|
|
+ base->next = bo->vm_bo;
|
|
|
+ bo->vm_bo = base;
|
|
|
+
|
|
|
+ if (bo->tbo.resv != vm->root.base.bo->tbo.resv)
|
|
|
+ return;
|
|
|
+
|
|
|
+ vm->bulk_moveable = false;
|
|
|
+ if (bo->tbo.type == ttm_bo_type_kernel)
|
|
|
+ amdgpu_vm_bo_relocated(base);
|
|
|
+ else
|
|
|
+ amdgpu_vm_bo_idle(base);
|
|
|
+
|
|
|
+ if (bo->preferred_domains &
|
|
|
+ amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type))
|
|
|
+ return;
|
|
|
+
|
|
|
+ /*
+ * We checked all the prerequisites, but it looks like this per VM BO
+ * is currently evicted. Add the BO to the evicted list to make sure it
+ * is validated on next VM use to avoid a fault.
+ */
|
|
|
+ amdgpu_vm_bo_evicted(base);
|
|
|
+}
|
|
|
+
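
/*
 * Small sketch of the new per-BO bookkeeping: instead of the old bo->va
 * list_head, each amdgpu_vm_bo_base is pushed onto the singly linked
 * bo->vm_bo chain above and walked as below, essentially what
 * amdgpu_vm_bo_find() does later in this patch. The function name is
 * hypothetical.
 */
static bool example_bo_belongs_to_vm(struct amdgpu_bo *bo,
				     struct amdgpu_vm *vm)
{
	struct amdgpu_vm_bo_base *base;

	for (base = bo->vm_bo; base; base = base->next)
		if (base->vm == vm)
			return true;

	return false;
}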
|
|
|
+/**
|
|
|
+ * amdgpu_vm_pt_parent - get the parent page directory
|
|
|
+ *
|
|
|
+ * @pt: child page table
|
|
|
+ *
|
|
|
+ * Helper to get the parent entry for the child page table. NULL if we are at
|
|
|
+ * the root page directory.
|
|
|
+ */
|
|
|
+static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt)
|
|
|
+{
|
|
|
+ struct amdgpu_bo *parent = pt->base.bo->parent;
|
|
|
+
|
|
|
+ if (!parent)
|
|
|
+ return NULL;
|
|
|
+
|
|
|
+ return container_of(parent->vm_bo, struct amdgpu_vm_pt, base);
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt
|
|
|
+ */
|
|
|
+struct amdgpu_vm_pt_cursor {
|
|
|
+ uint64_t pfn;
|
|
|
+ struct amdgpu_vm_pt *parent;
|
|
|
+ struct amdgpu_vm_pt *entry;
|
|
|
+ unsigned level;
|
|
|
+};
|
|
|
+
|
|
|
+/**
|
|
|
+ * amdgpu_vm_pt_start - start PD/PT walk
|
|
|
+ *
|
|
|
+ * @adev: amdgpu_device pointer
|
|
|
+ * @vm: amdgpu_vm structure
|
|
|
+ * @start: start address of the walk
|
|
|
+ * @cursor: state to initialize
|
|
|
+ *
|
|
|
+ * Initialize a amdgpu_vm_pt_cursor to start a walk.
|
|
|
+ */
|
|
|
+static void amdgpu_vm_pt_start(struct amdgpu_device *adev,
|
|
|
+ struct amdgpu_vm *vm, uint64_t start,
|
|
|
+ struct amdgpu_vm_pt_cursor *cursor)
|
|
|
+{
|
|
|
+ cursor->pfn = start;
|
|
|
+ cursor->parent = NULL;
|
|
|
+ cursor->entry = &vm->root;
|
|
|
+ cursor->level = adev->vm_manager.root_level;
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * amdgpu_vm_pt_descendant - go to child node
|
|
|
+ *
|
|
|
+ * @adev: amdgpu_device pointer
|
|
|
+ * @cursor: current state
|
|
|
+ *
|
|
|
+ * Walk to the child node of the current node.
|
|
|
+ * Returns:
|
|
|
+ * True if the walk was possible, false otherwise.
|
|
|
+ */
|
|
|
+static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev,
|
|
|
+ struct amdgpu_vm_pt_cursor *cursor)
|
|
|
+{
|
|
|
+ unsigned mask, shift, idx;
|
|
|
+
|
|
|
+ if (!cursor->entry->entries)
|
|
|
+ return false;
|
|
|
+
|
|
|
+ BUG_ON(!cursor->entry->base.bo);
|
|
|
+ mask = amdgpu_vm_entries_mask(adev, cursor->level);
|
|
|
+ shift = amdgpu_vm_level_shift(adev, cursor->level);
|
|
|
+
|
|
|
+ ++cursor->level;
|
|
|
+ idx = (cursor->pfn >> shift) & mask;
|
|
|
+ cursor->parent = cursor->entry;
|
|
|
+ cursor->entry = &cursor->entry->entries[idx];
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * amdgpu_vm_pt_sibling - go to sibling node
|
|
|
+ *
|
|
|
+ * @adev: amdgpu_device pointer
|
|
|
+ * @cursor: current state
|
|
|
+ *
|
|
|
+ * Walk to the sibling node of the current node.
|
|
|
+ * Returns:
|
|
|
+ * True if the walk was possible, false otherwise.
|
|
|
+ */
|
|
|
+static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev,
|
|
|
+ struct amdgpu_vm_pt_cursor *cursor)
|
|
|
+{
|
|
|
+ unsigned shift, num_entries;
|
|
|
+
|
|
|
+ /* Root doesn't have a sibling */
|
|
|
+ if (!cursor->parent)
|
|
|
+ return false;
|
|
|
+
|
|
|
+ /* Go to our parent and see if we have a sibling */
|
|
|
+ shift = amdgpu_vm_level_shift(adev, cursor->level - 1);
|
|
|
+ num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1);
|
|
|
+
|
|
|
+ if (cursor->entry == &cursor->parent->entries[num_entries - 1])
|
|
|
+ return false;
|
|
|
+
|
|
|
+ cursor->pfn += 1ULL << shift;
|
|
|
+ cursor->pfn &= ~((1ULL << shift) - 1);
|
|
|
+ ++cursor->entry;
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * amdgpu_vm_pt_ancestor - go to parent node
|
|
|
+ *
|
|
|
+ * @cursor: current state
|
|
|
+ *
|
|
|
+ * Walk to the parent node of the current node.
|
|
|
+ * Returns:
|
|
|
+ * True if the walk was possible, false otherwise.
|
|
|
+ */
|
|
|
+static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor)
|
|
|
+{
|
|
|
+ if (!cursor->parent)
|
|
|
+ return false;
|
|
|
+
|
|
|
+ --cursor->level;
|
|
|
+ cursor->entry = cursor->parent;
|
|
|
+ cursor->parent = amdgpu_vm_pt_parent(cursor->parent);
|
|
|
+ return true;
|
|
|
+}
|
|
|
+
|
|
|
+/**
+ * amdgpu_vm_pt_next - get next PD/PT in hierarchy
+ *
+ * @adev: amdgpu_device pointer
+ * @cursor: current state
+ *
+ * Walk the PD/PT tree to the next node.
+ */
|
|
|
+static void amdgpu_vm_pt_next(struct amdgpu_device *adev,
|
|
|
+ struct amdgpu_vm_pt_cursor *cursor)
|
|
|
+{
|
|
|
+ /* First try a newborn child */
|
|
|
+ if (amdgpu_vm_pt_descendant(adev, cursor))
|
|
|
+ return;
|
|
|
+
|
|
|
+ /* If that didn't work, try to find a sibling */
|
|
|
+ while (!amdgpu_vm_pt_sibling(adev, cursor)) {
|
|
|
+ /* No sibling, go to our parents and grandparents */
|
|
|
+ if (!amdgpu_vm_pt_ancestor(cursor)) {
|
|
|
+ cursor->pfn = ~0ll;
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ }
|
|
|
+}
|
|
|
+
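
/*
 * Taken together, amdgpu_vm_pt_descendant/sibling/ancestor/next implement a
 * pre-order walk of the PD/PT tree. For a hypothetical tree where each
 * directory holds exactly the entries shown
 *
 *            root
 *           /    \
 *         PD0    PD1
 *        /   \      \
 *      PT0   PT1    PT2
 *
 * repeated calls to amdgpu_vm_pt_next() visit PD0, PT0, PT1, PD1 and PT2,
 * and finally set cursor->pfn to ~0ll to terminate the walk.
 */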
|
|
|
+/**
|
|
|
+ * amdgpu_vm_pt_first_leaf - get first leaf PD/PT
|
|
|
+ *
|
|
|
+ * @adev: amdgpu_device pointer
|
|
|
+ * @vm: amdgpu_vm structure
|
|
|
+ * @start: start addr of the walk
|
|
|
+ * @cursor: state to initialize
|
|
|
+ *
|
|
|
+ * Start a walk and go directly to the leaf node.
|
|
|
+ */
|
|
|
+static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev,
|
|
|
+ struct amdgpu_vm *vm, uint64_t start,
|
|
|
+ struct amdgpu_vm_pt_cursor *cursor)
|
|
|
+{
|
|
|
+ amdgpu_vm_pt_start(adev, vm, start, cursor);
|
|
|
+ while (amdgpu_vm_pt_descendant(adev, cursor));
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * amdgpu_vm_pt_next_leaf - get next leaf PD/PT
|
|
|
+ *
|
|
|
+ * @adev: amdgpu_device pointer
|
|
|
+ * @cursor: current state
|
|
|
+ *
|
|
|
+ * Walk the PD/PT tree to the next leaf node.
|
|
|
+ */
|
|
|
+static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev,
|
|
|
+ struct amdgpu_vm_pt_cursor *cursor)
|
|
|
+{
|
|
|
+ amdgpu_vm_pt_next(adev, cursor);
|
|
|
+ while (amdgpu_vm_pt_descendant(adev, cursor));
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy
|
|
|
+ */
|
|
|
+#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor) \
|
|
|
+ for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor)); \
|
|
|
+ (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor)))
|
|
|
+
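
/*
 * Minimal usage sketch for the leaf walk above, assuming start/end are
 * already in GPU page units as in amdgpu_vm_alloc_pts() below. The function
 * name is hypothetical; it only counts how many leaf entries cover a range.
 */
static unsigned int example_count_leaf_entries(struct amdgpu_device *adev,
					       struct amdgpu_vm *vm,
					       uint64_t start, uint64_t end)
{
	struct amdgpu_vm_pt_cursor cursor;
	unsigned int count = 0;

	for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor)
		++count;

	return count;
}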
|
|
|
+/**
+ * amdgpu_vm_pt_first_dfs - start a depth-first search
+ *
+ * @adev: amdgpu_device structure
+ * @vm: amdgpu_vm structure
+ * @cursor: state to initialize
+ *
+ * Starts a depth-first traversal of the PD/PT tree.
+ */
|
|
|
+static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev,
|
|
|
+ struct amdgpu_vm *vm,
|
|
|
+ struct amdgpu_vm_pt_cursor *cursor)
|
|
|
+{
|
|
|
+ amdgpu_vm_pt_start(adev, vm, 0, cursor);
|
|
|
+ while (amdgpu_vm_pt_descendant(adev, cursor));
|
|
|
+}
|
|
|
+
|
|
|
+/**
+ * amdgpu_vm_pt_next_dfs - get the next node for a depth-first search
+ *
+ * @adev: amdgpu_device structure
+ * @cursor: current state
+ *
+ * Move the cursor to the next node in a depth-first search.
+ */
|
|
|
+static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev,
|
|
|
+ struct amdgpu_vm_pt_cursor *cursor)
|
|
|
+{
|
|
|
+ if (!cursor->entry)
|
|
|
+ return;
|
|
|
+
|
|
|
+ if (!cursor->parent)
|
|
|
+ cursor->entry = NULL;
|
|
|
+ else if (amdgpu_vm_pt_sibling(adev, cursor))
|
|
|
+ while (amdgpu_vm_pt_descendant(adev, cursor));
|
|
|
+ else
|
|
|
+ amdgpu_vm_pt_ancestor(cursor);
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * for_each_amdgpu_vm_pt_dfs_safe - safe depth-first search of all PDs/PTs
|
|
|
+ */
|
|
|
+#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) \
|
|
|
+ for (amdgpu_vm_pt_first_dfs((adev), (vm), &(cursor)), \
|
|
|
+ (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\
|
|
|
+ (entry); (entry) = (cursor).entry, \
|
|
|
+ amdgpu_vm_pt_next_dfs((adev), &(cursor)))
|
|
|
+
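
/*
 * Minimal usage sketch for the safe depth-first walk, in the same spirit as
 * amdgpu_vm_free_pts() and amdgpu_vm_invalidate_pds() below. "Safe" means
 * the entry handed to the loop body may be freed, because the cursor is
 * advanced before the body runs. The function name is hypothetical.
 */
static unsigned int example_count_allocated_pts(struct amdgpu_device *adev,
						struct amdgpu_vm *vm)
{
	struct amdgpu_vm_pt_cursor cursor;
	struct amdgpu_vm_pt *entry;
	unsigned int count = 0;

	for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry)
		if (entry->base.bo)
			++count;

	return count;
}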
|
|
|
/**
|
|
|
* amdgpu_vm_get_pd_bo - add the VM PD to a validation list
|
|
|
*
|
|
@@ -260,14 +614,54 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
|
|
|
struct list_head *validated,
|
|
|
struct amdgpu_bo_list_entry *entry)
|
|
|
{
|
|
|
- entry->robj = vm->root.base.bo;
|
|
|
entry->priority = 0;
|
|
|
- entry->tv.bo = &entry->robj->tbo;
|
|
|
+ entry->tv.bo = &vm->root.base.bo->tbo;
|
|
|
entry->tv.shared = true;
|
|
|
entry->user_pages = NULL;
|
|
|
list_add(&entry->tv.head, validated);
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
|
|
|
+ *
|
|
|
+ * @adev: amdgpu device pointer
|
|
|
+ * @vm: vm providing the BOs
|
|
|
+ *
|
|
|
+ * Move all BOs to the end of the LRU and remember their positions so they
+ * can be put there together.
|
|
|
+ */
|
|
|
+void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev,
|
|
|
+ struct amdgpu_vm *vm)
|
|
|
+{
|
|
|
+ struct ttm_bo_global *glob = adev->mman.bdev.glob;
|
|
|
+ struct amdgpu_vm_bo_base *bo_base;
|
|
|
+
|
|
|
+ if (vm->bulk_moveable) {
|
|
|
+ spin_lock(&glob->lru_lock);
|
|
|
+ ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move);
|
|
|
+ spin_unlock(&glob->lru_lock);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move));
|
|
|
+
|
|
|
+ spin_lock(&glob->lru_lock);
|
|
|
+ list_for_each_entry(bo_base, &vm->idle, vm_status) {
|
|
|
+ struct amdgpu_bo *bo = bo_base->bo;
|
|
|
+
|
|
|
+ if (!bo->parent)
|
|
|
+ continue;
|
|
|
+
|
|
|
+ ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move);
|
|
|
+ if (bo->shadow)
|
|
|
+ ttm_bo_move_to_lru_tail(&bo->shadow->tbo,
|
|
|
+ &vm->lru_bulk_move);
|
|
|
+ }
|
|
|
+ spin_unlock(&glob->lru_lock);
|
|
|
+
|
|
|
+ vm->bulk_moveable = true;
|
|
|
+}
|
|
|
+
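
/*
 * Sketch of the intended pairing with amdgpu_vm_validate_pt_bos() below,
 * assuming a command-submission style caller that is not part of this
 * excerpt: the first pass records the bulk move while walking the idle
 * list, later passes only replay it via ttm_bo_bulk_move_lru_tail(). The
 * wrapper name is hypothetical.
 */
static int example_vm_validate_and_bulk_move(struct amdgpu_device *adev,
					     struct amdgpu_vm *vm,
					     int (*validate)(void *p, struct amdgpu_bo *bo),
					     void *param)
{
	int r = amdgpu_vm_validate_pt_bos(adev, vm, validate, param);

	if (r)
		return r;

	amdgpu_vm_move_to_lru_tail(adev, vm);
	return 0;
}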
|
|
|
/**
|
|
|
* amdgpu_vm_validate_pt_bos - validate the page table BOs
|
|
|
*
|
|
@@ -285,47 +679,36 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|
|
int (*validate)(void *p, struct amdgpu_bo *bo),
|
|
|
void *param)
|
|
|
{
|
|
|
- struct ttm_bo_global *glob = adev->mman.bdev.glob;
|
|
|
struct amdgpu_vm_bo_base *bo_base, *tmp;
|
|
|
int r = 0;
|
|
|
|
|
|
+ vm->bulk_moveable &= list_empty(&vm->evicted);
|
|
|
+
|
|
|
list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
|
|
|
struct amdgpu_bo *bo = bo_base->bo;
|
|
|
|
|
|
- if (bo->parent) {
|
|
|
- r = validate(param, bo);
|
|
|
- if (r)
|
|
|
- break;
|
|
|
-
|
|
|
- spin_lock(&glob->lru_lock);
|
|
|
- ttm_bo_move_to_lru_tail(&bo->tbo);
|
|
|
- if (bo->shadow)
|
|
|
- ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
|
|
|
- spin_unlock(&glob->lru_lock);
|
|
|
- }
|
|
|
+ r = validate(param, bo);
|
|
|
+ if (r)
|
|
|
+ break;
|
|
|
|
|
|
if (bo->tbo.type != ttm_bo_type_kernel) {
|
|
|
- spin_lock(&vm->moved_lock);
|
|
|
- list_move(&bo_base->vm_status, &vm->moved);
|
|
|
- spin_unlock(&vm->moved_lock);
|
|
|
+ amdgpu_vm_bo_moved(bo_base);
|
|
|
} else {
|
|
|
- list_move(&bo_base->vm_status, &vm->relocated);
|
|
|
+ if (vm->use_cpu_for_update)
|
|
|
+ r = amdgpu_bo_kmap(bo, NULL);
|
|
|
+ else
|
|
|
+ r = amdgpu_ttm_alloc_gart(&bo->tbo);
|
|
|
+ if (r)
|
|
|
+ break;
|
|
|
+ if (bo->shadow) {
|
|
|
+ r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo);
|
|
|
+ if (r)
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ amdgpu_vm_bo_relocated(bo_base);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
- spin_lock(&glob->lru_lock);
|
|
|
- list_for_each_entry(bo_base, &vm->idle, vm_status) {
|
|
|
- struct amdgpu_bo *bo = bo_base->bo;
|
|
|
-
|
|
|
- if (!bo->parent)
|
|
|
- continue;
|
|
|
-
|
|
|
- ttm_bo_move_to_lru_tail(&bo->tbo);
|
|
|
- if (bo->shadow)
|
|
|
- ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
|
|
|
- }
|
|
|
- spin_unlock(&glob->lru_lock);
|
|
|
-
|
|
|
return r;
|
|
|
}
|
|
|
|
|
@@ -376,7 +759,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
|
|
|
if (level == adev->vm_manager.root_level) {
|
|
|
ats_entries = amdgpu_vm_level_shift(adev, level);
|
|
|
ats_entries += AMDGPU_GPU_PAGE_SHIFT;
|
|
|
- ats_entries = AMDGPU_VA_HOLE_START >> ats_entries;
|
|
|
+ ats_entries = AMDGPU_GMC_HOLE_START >> ats_entries;
|
|
|
ats_entries = min(ats_entries, entries);
|
|
|
entries -= ats_entries;
|
|
|
} else {
|
|
@@ -397,6 +780,10 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
|
|
|
if (r)
|
|
|
goto error;
|
|
|
|
|
|
+ r = amdgpu_ttm_alloc_gart(&bo->tbo);
|
|
|
+ if (r)
|
|
|
+ return r;
|
|
|
+
|
|
|
r = amdgpu_job_alloc_with_ib(adev, 64, &job);
|
|
|
if (r)
|
|
|
goto error;
|
|
@@ -448,159 +835,154 @@ error:
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * amdgpu_vm_alloc_levels - allocate the PD/PT levels
|
|
|
+ * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation
|
|
|
*
|
|
|
* @adev: amdgpu_device pointer
|
|
|
- * @vm: requested vm
|
|
|
- * @parent: parent PT
|
|
|
- * @saddr: start of the address range
|
|
|
- * @eaddr: end of the address range
|
|
|
- * @level: VMPT level
|
|
|
- * @ats: indicate ATS support from PTE
|
|
|
+ * @vm: requesting vm
|
|
|
+ * @bp: resulting BO allocation parameters
|
|
|
+ */
|
|
|
+static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|
|
+ int level, struct amdgpu_bo_param *bp)
|
|
|
+{
|
|
|
+ memset(bp, 0, sizeof(*bp));
|
|
|
+
|
|
|
+ bp->size = amdgpu_vm_bo_size(adev, level);
|
|
|
+ bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
|
|
|
+ bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
|
|
|
+ if (bp->size <= PAGE_SIZE && adev->asic_type >= CHIP_VEGA10 &&
|
|
|
+ adev->flags & AMD_IS_APU)
|
|
|
+ bp->domain |= AMDGPU_GEM_DOMAIN_GTT;
|
|
|
+ bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
|
|
|
+ bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
|
|
|
+ AMDGPU_GEM_CREATE_CPU_GTT_USWC;
|
|
|
+ if (vm->use_cpu_for_update)
|
|
|
+ bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
|
|
|
+ else if (!vm->root.base.bo || vm->root.base.bo->shadow)
|
|
|
+ bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
|
|
|
+ bp->type = ttm_bo_type_kernel;
|
|
|
+ if (vm->root.base.bo)
|
|
|
+ bp->resv = vm->root.base.bo->tbo.resv;
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * amdgpu_vm_alloc_pts - Allocate page tables.
|
|
|
+ *
|
|
|
+ * @adev: amdgpu_device pointer
|
|
|
+ * @vm: VM to allocate page tables for
|
|
|
+ * @saddr: Start address which needs to be allocated
|
|
|
+ * @size: Size of the range to allocate, starting at @saddr.
|
|
|
*
|
|
|
* Make sure the page directories and page tables are allocated
|
|
|
*
|
|
|
* Returns:
|
|
|
* 0 on success, errno otherwise.
|
|
|
*/
|
|
|
-static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
|
|
|
- struct amdgpu_vm *vm,
|
|
|
- struct amdgpu_vm_pt *parent,
|
|
|
- uint64_t saddr, uint64_t eaddr,
|
|
|
- unsigned level, bool ats)
|
|
|
+int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
|
|
|
+ struct amdgpu_vm *vm,
|
|
|
+ uint64_t saddr, uint64_t size)
|
|
|
{
|
|
|
- unsigned shift = amdgpu_vm_level_shift(adev, level);
|
|
|
- unsigned pt_idx, from, to;
|
|
|
- u64 flags;
|
|
|
+ struct amdgpu_vm_pt_cursor cursor;
|
|
|
+ struct amdgpu_bo *pt;
|
|
|
+ bool ats = false;
|
|
|
+ uint64_t eaddr;
|
|
|
int r;
|
|
|
|
|
|
- if (!parent->entries) {
|
|
|
- unsigned num_entries = amdgpu_vm_num_entries(adev, level);
|
|
|
+ /* validate the parameters */
|
|
|
+ if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
|
|
|
+ return -EINVAL;
|
|
|
+
|
|
|
+ eaddr = saddr + size - 1;
|
|
|
|
|
|
- parent->entries = kvmalloc_array(num_entries,
|
|
|
- sizeof(struct amdgpu_vm_pt),
|
|
|
- GFP_KERNEL | __GFP_ZERO);
|
|
|
- if (!parent->entries)
|
|
|
- return -ENOMEM;
|
|
|
- memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt));
|
|
|
- }
|
|
|
+ if (vm->pte_support_ats)
|
|
|
+ ats = saddr < AMDGPU_GMC_HOLE_START;
|
|
|
|
|
|
- from = saddr >> shift;
|
|
|
- to = eaddr >> shift;
|
|
|
- if (from >= amdgpu_vm_num_entries(adev, level) ||
|
|
|
- to >= amdgpu_vm_num_entries(adev, level))
|
|
|
+ saddr /= AMDGPU_GPU_PAGE_SIZE;
|
|
|
+ eaddr /= AMDGPU_GPU_PAGE_SIZE;
|
|
|
+
|
|
|
+ if (eaddr >= adev->vm_manager.max_pfn) {
|
|
|
+ dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
|
|
|
+ eaddr, adev->vm_manager.max_pfn);
|
|
|
return -EINVAL;
|
|
|
+ }
|
|
|
|
|
|
- ++level;
|
|
|
- saddr = saddr & ((1 << shift) - 1);
|
|
|
- eaddr = eaddr & ((1 << shift) - 1);
|
|
|
+ for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) {
|
|
|
+ struct amdgpu_vm_pt *entry = cursor.entry;
|
|
|
+ struct amdgpu_bo_param bp;
|
|
|
|
|
|
- flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
|
|
|
- if (vm->root.base.bo->shadow)
|
|
|
- flags |= AMDGPU_GEM_CREATE_SHADOW;
|
|
|
- if (vm->use_cpu_for_update)
|
|
|
- flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
|
|
|
- else
|
|
|
- flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
|
|
|
-
|
|
|
- /* walk over the address space and allocate the page tables */
|
|
|
- for (pt_idx = from; pt_idx <= to; ++pt_idx) {
|
|
|
- struct reservation_object *resv = vm->root.base.bo->tbo.resv;
|
|
|
- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
|
|
|
- struct amdgpu_bo *pt;
|
|
|
-
|
|
|
- if (!entry->base.bo) {
|
|
|
- struct amdgpu_bo_param bp;
|
|
|
-
|
|
|
- memset(&bp, 0, sizeof(bp));
|
|
|
- bp.size = amdgpu_vm_bo_size(adev, level);
|
|
|
- bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
|
|
|
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
|
|
|
- bp.flags = flags;
|
|
|
- bp.type = ttm_bo_type_kernel;
|
|
|
- bp.resv = resv;
|
|
|
- r = amdgpu_bo_create(adev, &bp, &pt);
|
|
|
- if (r)
|
|
|
- return r;
|
|
|
+ if (cursor.level < AMDGPU_VM_PTB) {
|
|
|
+ unsigned num_entries;
|
|
|
|
|
|
- r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats);
|
|
|
- if (r) {
|
|
|
- amdgpu_bo_unref(&pt->shadow);
|
|
|
- amdgpu_bo_unref(&pt);
|
|
|
- return r;
|
|
|
- }
|
|
|
+ num_entries = amdgpu_vm_num_entries(adev, cursor.level);
|
|
|
+ entry->entries = kvmalloc_array(num_entries,
|
|
|
+ sizeof(*entry->entries),
|
|
|
+ GFP_KERNEL |
|
|
|
+ __GFP_ZERO);
|
|
|
+ if (!entry->entries)
|
|
|
+ return -ENOMEM;
|
|
|
+ }
|
|
|
|
|
|
- if (vm->use_cpu_for_update) {
|
|
|
- r = amdgpu_bo_kmap(pt, NULL);
|
|
|
- if (r) {
|
|
|
- amdgpu_bo_unref(&pt->shadow);
|
|
|
- amdgpu_bo_unref(&pt);
|
|
|
- return r;
|
|
|
- }
|
|
|
- }
|
|
|
|
|
|
- /* Keep a reference to the root directory to avoid
|
|
|
- * freeing them up in the wrong order.
|
|
|
- */
|
|
|
- pt->parent = amdgpu_bo_ref(parent->base.bo);
|
|
|
+ if (entry->base.bo)
|
|
|
+ continue;
|
|
|
|
|
|
- amdgpu_vm_bo_base_init(&entry->base, vm, pt);
|
|
|
- }
|
|
|
+ amdgpu_vm_bo_param(adev, vm, cursor.level, &bp);
|
|
|
|
|
|
- if (level < AMDGPU_VM_PTB) {
|
|
|
- uint64_t sub_saddr = (pt_idx == from) ? saddr : 0;
|
|
|
- uint64_t sub_eaddr = (pt_idx == to) ? eaddr :
|
|
|
- ((1 << shift) - 1);
|
|
|
- r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr,
|
|
|
- sub_eaddr, level, ats);
|
|
|
+ r = amdgpu_bo_create(adev, &bp, &pt);
|
|
|
+ if (r)
|
|
|
+ return r;
|
|
|
+
|
|
|
+ r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats);
|
|
|
+ if (r)
|
|
|
+ goto error_free_pt;
|
|
|
+
|
|
|
+ if (vm->use_cpu_for_update) {
|
|
|
+ r = amdgpu_bo_kmap(pt, NULL);
|
|
|
if (r)
|
|
|
- return r;
|
|
|
+ goto error_free_pt;
|
|
|
}
|
|
|
+
|
|
|
+ /* Keep a reference to the root directory to avoid
|
|
|
+ * freeing them up in the wrong order.
|
|
|
+ */
|
|
|
+ pt->parent = amdgpu_bo_ref(cursor.parent->base.bo);
|
|
|
+
|
|
|
+ amdgpu_vm_bo_base_init(&entry->base, vm, pt);
|
|
|
}
|
|
|
|
|
|
return 0;
|
|
|
-}
|
|
|
|
|
|
-/**
|
|
|
- * amdgpu_vm_alloc_pts - Allocate page tables.
|
|
|
- *
|
|
|
- * @adev: amdgpu_device pointer
|
|
|
- * @vm: VM to allocate page tables for
|
|
|
- * @saddr: Start address which needs to be allocated
|
|
|
- * @size: Size from start address we need.
|
|
|
+error_free_pt:
|
|
|
+ amdgpu_bo_unref(&pt->shadow);
|
|
|
+ amdgpu_bo_unref(&pt);
|
|
|
+ return r;
|
|
|
+}
|
|
|
+
|
|
|
+/**
|
|
|
+ * amdgpu_vm_free_pts - free PD/PT levels
|
|
|
*
|
|
|
- * Make sure the page tables are allocated.
|
|
|
+ * @adev: amdgpu device structure
|
|
|
+ * @vm: amdgpu vm structure
|
|
|
*
|
|
|
- * Returns:
|
|
|
- * 0 on success, errno otherwise.
|
|
|
+ * Free the page directory or page table level and all sub levels.
|
|
|
*/
|
|
|
-int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
|
|
|
- struct amdgpu_vm *vm,
|
|
|
- uint64_t saddr, uint64_t size)
|
|
|
+static void amdgpu_vm_free_pts(struct amdgpu_device *adev,
|
|
|
+ struct amdgpu_vm *vm)
|
|
|
{
|
|
|
- uint64_t eaddr;
|
|
|
- bool ats = false;
|
|
|
-
|
|
|
- /* validate the parameters */
|
|
|
- if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK)
|
|
|
- return -EINVAL;
|
|
|
-
|
|
|
- eaddr = saddr + size - 1;
|
|
|
-
|
|
|
- if (vm->pte_support_ats)
|
|
|
- ats = saddr < AMDGPU_VA_HOLE_START;
|
|
|
+ struct amdgpu_vm_pt_cursor cursor;
|
|
|
+ struct amdgpu_vm_pt *entry;
|
|
|
|
|
|
- saddr /= AMDGPU_GPU_PAGE_SIZE;
|
|
|
- eaddr /= AMDGPU_GPU_PAGE_SIZE;
|
|
|
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) {
|
|
|
|
|
|
- if (eaddr >= adev->vm_manager.max_pfn) {
|
|
|
- dev_err(adev->dev, "va above limit (0x%08llX >= 0x%08llX)\n",
|
|
|
- eaddr, adev->vm_manager.max_pfn);
|
|
|
- return -EINVAL;
|
|
|
+ if (entry->base.bo) {
|
|
|
+ entry->base.bo->vm_bo = NULL;
|
|
|
+ list_del(&entry->base.vm_status);
|
|
|
+ amdgpu_bo_unref(&entry->base.bo->shadow);
|
|
|
+ amdgpu_bo_unref(&entry->base.bo);
|
|
|
+ }
|
|
|
+ kvfree(entry->entries);
|
|
|
}
|
|
|
|
|
|
- return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr,
|
|
|
- adev->vm_manager.root_level, ats);
|
|
|
+ BUG_ON(vm->root.base.bo);
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -714,7 +1096,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
|
|
|
}
|
|
|
|
|
|
gds_switch_needed &= !!ring->funcs->emit_gds_switch;
|
|
|
- vm_flush_needed &= !!ring->funcs->emit_vm_flush;
|
|
|
+ vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
|
|
|
+ job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
|
|
|
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
|
|
|
ring->funcs->emit_wreg;
|
|
|
|
|
@@ -799,12 +1182,13 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
|
|
|
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
|
|
|
struct amdgpu_bo *bo)
|
|
|
{
|
|
|
- struct amdgpu_bo_va *bo_va;
|
|
|
+ struct amdgpu_vm_bo_base *base;
|
|
|
|
|
|
- list_for_each_entry(bo_va, &bo->va, base.bo_list) {
|
|
|
- if (bo_va->base.vm == vm) {
|
|
|
- return bo_va;
|
|
|
- }
|
|
|
+ for (base = bo->vm_bo; base; base = base->next) {
|
|
|
+ if (base->vm != vm)
|
|
|
+ continue;
|
|
|
+
|
|
|
+ return container_of(base, struct amdgpu_bo_va, base);
|
|
|
}
|
|
|
return NULL;
|
|
|
}
|
|
@@ -957,6 +1341,22 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|
|
return r;
|
|
|
}
|
|
|
|
|
|
+/**
|
|
|
+ * amdgpu_vm_update_func - helper to call update function
|
|
|
+ *
|
|
|
+ * Calls the update function for both the given BO and its shadow.
|
|
|
+ */
|
|
|
+static void amdgpu_vm_update_func(struct amdgpu_pte_update_params *params,
|
|
|
+ struct amdgpu_bo *bo,
|
|
|
+ uint64_t pe, uint64_t addr,
|
|
|
+ unsigned count, uint32_t incr,
|
|
|
+ uint64_t flags)
|
|
|
+{
|
|
|
+ if (bo->shadow)
|
|
|
+ params->func(params, bo->shadow, pe, addr, count, incr, flags);
|
|
|
+ params->func(params, bo, pe, addr, count, incr, flags);
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* amdgpu_vm_update_pde - update a single level in the hierarchy
|
|
|
*
|
|
@@ -984,47 +1384,28 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
|
|
|
pbo = pbo->parent;
|
|
|
|
|
|
level += params->adev->vm_manager.root_level;
|
|
|
- pt = amdgpu_bo_gpu_offset(entry->base.bo);
|
|
|
- flags = AMDGPU_PTE_VALID;
|
|
|
- amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags);
|
|
|
+ amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags);
|
|
|
pde = (entry - parent->entries) * 8;
|
|
|
- if (bo->shadow)
|
|
|
- params->func(params, bo->shadow, pde, pt, 1, 0, flags);
|
|
|
- params->func(params, bo, pde, pt, 1, 0, flags);
|
|
|
+ amdgpu_vm_update_func(params, bo, pde, pt, 1, 0, flags);
|
|
|
}
|
|
|
|
|
|
/*
|
|
|
- * amdgpu_vm_invalidate_level - mark all PD levels as invalid
|
|
|
+ * amdgpu_vm_invalidate_pds - mark all PDs as invalid
|
|
|
*
|
|
|
* @adev: amdgpu_device pointer
|
|
|
* @vm: related vm
|
|
|
- * @parent: parent PD
|
|
|
- * @level: VMPT level
|
|
|
*
|
|
|
* Mark all PD level as invalid after an error.
|
|
|
*/
|
|
|
-static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
|
|
|
- struct amdgpu_vm *vm,
|
|
|
- struct amdgpu_vm_pt *parent,
|
|
|
- unsigned level)
|
|
|
+static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev,
|
|
|
+ struct amdgpu_vm *vm)
|
|
|
{
|
|
|
- unsigned pt_idx, num_entries;
|
|
|
-
|
|
|
- /*
|
|
|
- * Recurse into the subdirectories. This recursion is harmless because
|
|
|
- * we only have a maximum of 5 layers.
|
|
|
- */
|
|
|
- num_entries = amdgpu_vm_num_entries(adev, level);
|
|
|
- for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) {
|
|
|
- struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
|
|
|
-
|
|
|
- if (!entry->base.bo)
|
|
|
- continue;
|
|
|
+ struct amdgpu_vm_pt_cursor cursor;
|
|
|
+ struct amdgpu_vm_pt *entry;
|
|
|
|
|
|
- if (!entry->base.moved)
|
|
|
- list_move(&entry->base.vm_status, &vm->relocated);
|
|
|
- amdgpu_vm_invalidate_level(adev, vm, entry, level + 1);
|
|
|
- }
|
|
|
+ for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry)
|
|
|
+ if (entry->base.bo && !entry->base.moved)
|
|
|
+ amdgpu_vm_bo_relocated(&entry->base);
|
|
|
}
|
|
|
|
|
|
/*
|
|
@@ -1054,14 +1435,6 @@ restart:
|
|
|
params.adev = adev;
|
|
|
|
|
|
if (vm->use_cpu_for_update) {
|
|
|
- struct amdgpu_vm_bo_base *bo_base;
|
|
|
-
|
|
|
- list_for_each_entry(bo_base, &vm->relocated, vm_status) {
|
|
|
- r = amdgpu_bo_kmap(bo_base->bo, NULL);
|
|
|
- if (unlikely(r))
|
|
|
- return r;
|
|
|
- }
|
|
|
-
|
|
|
r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
|
|
|
if (unlikely(r))
|
|
|
return r;
|
|
@@ -1078,25 +1451,16 @@ restart:
|
|
|
}
|
|
|
|
|
|
while (!list_empty(&vm->relocated)) {
|
|
|
- struct amdgpu_vm_bo_base *bo_base, *parent;
|
|
|
struct amdgpu_vm_pt *pt, *entry;
|
|
|
- struct amdgpu_bo *bo;
|
|
|
|
|
|
- bo_base = list_first_entry(&vm->relocated,
|
|
|
- struct amdgpu_vm_bo_base,
|
|
|
- vm_status);
|
|
|
- bo_base->moved = false;
|
|
|
- list_del_init(&bo_base->vm_status);
|
|
|
+ entry = list_first_entry(&vm->relocated, struct amdgpu_vm_pt,
|
|
|
+ base.vm_status);
|
|
|
+ amdgpu_vm_bo_idle(&entry->base);
|
|
|
|
|
|
- bo = bo_base->bo->parent;
|
|
|
- if (!bo)
|
|
|
+ pt = amdgpu_vm_pt_parent(entry);
|
|
|
+ if (!pt)
|
|
|
continue;
|
|
|
|
|
|
- parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base,
|
|
|
- bo_list);
|
|
|
- pt = container_of(parent, struct amdgpu_vm_pt, base);
|
|
|
- entry = container_of(bo_base, struct amdgpu_vm_pt, base);
|
|
|
-
|
|
|
amdgpu_vm_update_pde(¶ms, vm, pt, entry);
|
|
|
|
|
|
if (!vm->use_cpu_for_update &&
|
|
@@ -1138,85 +1502,90 @@ restart:
|
|
|
return 0;
|
|
|
|
|
|
error:
|
|
|
- amdgpu_vm_invalidate_level(adev, vm, &vm->root,
|
|
|
- adev->vm_manager.root_level);
|
|
|
+ amdgpu_vm_invalidate_pds(adev, vm);
|
|
|
amdgpu_job_free(job);
|
|
|
return r;
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * amdgpu_vm_find_entry - find the entry for an address
|
|
|
+ * amdgpu_vm_update_huge - figure out parameters for PTE updates
|
|
|
*
|
|
|
- * @p: see amdgpu_pte_update_params definition
|
|
|
- * @addr: virtual address in question
|
|
|
- * @entry: resulting entry or NULL
|
|
|
- * @parent: parent entry
|
|
|
- *
|
|
|
- * Find the vm_pt entry and it's parent for the given address.
|
|
|
+ * Make sure to set the right flags for the PTEs at the desired level.
|
|
|
*/
|
|
|
-void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
|
|
|
- struct amdgpu_vm_pt **entry,
|
|
|
- struct amdgpu_vm_pt **parent)
|
|
|
-{
|
|
|
- unsigned level = p->adev->vm_manager.root_level;
|
|
|
-
|
|
|
- *parent = NULL;
|
|
|
- *entry = &p->vm->root;
|
|
|
- while ((*entry)->entries) {
|
|
|
- unsigned shift = amdgpu_vm_level_shift(p->adev, level++);
|
|
|
+static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params,
|
|
|
+ struct amdgpu_bo *bo, unsigned level,
|
|
|
+ uint64_t pe, uint64_t addr,
|
|
|
+ unsigned count, uint32_t incr,
|
|
|
+ uint64_t flags)
|
|
|
|
|
|
- *parent = *entry;
|
|
|
- *entry = &(*entry)->entries[addr >> shift];
|
|
|
- addr &= (1ULL << shift) - 1;
|
|
|
+{
|
|
|
+ if (level != AMDGPU_VM_PTB) {
|
|
|
+ flags |= AMDGPU_PDE_PTE;
|
|
|
+ amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
|
|
|
}
|
|
|
|
|
|
- if (level != AMDGPU_VM_PTB)
|
|
|
- *entry = NULL;
|
|
|
+ amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags);
|
|
|
}
|
|
|
|
|
|
/**
|
|
|
- * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages
|
|
|
+ * amdgpu_vm_fragment - get fragment for PTEs
|
|
|
*
|
|
|
- * @p: see amdgpu_pte_update_params definition
|
|
|
- * @entry: vm_pt entry to check
|
|
|
- * @parent: parent entry
|
|
|
- * @nptes: number of PTEs updated with this operation
|
|
|
- * @dst: destination address where the PTEs should point to
|
|
|
- * @flags: access flags fro the PTEs
|
|
|
+ * @params: see amdgpu_pte_update_params definition
|
|
|
+ * @start: first PTE to handle
|
|
|
+ * @end: last PTE to handle
|
|
|
+ * @flags: hw mapping flags
|
|
|
+ * @frag: resulting fragment size
|
|
|
+ * @frag_end: end of this fragment
|
|
|
*
|
|
|
- * Check if we can update the PD with a huge page.
|
|
|
+ * Returns the first possible fragment for the start and end address.
|
|
|
*/
|
|
|
-static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
|
|
|
- struct amdgpu_vm_pt *entry,
|
|
|
- struct amdgpu_vm_pt *parent,
|
|
|
- unsigned nptes, uint64_t dst,
|
|
|
- uint64_t flags)
|
|
|
+static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params,
|
|
|
+ uint64_t start, uint64_t end, uint64_t flags,
|
|
|
+ unsigned int *frag, uint64_t *frag_end)
|
|
|
{
|
|
|
- uint64_t pde;
|
|
|
+ /**
|
|
|
+ * The MC L1 TLB supports variable sized pages, based on a fragment
|
|
|
+ * field in the PTE. When this field is set to a non-zero value, page
|
|
|
+ * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
|
|
|
+ * flags are considered valid for all PTEs within the fragment range
|
|
|
+ * and corresponding mappings are assumed to be physically contiguous.
|
|
|
+ *
|
|
|
+ * The L1 TLB can store a single PTE for the whole fragment,
|
|
|
+ * significantly increasing the space available for translation
|
|
|
+ * caching. This leads to large improvements in throughput when the
|
|
|
+ * TLB is under pressure.
|
|
|
+ *
|
|
|
+ * The L2 TLB distributes small and large fragments into two
|
|
|
+ * asymmetric partitions. The large fragment cache is significantly
|
|
|
+ * larger. Thus, we try to use large fragments wherever possible.
|
|
|
+ * Userspace can support this by aligning virtual base address and
|
|
|
+ * allocation size to the fragment size.
|
|
|
+ *
|
|
|
+ * Starting with Vega10 the fragment size only controls the L1. The L2
|
|
|
+ * is now directly feed with small/huge/giant pages from the walker.
|
|
|
+ */
|
|
|
+ unsigned max_frag;
|
|
|
|
|
|
- /* In the case of a mixed PT the PDE must point to it*/
|
|
|
- if (p->adev->asic_type >= CHIP_VEGA10 && !p->src &&
|
|
|
- nptes == AMDGPU_VM_PTE_COUNT(p->adev)) {
|
|
|
- /* Set the huge page flag to stop scanning at this PDE */
|
|
|
- flags |= AMDGPU_PDE_PTE;
|
|
|
- }
|
|
|
+ if (params->adev->asic_type < CHIP_VEGA10)
|
|
|
+ max_frag = params->adev->vm_manager.fragment_size;
|
|
|
+ else
|
|
|
+ max_frag = 31;
|
|
|
|
|
|
- if (!(flags & AMDGPU_PDE_PTE)) {
|
|
|
- if (entry->huge) {
|
|
|
- /* Add the entry to the relocated list to update it. */
|
|
|
- entry->huge = false;
|
|
|
- list_move(&entry->base.vm_status, &p->vm->relocated);
|
|
|
- }
|
|
|
+ /* system pages are non continuously */
|
|
|
+ if (params->src) {
|
|
|
+ *frag = 0;
|
|
|
+ *frag_end = end;
|
|
|
return;
|
|
|
}
|
|
|
|
|
|
- entry->huge = true;
|
|
|
- amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags);
|
|
|
-
|
|
|
- pde = (entry - parent->entries) * 8;
|
|
|
- if (parent->base.bo->shadow)
|
|
|
- p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags);
|
|
|
- p->func(p, parent->base.bo, pde, dst, 1, 0, flags);
|
|
|
+ /* This intentionally wraps around if no bit is set */
|
|
|
+ *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1);
|
|
|
+ if (*frag >= max_frag) {
|
|
|
+ *frag = max_frag;
|
|
|
+ *frag_end = end & ~((1ULL << max_frag) - 1);
|
|
|
+ } else {
|
|
|
+ *frag_end = start + (1 << *frag);
|
|
|
+ }
|
|
|
}
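
/*
 * A worked example of the fragment computation above, assuming addresses in
 * GPU page units and a max_frag of 9 (a common default for parts before
 * Vega10): for start = 0x2200 and end = 0x2600, ffs(start) - 1 = 9 and
 * fls64(end - start) - 1 = 10, so frag = min(9, 10) = 9. Because
 * frag >= max_frag, frag_end becomes end & ~((1ULL << 9) - 1) = 0x2600,
 * i.e. the whole range can be mapped with AMDGPU_PTE_FRAG(9), which means
 * 2MiB fragments (1 << (12 + 9)).
 */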
|
|
|
|
|
|
/**
|
|
@@ -1234,112 +1603,105 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
|
|
|
* 0 for success, -EINVAL for failure.
|
|
|
*/
|
|
|
static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
|
|
|
- uint64_t start, uint64_t end,
|
|
|
- uint64_t dst, uint64_t flags)
|
|
|
+ uint64_t start, uint64_t end,
|
|
|
+ uint64_t dst, uint64_t flags)
|
|
|
{
|
|
|
struct amdgpu_device *adev = params->adev;
|
|
|
- const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
|
|
|
+ struct amdgpu_vm_pt_cursor cursor;
|
|
|
+ uint64_t frag_start = start, frag_end;
|
|
|
+ unsigned int frag;
|
|
|
|
|
|
- uint64_t addr, pe_start;
|
|
|
- struct amdgpu_bo *pt;
|
|
|
- unsigned nptes;
|
|
|
+ /* figure out the initial fragment */
|
|
|
+ amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end);
|
|
|
|
|
|
- /* walk over the address space and update the page tables */
|
|
|
- for (addr = start; addr < end; addr += nptes,
|
|
|
- dst += nptes * AMDGPU_GPU_PAGE_SIZE) {
|
|
|
- struct amdgpu_vm_pt *entry, *parent;
|
|
|
+ /* walk over the address space and update the PTs */
|
|
|
+ amdgpu_vm_pt_start(adev, params->vm, start, &cursor);
|
|
|
+ while (cursor.pfn < end) {
|
|
|
+ struct amdgpu_bo *pt = cursor.entry->base.bo;
|
|
|
+ unsigned shift, parent_shift, mask;
|
|
|
+ uint64_t incr, entry_end, pe_start;
|
|
|
|
|
|
- amdgpu_vm_get_entry(params, addr, &entry, &parent);
|
|
|
- if (!entry)
|
|
|
+ if (!pt)
|
|
|
return -ENOENT;
|
|
|
|
|
|
- if ((addr & ~mask) == (end & ~mask))
|
|
|
- nptes = end - addr;
|
|
|
- else
|
|
|
- nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
|
|
|
-
|
|
|
- amdgpu_vm_handle_huge_pages(params, entry, parent,
|
|
|
- nptes, dst, flags);
|
|
|
- /* We don't need to update PTEs for huge pages */
|
|
|
- if (entry->huge)
|
|
|
+ /* The root level can't be a huge page */
|
|
|
+ if (cursor.level == adev->vm_manager.root_level) {
|
|
|
+ if (!amdgpu_vm_pt_descendant(adev, &cursor))
|
|
|
+ return -ENOENT;
|
|
|
continue;
|
|
|
+ }
|
|
|
|
|
|
- pt = entry->base.bo;
|
|
|
- pe_start = (addr & mask) * 8;
|
|
|
- if (pt->shadow)
|
|
|
- params->func(params, pt->shadow, pe_start, dst, nptes,
|
|
|
- AMDGPU_GPU_PAGE_SIZE, flags);
|
|
|
- params->func(params, pt, pe_start, dst, nptes,
|
|
|
- AMDGPU_GPU_PAGE_SIZE, flags);
|
|
|
- }
|
|
|
-
|
|
|
- return 0;
|
|
|
-}
|
|
|
+ /* First check if the entry is already handled */
|
|
|
+ if (cursor.pfn < frag_start) {
|
|
|
+ cursor.entry->huge = true;
|
|
|
+ amdgpu_vm_pt_next(adev, &cursor);
|
|
|
+ continue;
|
|
|
+ }
|
|
|
|
|
|
-/*
|
|
|
- * amdgpu_vm_frag_ptes - add fragment information to PTEs
|
|
|
- *
|
|
|
- * @params: see amdgpu_pte_update_params definition
|
|
|
- * @vm: requested vm
|
|
|
- * @start: first PTE to handle
|
|
|
- * @end: last PTE to handle
|
|
|
- * @dst: addr those PTEs should point to
|
|
|
- * @flags: hw mapping flags
|
|
|
- *
|
|
|
- * Returns:
|
|
|
- * 0 for success, -EINVAL for failure.
|
|
|
- */
|
|
|
-static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
|
|
|
- uint64_t start, uint64_t end,
|
|
|
- uint64_t dst, uint64_t flags)
|
|
|
-{
|
|
|
- /**
|
|
|
- * The MC L1 TLB supports variable sized pages, based on a fragment
|
|
|
- * field in the PTE. When this field is set to a non-zero value, page
|
|
|
- * granularity is increased from 4KB to (1 << (12 + frag)). The PTE
|
|
|
- * flags are considered valid for all PTEs within the fragment range
|
|
|
- * and corresponding mappings are assumed to be physically contiguous.
|
|
|
- *
|
|
|
- * The L1 TLB can store a single PTE for the whole fragment,
|
|
|
- * significantly increasing the space available for translation
|
|
|
- * caching. This leads to large improvements in throughput when the
|
|
|
- * TLB is under pressure.
|
|
|
- *
|
|
|
- * The L2 TLB distributes small and large fragments into two
|
|
|
- * asymmetric partitions. The large fragment cache is significantly
|
|
|
- * larger. Thus, we try to use large fragments wherever possible.
|
|
|
- * Userspace can support this by aligning virtual base address and
|
|
|
- * allocation size to the fragment size.
|
|
|
- */
|
|
|
- unsigned max_frag = params->adev->vm_manager.fragment_size;
|
|
|
- int r;
|
|
|
+ /* If it isn't already handled it can't be a huge page */
|
|
|
+ if (cursor.entry->huge) {
|
|
|
+ /* Add the entry to the relocated list to update it. */
|
|
|
+ cursor.entry->huge = false;
|
|
|
+ amdgpu_vm_bo_relocated(&cursor.entry->base);
|
|
|
+ }
|
|
|
|
|
|
- /* system pages are non continuously */
|
|
|
- if (params->src || !(flags & AMDGPU_PTE_VALID))
|
|
|
- return amdgpu_vm_update_ptes(params, start, end, dst, flags);
|
|
|
-
|
|
|
- while (start != end) {
|
|
|
- uint64_t frag_flags, frag_end;
|
|
|
- unsigned frag;
|
|
|
-
|
|
|
- /* This intentionally wraps around if no bit is set */
|
|
|
- frag = min((unsigned)ffs(start) - 1,
|
|
|
- (unsigned)fls64(end - start) - 1);
|
|
|
- if (frag >= max_frag) {
|
|
|
- frag_flags = AMDGPU_PTE_FRAG(max_frag);
|
|
|
- frag_end = end & ~((1ULL << max_frag) - 1);
|
|
|
- } else {
|
|
|
- frag_flags = AMDGPU_PTE_FRAG(frag);
|
|
|
- frag_end = start + (1 << frag);
|
|
|
+ shift = amdgpu_vm_level_shift(adev, cursor.level);
|
|
|
+ parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1);
|
|
|
+ if (adev->asic_type < CHIP_VEGA10) {
|
|
|
+ /* No huge page support before GMC v9 */
|
|
|
+ if (cursor.level != AMDGPU_VM_PTB) {
|
|
|
+ if (!amdgpu_vm_pt_descendant(adev, &cursor))
|
|
|
+ return -ENOENT;
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ } else if (frag < shift) {
|
|
|
+ /* We can't use this level when the fragment size is
|
|
|
+ * smaller than the address shift. Go to the next
|
|
|
+ * child entry and try again.
|
|
|
+ */
|
|
|
+ if (!amdgpu_vm_pt_descendant(adev, &cursor))
|
|
|
+ return -ENOENT;
|
|
|
+ continue;
|
|
|
+ } else if (frag >= parent_shift) {
|
|
|
+ /* If the fragment size is even larger than the parent
|
|
|
+ * shift we should go up one level and check it again.
|
|
|
+ */
|
|
|
+ if (!amdgpu_vm_pt_ancestor(&cursor))
|
|
|
+ return -ENOENT;
|
|
|
+ continue;
|
|
|
}
|
|
|
|
|
|
- r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
|
|
|
- flags | frag_flags);
|
|
|
- if (r)
|
|
|
- return r;
|
|
|
+ /* Looks good so far, calculate parameters for the update */
|
|
|
+ incr = AMDGPU_GPU_PAGE_SIZE << shift;
|
|
|
+ mask = amdgpu_vm_entries_mask(adev, cursor.level);
|
|
|
+ pe_start = ((cursor.pfn >> shift) & mask) * 8;
|
|
|
+ entry_end = (mask + 1) << shift;
|
|
|
+ entry_end += cursor.pfn & ~(entry_end - 1);
|
|
|
+ entry_end = min(entry_end, end);
|
|
|
+
|
|
|
+ do {
|
|
|
+ uint64_t upd_end = min(entry_end, frag_end);
|
|
|
+ unsigned nptes = (upd_end - frag_start) >> shift;
|
|
|
+
|
|
|
+ amdgpu_vm_update_huge(params, pt, cursor.level,
|
|
|
+ pe_start, dst, nptes, incr,
|
|
|
+ flags | AMDGPU_PTE_FRAG(frag));
|
|
|
+
|
|
|
+ pe_start += nptes * 8;
|
|
|
+ dst += nptes * AMDGPU_GPU_PAGE_SIZE << shift;
|
|
|
+
|
|
|
+ frag_start = upd_end;
|
|
|
+ if (frag_start >= frag_end) {
|
|
|
+ /* figure out the next fragment */
|
|
|
+ amdgpu_vm_fragment(params, frag_start, end,
|
|
|
+ flags, &frag, &frag_end);
|
|
|
+ if (frag < shift)
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ } while (frag_start < entry_end);
|
|
|
|
|
|
- dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
|
|
|
- start = frag_end;
|
|
|
+ if (frag >= shift)
|
|
|
+ amdgpu_vm_pt_next(adev, &cursor);
|
|
|
}
|
|
|
|
|
|
return 0;
|
|
@@ -1401,8 +1763,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
|
|
|
|
|
|
params.func = amdgpu_vm_cpu_set_ptes;
|
|
|
params.pages_addr = pages_addr;
|
|
|
- return amdgpu_vm_frag_ptes(¶ms, start, last + 1,
|
|
|
- addr, flags);
|
|
|
+ return amdgpu_vm_update_ptes(¶ms, start, last + 1,
|
|
|
+ addr, flags);
|
|
|
}
|
|
|
|
|
|
ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
|
|
@@ -1481,7 +1843,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
|
|
|
if (r)
|
|
|
goto error_free;
|
|
|
|
|
|
- r = amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags);
|
|
|
+ r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags);
|
|
|
if (r)
|
|
|
goto error_free;
|
|
|
|
|
@@ -1696,10 +2058,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
|
|
|
amdgpu_asic_flush_hdp(adev, NULL);
|
|
|
}
|
|
|
|
|
|
- spin_lock(&vm->moved_lock);
|
|
|
- list_del_init(&bo_va->base.vm_status);
|
|
|
- spin_unlock(&vm->moved_lock);
|
|
|
-
|
|
|
/* If the BO is not in its preferred location add it back to
|
|
|
* the evicted list so that it gets validated again on the
|
|
|
* next command submission.
|
|
@@ -1708,9 +2066,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
|
|
|
uint32_t mem_type = bo->tbo.mem.mem_type;
|
|
|
|
|
|
if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type)))
|
|
|
- list_add_tail(&bo_va->base.vm_status, &vm->evicted);
|
|
|
+ amdgpu_vm_bo_evicted(&bo_va->base);
|
|
|
else
|
|
|
- list_add(&bo_va->base.vm_status, &vm->idle);
|
|
|
+ amdgpu_vm_bo_idle(&bo_va->base);
|
|
|
+ } else {
|
|
|
+ amdgpu_vm_bo_done(&bo_va->base);
|
|
|
}
|
|
|
|
|
|
list_splice_init(&bo_va->invalids, &bo_va->valids);
|
|
@@ -1895,7 +2255,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
|
|
|
struct amdgpu_bo_va_mapping, list);
|
|
|
list_del(&mapping->list);
|
|
|
|
|
|
- if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START)
|
|
|
+ if (vm->pte_support_ats &&
|
|
|
+ mapping->start < AMDGPU_GMC_HOLE_START)
|
|
|
init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
|
|
|
|
|
|
r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
|
|
@@ -1936,40 +2297,40 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
|
|
|
struct amdgpu_vm *vm)
|
|
|
{
|
|
|
struct amdgpu_bo_va *bo_va, *tmp;
|
|
|
- struct list_head moved;
|
|
|
+ struct reservation_object *resv;
|
|
|
bool clear;
|
|
|
int r;
|
|
|
|
|
|
- INIT_LIST_HEAD(&moved);
|
|
|
- spin_lock(&vm->moved_lock);
|
|
|
- list_splice_init(&vm->moved, &moved);
|
|
|
- spin_unlock(&vm->moved_lock);
|
|
|
+ list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
|
|
|
+ /* Per VM BOs never need to be cleared in the page tables */
|
|
|
+ r = amdgpu_vm_bo_update(adev, bo_va, false);
|
|
|
+ if (r)
|
|
|
+ return r;
|
|
|
+ }
|
|
|
|
|
|
- list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) {
|
|
|
- struct reservation_object *resv = bo_va->base.bo->tbo.resv;
|
|
|
+ spin_lock(&vm->invalidated_lock);
|
|
|
+ while (!list_empty(&vm->invalidated)) {
|
|
|
+ bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
|
|
|
+ base.vm_status);
|
|
|
+ resv = bo_va->base.bo->tbo.resv;
|
|
|
+ spin_unlock(&vm->invalidated_lock);
|
|
|
|
|
|
- /* Per VM BOs never need to bo cleared in the page tables */
|
|
|
- if (resv == vm->root.base.bo->tbo.resv)
|
|
|
- clear = false;
|
|
|
/* Try to reserve the BO to avoid clearing its ptes */
|
|
|
- else if (!amdgpu_vm_debug && reservation_object_trylock(resv))
|
|
|
+ if (!amdgpu_vm_debug && reservation_object_trylock(resv))
|
|
|
clear = false;
|
|
|
/* Somebody else is using the BO right now */
|
|
|
else
|
|
|
clear = true;
|
|
|
|
|
|
r = amdgpu_vm_bo_update(adev, bo_va, clear);
|
|
|
- if (r) {
|
|
|
- spin_lock(&vm->moved_lock);
|
|
|
- list_splice(&moved, &vm->moved);
|
|
|
- spin_unlock(&vm->moved_lock);
|
|
|
+ if (r)
|
|
|
return r;
|
|
|
- }
|
|
|
|
|
|
- if (!clear && resv != vm->root.base.bo->tbo.resv)
|
|
|
+ if (!clear)
|
|
|
reservation_object_unlock(resv);
|
|
|
-
|
|
|
+ spin_lock(&vm->invalidated_lock);
|
|
|
}
|
|
|
+ spin_unlock(&vm->invalidated_lock);
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
@@ -2034,9 +2395,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
|
|
|
|
|
|
if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv &&
|
|
|
!bo_va->base.moved) {
|
|
|
- spin_lock(&vm->moved_lock);
|
|
|
list_move(&bo_va->base.vm_status, &vm->moved);
|
|
|
- spin_unlock(&vm->moved_lock);
|
|
|
}
|
|
|
trace_amdgpu_vm_bo_map(bo_va, mapping);
|
|
|
}
|
|
@@ -2388,13 +2747,27 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
|
|
|
struct amdgpu_bo_va *bo_va)
|
|
|
{
|
|
|
struct amdgpu_bo_va_mapping *mapping, *next;
|
|
|
+ struct amdgpu_bo *bo = bo_va->base.bo;
|
|
|
struct amdgpu_vm *vm = bo_va->base.vm;
|
|
|
+ struct amdgpu_vm_bo_base **base;
|
|
|
+
|
|
|
+ if (bo) {
|
|
|
+ if (bo->tbo.resv == vm->root.base.bo->tbo.resv)
|
|
|
+ vm->bulk_moveable = false;
|
|
|
+
|
|
|
+ for (base = &bo_va->base.bo->vm_bo; *base;
|
|
|
+ base = &(*base)->next) {
|
|
|
+ if (*base != &bo_va->base)
|
|
|
+ continue;
|
|
|
|
|
|
- list_del(&bo_va->base.bo_list);
|
|
|
+ *base = bo_va->base.next;
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- spin_lock(&vm->moved_lock);
|
|
|
+ spin_lock(&vm->invalidated_lock);
|
|
|
list_del(&bo_va->base.vm_status);
|
|
|
- spin_unlock(&vm->moved_lock);
|
|
|
+ spin_unlock(&vm->invalidated_lock);
|
|
|
|
|
|
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
|
|
|
list_del(&mapping->list);
|
|
@@ -2432,30 +2805,24 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
|
|
|
if (bo->parent && bo->parent->shadow == bo)
|
|
|
bo = bo->parent;
|
|
|
|
|
|
- list_for_each_entry(bo_base, &bo->va, bo_list) {
|
|
|
+ for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
|
|
|
struct amdgpu_vm *vm = bo_base->vm;
|
|
|
- bool was_moved = bo_base->moved;
|
|
|
|
|
|
- bo_base->moved = true;
|
|
|
if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
|
|
|
- if (bo->tbo.type == ttm_bo_type_kernel)
|
|
|
- list_move(&bo_base->vm_status, &vm->evicted);
|
|
|
- else
|
|
|
- list_move_tail(&bo_base->vm_status,
|
|
|
- &vm->evicted);
|
|
|
+ amdgpu_vm_bo_evicted(bo_base);
|
|
|
continue;
|
|
|
}
|
|
|
|
|
|
- if (was_moved)
|
|
|
+ if (bo_base->moved)
|
|
|
continue;
|
|
|
+ bo_base->moved = true;
|
|
|
|
|
|
- if (bo->tbo.type == ttm_bo_type_kernel) {
|
|
|
- list_move(&bo_base->vm_status, &vm->relocated);
|
|
|
- } else {
|
|
|
- spin_lock(&bo_base->vm->moved_lock);
|
|
|
- list_move(&bo_base->vm_status, &vm->moved);
|
|
|
- spin_unlock(&bo_base->vm->moved_lock);
|
|
|
- }
|
|
|
+ if (bo->tbo.type == ttm_bo_type_kernel)
|
|
|
+ amdgpu_vm_bo_relocated(bo_base);
|
|
|
+ else if (bo->tbo.resv == vm->root.base.bo->tbo.resv)
|
|
|
+ amdgpu_vm_bo_moved(bo_base);
|
|
|
+ else
|
|
|
+ amdgpu_vm_bo_invalidated(bo_base);
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -2574,6 +2941,22 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size,
|
|
|
adev->vm_manager.fragment_size);
|
|
|
}
|
|
|
|
|
|
+static struct amdgpu_retryfault_hashtable *init_fault_hash(void)
|
|
|
+{
|
|
|
+ struct amdgpu_retryfault_hashtable *fault_hash;
|
|
|
+
|
|
|
+ fault_hash = kmalloc(sizeof(*fault_hash), GFP_KERNEL);
|
|
|
+ if (!fault_hash)
|
|
|
+ return fault_hash;
|
|
|
+
|
|
|
+ INIT_CHASH_TABLE(fault_hash->hash,
|
|
|
+ AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
|
|
|
+ spin_lock_init(&fault_hash->lock);
|
|
|
+ fault_hash->count = 0;
|
|
|
+
|
|
|
+ return fault_hash;
|
|
|
+}
|
|
|
+
|
|
|
/**
|
|
|
* amdgpu_vm_init - initialize a vm instance
|
|
|
*
|
|
@@ -2592,13 +2975,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|
|
{
|
|
|
struct amdgpu_bo_param bp;
|
|
|
struct amdgpu_bo *root;
|
|
|
- const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
|
|
|
- AMDGPU_VM_PTE_COUNT(adev) * 8);
|
|
|
- unsigned ring_instance;
|
|
|
- struct amdgpu_ring *ring;
|
|
|
- struct drm_sched_rq *rq;
|
|
|
- unsigned long size;
|
|
|
- uint64_t flags;
|
|
|
int r, i;
|
|
|
|
|
|
vm->va = RB_ROOT_CACHED;
|
|
@@ -2606,18 +2982,15 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|
|
vm->reserved_vmid[i] = NULL;
|
|
|
INIT_LIST_HEAD(&vm->evicted);
|
|
|
INIT_LIST_HEAD(&vm->relocated);
|
|
|
- spin_lock_init(&vm->moved_lock);
|
|
|
INIT_LIST_HEAD(&vm->moved);
|
|
|
INIT_LIST_HEAD(&vm->idle);
|
|
|
+ INIT_LIST_HEAD(&vm->invalidated);
|
|
|
+ spin_lock_init(&vm->invalidated_lock);
|
|
|
INIT_LIST_HEAD(&vm->freed);
|
|
|
|
|
|
/* create scheduler entity for page table updates */
|
|
|
-
|
|
|
- ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
|
|
|
- ring_instance %= adev->vm_manager.vm_pte_num_rings;
|
|
|
- ring = adev->vm_manager.vm_pte_rings[ring_instance];
|
|
|
- rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
|
|
|
- r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL);
|
|
|
+ r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs,
|
|
|
+ adev->vm_manager.vm_pte_num_rqs, NULL);
|
|
|
if (r)
|
|
|
return r;
|
|
|
|
|
@@ -2639,20 +3012,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
|
|
|
"CPU update of VM recommended only for large BAR system\n");
|
|
|
vm->last_update = NULL;
|
|
|
|
|
|
- flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
|
|
|
- if (vm->use_cpu_for_update)
|
|
|
- flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
|
|
|
- else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE)
|
|
|
- flags |= AMDGPU_GEM_CREATE_SHADOW;
|
|
|
-
|
|
|
- size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
|
|
|
- memset(&bp, 0, sizeof(bp));
|
|
|
- bp.size = size;
|
|
|
- bp.byte_align = align;
|
|
|
- bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
|
|
|
- bp.flags = flags;
|
|
|
- bp.type = ttm_bo_type_kernel;
|
|
|
- bp.resv = NULL;
|
|
|
+ amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp);
|
|
|
+ if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
|
|
|
+ bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
|
|
|
r = amdgpu_bo_create(adev, &bp, &root);
|
|
|
if (r)
|
|
|
goto error_free_sched_entity;
|
|
@@ -2683,6 +3045,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->pasid = pasid;
 	}
 
+	vm->fault_hash = init_fault_hash();
+	if (!vm->fault_hash) {
+		r = -ENOMEM;
+		goto error_free_root;
+	}
+
 	INIT_KFIFO(vm->faults);
 	vm->fault_credit = 16;
@@ -2722,7 +3090,7 @@ error_free_sched_entity:
  * Returns:
  * 0 for success, -errno for errors.
  */
-int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid)
 {
 	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
 	int r;
@@ -2734,7 +3102,20 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	/* Sanity checks */
 	if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) {
 		r = -EINVAL;
-		goto error;
+		goto unreserve_bo;
+	}
+
+	if (pasid) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
+			      GFP_ATOMIC);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+
+		if (r == -ENOSPC)
+			goto unreserve_bo;
+		r = 0;
 	}
 
 	/* Check if PD needs to be reinitialized and do it before
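With the new pasid parameter, the caller (in practice the KFD side) is expected to pass in the PASID it has already allocated so the compute VM becomes reachable under that PASID through pasid_idr. A hedged usage sketch follows; the surrounding function and the origin of kfd_pasid are assumptions, not part of this patch.

/* Hypothetical caller-side sketch: turn a graphics VM into a compute VM under
 * a KFD-allocated PASID. Error handling and how kfd_pasid was obtained are
 * assumptions. */
static int example_acquire_compute_vm(struct amdgpu_device *adev,
				      struct amdgpu_vm *vm,
				      unsigned int kfd_pasid)
{
	int r;

	r = amdgpu_vm_make_compute(adev, vm, kfd_pasid);
	if (r)
		return r;

	/* From here on, retry faults for kfd_pasid resolve to this vm via
	 * adev->vm_manager.pasid_idr. */
	return 0;
}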
@@ -2745,7 +3126,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 				       adev->vm_manager.root_level,
 				       pte_support_ats);
 		if (r)
-			goto error;
+			goto free_idr;
 	}
 
 	/* Update VM state */
@@ -2764,45 +3145,52 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
 		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 
+		/* Free the original amdgpu-allocated pasid;
+		 * it will be replaced with a kfd-allocated pasid.
+		 */
+		amdgpu_pasid_free(vm->pasid);
 		vm->pasid = 0;
 	}
 
 	/* Free the shadow bo for compute VM */
 	amdgpu_bo_unref(&vm->root.base.bo->shadow);
 
-error:
+	if (pasid)
+		vm->pasid = pasid;
+
+	goto unreserve_bo;
+
+free_idr:
+	if (pasid) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		idr_remove(&adev->vm_manager.pasid_idr, pasid);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+	}
+unreserve_bo:
 	amdgpu_bo_unreserve(vm->root.base.bo);
 	return r;
 }
 
 /**
- * amdgpu_vm_free_levels - free PD/PT levels
- *
- * @adev: amdgpu device structure
- * @parent: PD/PT starting level to free
- * @level: level of parent structure
+ * amdgpu_vm_release_compute - release a compute vm
+ * @adev: amdgpu_device pointer
+ * @vm: a vm turned into a compute vm by calling amdgpu_vm_make_compute
  *
- * Free the page directory or page table level and all sub levels.
+ * This is the counterpart of amdgpu_vm_make_compute. It decouples the compute
+ * pasid from the vm. The compute side must stop using the vm after this call.
  */
-static void amdgpu_vm_free_levels(struct amdgpu_device *adev,
-				  struct amdgpu_vm_pt *parent,
-				  unsigned level)
+void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-	unsigned i, num_entries = amdgpu_vm_num_entries(adev, level);
+	if (vm->pasid) {
+		unsigned long flags;
 
-	if (parent->base.bo) {
-		list_del(&parent->base.bo_list);
-		list_del(&parent->base.vm_status);
-		amdgpu_bo_unref(&parent->base.bo->shadow);
-		amdgpu_bo_unref(&parent->base.bo);
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 	}
-
-	if (parent->entries)
-		for (i = 0; i < num_entries; i++)
-			amdgpu_vm_free_levels(adev, &parent->entries[i],
-					      level + 1);
-
-	kvfree(parent->entries);
+	vm->pasid = 0;
 }
 
 /**
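amdgpu_vm_release_compute is the teardown half of the acquire path sketched earlier; a brief hedged sketch of the pairing, where the surrounding process-teardown function is an assumption.

/* Hypothetical teardown-side sketch pairing amdgpu_vm_make_compute() with
 * amdgpu_vm_release_compute(): once released, the PASID no longer resolves to
 * this vm and the compute side must not touch it any more. */
static void example_release_compute_vm(struct amdgpu_device *adev,
				       struct amdgpu_vm *vm)
{
	amdgpu_vm_release_compute(adev, vm);
	/* vm->pasid is now 0; the graphics side still owns the vm itself and
	 * eventually frees it through amdgpu_vm_fini(). */
}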
@@ -2826,7 +3214,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
 	/* Clear pending page faults from IH when the VM is destroyed */
 	while (kfifo_get(&vm->faults, &fault))
-		amdgpu_ih_clear_fault(adev, fault);
+		amdgpu_vm_clear_fault(vm->fault_hash, fault);
 
 	if (vm->pasid) {
 		unsigned long flags;
@@ -2836,6 +3224,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
 	}
 
+	kfree(vm->fault_hash);
+	vm->fault_hash = NULL;
+
 	drm_sched_entity_destroy(&vm->entity);
 
 	if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
@@ -2862,8 +3253,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	if (r) {
 		dev_err(adev->dev, "Leaking page tables because BO reservation failed\n");
 	} else {
-		amdgpu_vm_free_levels(adev, &vm->root,
-				      adev->vm_manager.root_level);
+		amdgpu_vm_free_pts(adev, vm);
 		amdgpu_bo_unreserve(root);
 	}
 	amdgpu_bo_unref(&root);
@@ -2926,7 +3316,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
 		adev->vm_manager.seqno[i] = 0;
 
-	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 	spin_lock_init(&adev->vm_manager.prt_lock);
 	atomic_set(&adev->vm_manager.num_prt_users, 0);
@@ -3002,7 +3391,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 /**
  * amdgpu_vm_get_task_info - Extracts task info for a PASID.
  *
- * @dev: drm device pointer
+ * @adev: amdgpu device pointer
  * @pasid: PASID identifier for VM
  * @task_info: task_info to fill.
  */
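amdgpu_vm_get_task_info is mainly useful from fault-handling paths that only know a PASID. A hedged sketch of such a caller follows; the surrounding handler and the exact task_info fields printed are assumptions.

/* Hypothetical fault-reporting sketch: look up which process owned the
 * faulting PASID and include that in the error message. Assumes a struct
 * amdgpu_task_info with process_name/pid fields, as filled in by
 * amdgpu_vm_get_task_info(). */
static void example_report_fault(struct amdgpu_device *adev,
				 unsigned int pasid, u64 addr)
{
	struct amdgpu_task_info task_info = {};

	amdgpu_vm_get_task_info(adev, pasid, &task_info);
	dev_err(adev->dev,
		"VM fault at 0x%llx from pasid %u (process %s, pid %d)\n",
		(unsigned long long)addr, pasid,
		task_info.process_name, task_info.pid);
}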
@@ -3037,3 +3426,78 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
 		}
 	}
 }
+
+/**
+ * amdgpu_vm_add_fault - Add a page fault record to fault hash table
+ *
+ * @fault_hash: fault hash table
+ * @key: 64-bit encoding of PASID and address
+ *
+ * This should be called when a retry page fault interrupt is
+ * received. If this is a new page fault, it will be added to a hash
+ * table. The return value indicates whether this is a new fault, or
+ * a fault that was already known and is already being handled.
+ *
+ * If there are too many pending page faults, this will fail. Retry
+ * interrupts should be ignored in this case until there is enough
+ * free space.
+ *
+ * Returns 0 if the fault was added, 1 if the fault was already known,
+ * -ENOSPC if there are too many pending faults.
+ */
+int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key)
+{
+	unsigned long flags;
+	int r = -ENOSPC;
+
+	if (WARN_ON_ONCE(!fault_hash))
+		/* Should have been allocated in amdgpu_vm_init
+		 */
+		return r;
+
+	spin_lock_irqsave(&fault_hash->lock, flags);
+
+	/* Only let the hash table fill up to 50% for best performance */
+	if (fault_hash->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
+		goto unlock_out;
+
+	r = chash_table_copy_in(&fault_hash->hash, key, NULL);
+	if (!r)
+		fault_hash->count++;
+
+	/* chash_table_copy_in should never fail unless we're losing count */
+	WARN_ON_ONCE(r < 0);
+
+unlock_out:
+	spin_unlock_irqrestore(&fault_hash->lock, flags);
+	return r;
+}
+
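The kerneldoc above names the intended call site: the interrupt handler that receives retry faults. A hedged sketch of that flow follows; the key encoding (PASID in the upper bits, page address in the lower bits) and the handler name are assumptions based on the comment, not code from this patch.

/* Hypothetical IH-side sketch: deduplicate retry faults before queueing them
 * for the fault worker. The key layout and the vm->faults kfifo usage mirror
 * the doc comment above but are assumptions here. */
static bool example_queue_retry_fault(struct amdgpu_vm *vm,
				      unsigned int pasid, u64 addr)
{
	u64 key = ((u64)pasid << 48) | (addr >> PAGE_SHIFT);
	int r;

	r = amdgpu_vm_add_fault(vm->fault_hash, key);
	if (r)		/* already known (1) or table full (-ENOSPC): drop it */
		return false;

	if (kfifo_put(&vm->faults, key))
		return true;

	/* No room to queue it; forget the record so a later retry re-adds it. */
	amdgpu_vm_clear_fault(vm->fault_hash, key);
	return false;
}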
+/**
+ * amdgpu_vm_clear_fault - Remove a page fault record
+ *
+ * @fault_hash: fault hash table
+ * @key: 64-bit encoding of PASID and address
+ *
+ * This should be called when a page fault has been handled. Any
+ * future interrupt with this key will be processed as a new
+ * page fault.
+ */
+void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key)
+{
+	unsigned long flags;
+	int r;
+
+	if (!fault_hash)
+		return;
+
+	spin_lock_irqsave(&fault_hash->lock, flags);
+
+	r = chash_table_remove(&fault_hash->hash, key, NULL);
+	if (!WARN_ON_ONCE(r < 0)) {
+		fault_hash->count--;
+		WARN_ON_ONCE(fault_hash->count < 0);
+	}
+
+	spin_unlock_irqrestore(&fault_hash->lock, flags);
+}
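The matching call on the handling side, once a fault has actually been resolved (or the VM is torn down, as the amdgpu_vm_fini hunk above shows), clears the record so a later retry for the same page is treated as new. A hedged sketch of a worker draining the queue; the worker and the page-handling helper are assumptions, not part of this patch.

/* Stand-in for the real fault resolution; assumed, not part of this patch. */
static void example_handle_one_fault(struct amdgpu_vm *vm, u64 key);

/* Hypothetical worker-side sketch: drain queued retry faults, resolve each
 * one, then drop its hash record so future retries for that page start over. */
static void example_drain_faults(struct amdgpu_vm *vm)
{
	u64 key;

	while (kfifo_get(&vm->faults, &key)) {
		example_handle_one_fault(vm, key);
		amdgpu_vm_clear_fault(vm->fault_hash, key);
	}
}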