@@ -35,6 +35,7 @@
 #include <linux/msi.h>
 #include <linux/dma-contiguous.h>
 #include <linux/irqdomain.h>
+#include <linux/percpu.h>
 #include <asm/irq_remapping.h>
 #include <asm/io_apic.h>
 #include <asm/apic.h>
@@ -114,6 +115,45 @@ struct kmem_cache *amd_iommu_irq_cache;
 static void update_domain(struct protection_domain *domain);
 static int protection_domain_init(struct protection_domain *domain);
 
+/*
+ * For dynamic growth the aperture size is split into ranges of 128MB of
+ * DMA address space each. This struct represents one such range.
+ */
+struct aperture_range {
+
+	spinlock_t bitmap_lock;
+
+	/* address allocation bitmap */
+	unsigned long *bitmap;
+	unsigned long offset;
+	unsigned long next_bit;
+
+	/*
+	 * Array of PTE pages for the aperture. In this array we save all the
+	 * leaf pages of the domain page table used for the aperture. This way
+	 * we don't need to walk the page table to find a specific PTE. We can
+	 * just calculate its address in constant time.
+	 */
+	u64 *pte_pages[64];
+};
+
+/*
+ * Data container for a dma_ops specific protection domain
+ */
+struct dma_ops_domain {
+	/* generic protection domain information */
+	struct protection_domain domain;
+
+	/* size of the aperture for the mappings */
+	unsigned long aperture_size;
+
+	/* aperture index we start searching for free addresses */
+	u32 __percpu *next_index;
+
+	/* address space relevant data */
+	struct aperture_range *aperture[APERTURE_MAX_RANGES];
+};
+
 /****************************************************************************
  *
  * Helper functions
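
[Note: the dma_ops_domain above pairs every aperture range with its own
bitmap_lock and gives each CPU a private search hint, so concurrent
mappings from different CPUs tend to land on different ranges and
different locks. A minimal sketch of the per-CPU hint lifecycle,
assuming the structs above; the helper names
(dma_ops_domain_init_hint/dma_ops_domain_exit_hint) are hypothetical:

	#include <linux/percpu.h>

	static int dma_ops_domain_init_hint(struct dma_ops_domain *dom)
	{
		int cpu;

		dom->next_index = alloc_percpu(u32);	/* one u32 per CPU */
		if (!dom->next_index)
			return -ENOMEM;

		for_each_possible_cpu(cpu)
			*per_cpu_ptr(dom->next_index, cpu) = 0;

		return 0;
	}

	static void dma_ops_domain_exit_hint(struct dma_ops_domain *dom)
	{
		free_percpu(dom->next_index);
	}

The allocator reads the hint with this_cpu_read(*dom->next_index) and
writes back the index of the range it succeeded on, so each CPU keeps
returning to "its" aperture until that range fills up.]
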
@@ -1167,11 +1207,21 @@ static u64 *alloc_pte(struct protection_domain *domain,
 	end_lvl = PAGE_SIZE_LEVEL(page_size);
 
 	while (level > end_lvl) {
-		if (!IOMMU_PTE_PRESENT(*pte)) {
+		u64 __pte, __npte;
+
+		__pte = *pte;
+
+		if (!IOMMU_PTE_PRESENT(__pte)) {
 			page = (u64 *)get_zeroed_page(gfp);
 			if (!page)
 				return NULL;
-			*pte = PM_LEVEL_PDE(level, virt_to_phys(page));
+
+			__npte = PM_LEVEL_PDE(level, virt_to_phys(page));
+
+			if (cmpxchg64(pte, __pte, __npte)) {
+				free_page((unsigned long)page);
+				continue;
+			}
 		}
 
 		/* No level skipping support yet */
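
[Note: the cmpxchg64() above is what lets alloc_pte() run without the
domain lock: every contender allocates a candidate page, but only the
CPU that swaps it into the empty entry keeps it; losers free their page
and retry the loop. cmpxchg64() returns the value that was in *pte, and
a not-present entry is zero here, which is why plain truth-testing of
the return value detects the lost race. A standalone sketch of the
pattern under the same assumptions (install_pde() is a hypothetical
name, not part of the patch):

	static bool install_pde(u64 *slot, u64 expected, int level, gfp_t gfp)
	{
		u64 *page = (u64 *)get_zeroed_page(gfp);

		if (!page)
			return false;

		/* Atomically publish the new page-table level */
		if (cmpxchg64(slot, expected,
			      PM_LEVEL_PDE(level, virt_to_phys(page))) != expected) {
			free_page((unsigned long)page);	/* lost the race */
			return false;
		}

		return true;
	}

On failure the caller simply re-reads *slot and walks the level that
the winning CPU installed.]
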
@@ -1376,8 +1426,10 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
 			   bool populate, gfp_t gfp)
 {
 	int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
-	struct amd_iommu *iommu;
 	unsigned long i, old_size, pte_pgsize;
+	struct aperture_range *range;
+	struct amd_iommu *iommu;
+	unsigned long flags;
 
 #ifdef CONFIG_IOMMU_STRESS
 	populate = false;
@@ -1386,15 +1438,17 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
 	if (index >= APERTURE_MAX_RANGES)
 		return -ENOMEM;
 
-	dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp);
-	if (!dma_dom->aperture[index])
+	range = kzalloc(sizeof(struct aperture_range), gfp);
+	if (!range)
 		return -ENOMEM;
 
-	dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp);
-	if (!dma_dom->aperture[index]->bitmap)
+	range->bitmap = (void *)get_zeroed_page(gfp);
+	if (!range->bitmap)
 		goto out_free;
 
-	dma_dom->aperture[index]->offset = dma_dom->aperture_size;
+	range->offset = dma_dom->aperture_size;
+
+	spin_lock_init(&range->bitmap_lock);
 
 	if (populate) {
 		unsigned long address = dma_dom->aperture_size;
@@ -1407,14 +1461,20 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
 			if (!pte)
 				goto out_free;
 
-			dma_dom->aperture[index]->pte_pages[i] = pte_page;
+			range->pte_pages[i] = pte_page;
 
 			address += APERTURE_RANGE_SIZE / 64;
 		}
 	}
 
-	old_size                = dma_dom->aperture_size;
-	dma_dom->aperture_size += APERTURE_RANGE_SIZE;
+	spin_lock_irqsave(&dma_dom->domain.lock, flags);
+
+	/* First take the bitmap_lock and then publish the range */
+	spin_lock(&range->bitmap_lock);
+
+	old_size                 = dma_dom->aperture_size;
+	dma_dom->aperture[index] = range;
+	dma_dom->aperture_size  += APERTURE_RANGE_SIZE;
 
 	/* Reserve address range used for MSI messages */
 	if (old_size < MSI_ADDR_BASE_LO &&
@@ -1461,62 +1521,123 @@ static int alloc_new_range(struct dma_ops_domain *dma_dom,
 
 	update_domain(&dma_dom->domain);
 
+	spin_unlock(&range->bitmap_lock);
+
+	spin_unlock_irqrestore(&dma_dom->domain.lock, flags);
+
 	return 0;
 
 out_free:
 	update_domain(&dma_dom->domain);
 
-	free_page((unsigned long)dma_dom->aperture[index]->bitmap);
+	free_page((unsigned long)range->bitmap);
 
-	kfree(dma_dom->aperture[index]);
-	dma_dom->aperture[index] = NULL;
+	kfree(range);
 
 	return -ENOMEM;
 }
 
+static dma_addr_t dma_ops_aperture_alloc(struct dma_ops_domain *dom,
+					 struct aperture_range *range,
+					 unsigned long pages,
+					 unsigned long dma_mask,
+					 unsigned long boundary_size,
+					 unsigned long align_mask,
+					 bool trylock)
+{
+	unsigned long offset, limit, flags;
+	dma_addr_t address;
+	bool flush = false;
+
+	offset = range->offset >> PAGE_SHIFT;
+	limit  = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
+					dma_mask >> PAGE_SHIFT);
+
+	if (trylock) {
+		if (!spin_trylock_irqsave(&range->bitmap_lock, flags))
+			return -1;
+	} else {
+		spin_lock_irqsave(&range->bitmap_lock, flags);
+	}
+
+	address = iommu_area_alloc(range->bitmap, limit, range->next_bit,
+				   pages, offset, boundary_size, align_mask);
+	if (address == -1) {
+		/* Nothing found, retry one time */
+		address = iommu_area_alloc(range->bitmap, limit,
+					   0, pages, offset, boundary_size,
+					   align_mask);
+		flush = true;
+	}
+
+	if (address != -1)
+		range->next_bit = address + pages;
+
+	spin_unlock_irqrestore(&range->bitmap_lock, flags);
+
+	if (flush) {
+		domain_flush_tlb(&dom->domain);
+		domain_flush_complete(&dom->domain);
+	}
+
+	return address;
+}
+
 static unsigned long dma_ops_area_alloc(struct device *dev,
 					struct dma_ops_domain *dom,
 					unsigned int pages,
 					unsigned long align_mask,
-					u64 dma_mask,
-					unsigned long start)
+					u64 dma_mask)
 {
-	unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE;
-	int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT;
-	int i = start >> APERTURE_RANGE_SHIFT;
 	unsigned long boundary_size, mask;
 	unsigned long address = -1;
-	unsigned long limit;
+	bool first = true;
+	u32 start, i;
 
-	next_bit >>= PAGE_SHIFT;
+	preempt_disable();
 
 	mask = dma_get_seg_boundary(dev);
 
+again:
+	start = this_cpu_read(*dom->next_index);
+
+	/* Sanity check - is it really necessary? */
+	if (unlikely(start > APERTURE_MAX_RANGES)) {
+		start = 0;
+		this_cpu_write(*dom->next_index, 0);
+	}
+
 	boundary_size = mask + 1 ? ALIGN(mask + 1, PAGE_SIZE) >> PAGE_SHIFT :
 				   1UL << (BITS_PER_LONG - PAGE_SHIFT);
 
-	for (;i < max_index; ++i) {
-		unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT;
+	for (i = 0; i < APERTURE_MAX_RANGES; ++i) {
+		struct aperture_range *range;
+		int index;
 
-		if (dom->aperture[i]->offset >= dma_mask)
-			break;
+		index = (start + i) % APERTURE_MAX_RANGES;
 
-		limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset,
-					       dma_mask >> PAGE_SHIFT);
+		range = dom->aperture[index];
 
-		address = iommu_area_alloc(dom->aperture[i]->bitmap,
-					   limit, next_bit, pages, 0,
-					   boundary_size, align_mask);
+		if (!range || range->offset >= dma_mask)
+			continue;
+
+		address = dma_ops_aperture_alloc(dom, range, pages,
+						 dma_mask, boundary_size,
+						 align_mask, first);
 		if (address != -1) {
-			address = dom->aperture[i]->offset +
-				  (address << PAGE_SHIFT);
-			dom->next_address = address + (pages << PAGE_SHIFT);
+			address = range->offset + (address << PAGE_SHIFT);
+			this_cpu_write(*dom->next_index, index);
 			break;
 		}
+	}
 
-		next_bit = 0;
+	if (address == -1 && first) {
+		first = false;
+		goto again;
 	}
 
+	preempt_enable();
+
 	return address;
 }
 
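
[Note: dma_ops_aperture_alloc() takes a "trylock" flag because
dma_ops_area_alloc() scans the ranges twice: the first pass refuses to
wait on a contended bitmap_lock and just moves on to the next range,
and only the second pass (first == false) blocks. The core of that
opportunistic step in isolation - spin_trylock_irqsave() evaluates
non-zero only if the lock was actually taken:

	unsigned long flags;

	if (trylock) {
		if (!spin_trylock_irqsave(&range->bitmap_lock, flags))
			return -1;	/* busy: caller tries the next range */
	} else {
		spin_lock_irqsave(&range->bitmap_lock, flags);
	}
	/* ... iommu_area_alloc() on range->bitmap goes here ... */
	spin_unlock_irqrestore(&range->bitmap_lock, flags);

Note also the preempt_disable()/preempt_enable() pair around the scan:
it keeps the task on one CPU so the this_cpu_read()/this_cpu_write()
pair operates on a single CPU's hint.]
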
@@ -1526,21 +1647,14 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev,
 					     unsigned long align_mask,
 					     u64 dma_mask)
 {
-	unsigned long address;
-
-#ifdef CONFIG_IOMMU_STRESS
-	dom->next_address = 0;
-	dom->need_flush = true;
-#endif
+	unsigned long address = -1;
 
-	address = dma_ops_area_alloc(dev, dom, pages, align_mask,
-				     dma_mask, dom->next_address);
+	while (address == -1) {
+		address = dma_ops_area_alloc(dev, dom, pages,
+					     align_mask, dma_mask);
 
-	if (address == -1) {
-		dom->next_address = 0;
-		address = dma_ops_area_alloc(dev, dom, pages, align_mask,
-					     dma_mask, 0);
-		dom->need_flush = true;
+		if (address == -1 && alloc_new_range(dom, false, GFP_ATOMIC))
+			break;
 	}
 
 	if (unlikely(address == -1))
@@ -1562,6 +1676,7 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 {
 	unsigned i = address >> APERTURE_RANGE_SHIFT;
 	struct aperture_range *range = dom->aperture[i];
+	unsigned long flags;
 
 	BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL);
 
@@ -1570,12 +1685,18 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom,
 		return;
 #endif
 
-	if (address >= dom->next_address)
-		dom->need_flush = true;
+	if (amd_iommu_unmap_flush) {
+		domain_flush_tlb(&dom->domain);
+		domain_flush_complete(&dom->domain);
+	}
 
 	address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;
 
+	spin_lock_irqsave(&range->bitmap_lock, flags);
+	if (address + pages > range->next_bit)
+		range->next_bit = address + pages;
 	bitmap_clear(range->bitmap, address, pages);
+	spin_unlock_irqrestore(&range->bitmap_lock, flags);
 
 }
 
@@ -1755,6 +1876,8 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 	if (!dom)
 		return;
 
+	free_percpu(dom->next_index);
+
 	del_domain_from_list(&dom->domain);
 
 	free_pagetable(&dom->domain);
@@ -1769,6 +1892,23 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 	kfree(dom);
 }
 
+static int dma_ops_domain_alloc_apertures(struct dma_ops_domain *dma_dom,
+					  int max_apertures)
+{
+	int ret, i, apertures;
+
+	apertures = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT;
+	ret       = 0;
+
+	for (i = apertures; i < max_apertures; ++i) {
+		ret = alloc_new_range(dma_dom, false, GFP_KERNEL);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
 /*
  * Allocates a new protection domain usable for the dma_ops functions.
  * It also initializes the page table and the address allocator data
@@ -1777,6 +1917,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
 static struct dma_ops_domain *dma_ops_domain_alloc(void)
 {
 	struct dma_ops_domain *dma_dom;
+	int cpu;
 
 	dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL);
 	if (!dma_dom)
@@ -1785,6 +1926,10 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	if (protection_domain_init(&dma_dom->domain))
 		goto free_dma_dom;
 
+	dma_dom->next_index = alloc_percpu(u32);
+	if (!dma_dom->next_index)
+		goto free_dma_dom;
+
 	dma_dom->domain.mode = PAGE_MODE_2_LEVEL;
 	dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
 	dma_dom->domain.flags = PD_DMA_OPS_MASK;
@@ -1792,8 +1937,6 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	if (!dma_dom->domain.pt_root)
 		goto free_dma_dom;
 
-	dma_dom->need_flush = false;
-
 	add_domain_to_list(&dma_dom->domain);
 
 	if (alloc_new_range(dma_dom, true, GFP_KERNEL))
@@ -1804,8 +1947,9 @@ static struct dma_ops_domain *dma_ops_domain_alloc(void)
 	 * a valid dma-address. So we can use 0 as error value
 	 */
 	dma_dom->aperture[0]->bitmap[0] = 1;
-	dma_dom->next_address = 0;
 
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(dma_dom->next_index, cpu) = 0;
 
 	return dma_dom;
 
@@ -2328,7 +2472,7 @@ static dma_addr_t dma_ops_domain_map(struct dma_ops_domain *dom,
 	else if (direction == DMA_BIDIRECTIONAL)
 		__pte |= IOMMU_PTE_IR | IOMMU_PTE_IW;
 
-	WARN_ON(*pte);
+	WARN_ON_ONCE(*pte);
 
 	*pte = __pte;
 
@@ -2357,7 +2501,7 @@ static void dma_ops_domain_unmap(struct dma_ops_domain *dom,
 
 	pte += PM_LEVEL_INDEX(0, address);
 
-	WARN_ON(!*pte);
+	WARN_ON_ONCE(!*pte);
 
 	*pte = 0ULL;
 }
@@ -2393,26 +2537,11 @@ static dma_addr_t __map_single(struct device *dev,
 	if (align)
 		align_mask = (1UL << get_order(size)) - 1;
 
-retry:
 	address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask,
 					  dma_mask);
-	if (unlikely(address == DMA_ERROR_CODE)) {
-		/*
-		 * setting next_address here will let the address
-		 * allocator only scan the new allocated range in the
-		 * first run. This is a small optimization.
-		 */
-		dma_dom->next_address = dma_dom->aperture_size;
-
-		if (alloc_new_range(dma_dom, false, GFP_ATOMIC))
-			goto out;
-
-		/*
-		 * aperture was successfully enlarged by 128 MB, try
-		 * allocation again
-		 */
-		goto retry;
-	}
+	if (address == DMA_ERROR_CODE)
+		goto out;
 
 	start = address;
 	for (i = 0; i < pages; ++i) {
@@ -2427,11 +2556,10 @@ retry:
 
 	ADD_STATS_COUNTER(alloced_io_mem, size);
 
-	if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
-		domain_flush_tlb(&dma_dom->domain);
-		dma_dom->need_flush = false;
-	} else if (unlikely(amd_iommu_np_cache))
+	if (unlikely(amd_iommu_np_cache)) {
 		domain_flush_pages(&dma_dom->domain, address, size);
+		domain_flush_complete(&dma_dom->domain);
+	}
 
 out:
 	return address;
@@ -2478,11 +2606,6 @@ static void __unmap_single(struct dma_ops_domain *dma_dom,
 	SUB_STATS_COUNTER(alloced_io_mem, size);
 
 	dma_ops_free_addresses(dma_dom, dma_addr, pages);
-
-	if (amd_iommu_unmap_flush || dma_dom->need_flush) {
-		domain_flush_pages(&dma_dom->domain, flush_addr, size);
-		dma_dom->need_flush = false;
-	}
 }
 
 /*
@@ -2493,11 +2616,9 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
 			   enum dma_data_direction dir,
 			   struct dma_attrs *attrs)
 {
-	unsigned long flags;
+	phys_addr_t paddr = page_to_phys(page) + offset;
 	struct protection_domain *domain;
-	dma_addr_t addr;
 	u64 dma_mask;
-	phys_addr_t paddr = page_to_phys(page) + offset;
 
 	INC_STATS_COUNTER(cnt_map_single);
 
@@ -2509,19 +2630,8 @@ static dma_addr_t map_page(struct device *dev, struct page *page,
 
 	dma_mask = *dev->dma_mask;
 
-	spin_lock_irqsave(&domain->lock, flags);
-
-	addr = __map_single(dev, domain->priv, paddr, size, dir, false,
+	return __map_single(dev, domain->priv, paddr, size, dir, false,
 			    dma_mask);
-	if (addr == DMA_ERROR_CODE)
-		goto out;
-
-	domain_flush_complete(domain);
-
-out:
-	spin_unlock_irqrestore(&domain->lock, flags);
-
-	return addr;
 }
 
 /*
@@ -2530,7 +2640,6 @@ out:
 static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 		       enum dma_data_direction dir, struct dma_attrs *attrs)
 {
-	unsigned long flags;
 	struct protection_domain *domain;
 
 	INC_STATS_COUNTER(cnt_unmap_single);
@@ -2539,13 +2648,7 @@ static void unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
 	if (IS_ERR(domain))
 		return;
 
-	spin_lock_irqsave(&domain->lock, flags);
-
 	__unmap_single(domain->priv, dma_addr, size, dir);
-
-	domain_flush_complete(domain);
-
-	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 /*
@@ -2556,7 +2659,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 		  int nelems, enum dma_data_direction dir,
 		  struct dma_attrs *attrs)
 {
-	unsigned long flags;
 	struct protection_domain *domain;
 	int i;
 	struct scatterlist *s;
@@ -2572,8 +2674,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 
 	dma_mask = *dev->dma_mask;
 
-	spin_lock_irqsave(&domain->lock, flags);
-
 	for_each_sg(sglist, s, nelems, i) {
 		paddr = sg_phys(s);
 
@@ -2588,12 +2688,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
 			goto unmap;
 	}
 
-	domain_flush_complete(domain);
-
-out:
-	spin_unlock_irqrestore(&domain->lock, flags);
-
 	return mapped_elems;
+
 unmap:
 	for_each_sg(sglist, s, mapped_elems, i) {
 		if (s->dma_address)
@@ -2602,9 +2698,7 @@ unmap:
 		s->dma_address = s->dma_length = 0;
 	}
 
-	mapped_elems = 0;
-
-	goto out;
+	return 0;
 }
 
 /*
@@ -2615,7 +2709,6 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 		     int nelems, enum dma_data_direction dir,
 		     struct dma_attrs *attrs)
 {
-	unsigned long flags;
 	struct protection_domain *domain;
 	struct scatterlist *s;
 	int i;
@@ -2626,17 +2719,11 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
 	if (IS_ERR(domain))
 		return;
 
-	spin_lock_irqsave(&domain->lock, flags);
-
 	for_each_sg(sglist, s, nelems, i) {
 		__unmap_single(domain->priv, s->dma_address,
 			       s->dma_length, dir);
 		s->dma_address = s->dma_length = 0;
 	}
-
-	domain_flush_complete(domain);
-
-	spin_unlock_irqrestore(&domain->lock, flags);
 }
 
 /*
@@ -2648,7 +2735,6 @@ static void *alloc_coherent(struct device *dev, size_t size,
 {
 	u64 dma_mask = dev->coherent_dma_mask;
 	struct protection_domain *domain;
-	unsigned long flags;
 	struct page *page;
 
 	INC_STATS_COUNTER(cnt_alloc_coherent);
@@ -2680,19 +2766,11 @@ static void *alloc_coherent(struct device *dev, size_t size,
 	if (!dma_mask)
 		dma_mask = *dev->dma_mask;
 
-	spin_lock_irqsave(&domain->lock, flags);
-
 	*dma_addr = __map_single(dev, domain->priv, page_to_phys(page),
 				 size, DMA_BIDIRECTIONAL, true, dma_mask);
 
-	if (*dma_addr == DMA_ERROR_CODE) {
-		spin_unlock_irqrestore(&domain->lock, flags);
+	if (*dma_addr == DMA_ERROR_CODE)
 		goto out_free;
-	}
-
-	domain_flush_complete(domain);
-
-	spin_unlock_irqrestore(&domain->lock, flags);
 
 	return page_address(page);
 
@@ -2712,7 +2790,6 @@ static void free_coherent(struct device *dev, size_t size,
 			  struct dma_attrs *attrs)
 {
 	struct protection_domain *domain;
-	unsigned long flags;
 	struct page *page;
 
 	INC_STATS_COUNTER(cnt_free_coherent);
@@ -2724,14 +2801,8 @@ static void free_coherent(struct device *dev, size_t size,
 	if (IS_ERR(domain))
 		goto free_mem;
 
-	spin_lock_irqsave(&domain->lock, flags);
-
 	__unmap_single(domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
 
-	domain_flush_complete(domain);
-
-	spin_unlock_irqrestore(&domain->lock, flags);
-
 free_mem:
 	if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT))
 		__free_pages(page, get_order(size));
@@ -2746,14 +2817,43 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
 	return check_device(dev);
 }
 
+static int set_dma_mask(struct device *dev, u64 mask)
+{
+	struct protection_domain *domain;
+	int max_apertures = 1;
+
+	domain = get_domain(dev);
+	if (IS_ERR(domain))
+		return PTR_ERR(domain);
+
+	if (mask == DMA_BIT_MASK(64))
+		max_apertures = 8;
+	else if (mask > DMA_BIT_MASK(32))
+		max_apertures = 4;
+
+	/*
+	 * To prevent lock contention it doesn't make sense to allocate more
+	 * apertures than online cpus
+	 */
+	if (max_apertures > num_online_cpus())
+		max_apertures = num_online_cpus();
+
+	if (dma_ops_domain_alloc_apertures(domain->priv, max_apertures))
+		dev_err(dev, "Can't allocate %d iommu apertures\n",
+			max_apertures);
+
+	return 0;
+}
+
 static struct dma_map_ops amd_iommu_dma_ops = {
-	.alloc		= alloc_coherent,
-	.free		= free_coherent,
-	.map_page	= map_page,
-	.unmap_page	= unmap_page,
-	.map_sg		= map_sg,
-	.unmap_sg	= unmap_sg,
-	.dma_supported	= amd_iommu_dma_supported,
+	.alloc		= alloc_coherent,
+	.free		= free_coherent,
+	.map_page	= map_page,
+	.unmap_page	= unmap_page,
+	.map_sg		= map_sg,
+	.unmap_sg	= unmap_sg,
+	.dma_supported	= amd_iommu_dma_supported,
+	.set_dma_mask	= set_dma_mask,
 };
 
 int __init amd_iommu_init_api(void)
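
[Note: with set_dma_mask wired into amd_iommu_dma_ops, a driver raising
its DMA mask now transparently pre-grows the aperture: up to 8 ranges
for a full 64-bit mask, 4 above 32 bits, capped at num_online_cpus().
A hedged usage sketch from a hypothetical driver's probe path - this is
just the standard dma_set_mask() call, which the DMA core routes to the
set_dma_mask op above:

	#include <linux/pci.h>
	#include <linux/dma-mapping.h>

	static int example_probe(struct pci_dev *pdev)
	{
		/* 64-bit mask: the IOMMU pre-grows up to 8 apertures */
		if (dma_set_mask(&pdev->dev, DMA_BIT_MASK(64)) == 0)
			return 0;

		/* 32-bit fallback keeps the single default aperture,
		 * which can still grow on demand */
		return dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
	}
]
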
@@ -3757,11 +3857,9 @@ static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
 	case X86_IRQ_ALLOC_TYPE_MSI:
 	case X86_IRQ_ALLOC_TYPE_MSIX:
 		devid = get_device_id(&info->msi_dev->dev);
-		if (devid >= 0) {
-			iommu = amd_iommu_rlookup_table[devid];
-			if (iommu)
-				return iommu->msi_domain;
-		}
+		iommu = amd_iommu_rlookup_table[devid];
+		if (iommu)
+			return iommu->msi_domain;
 		break;
 	default:
 		break;