@@ -42,6 +42,55 @@
 #include "dma.h"
 #include "mm.h"
 
+struct arm_dma_alloc_args {
+	struct device *dev;
+	size_t size;
+	gfp_t gfp;
+	pgprot_t prot;
+	const void *caller;
+	bool want_vaddr;
+};
+
+struct arm_dma_free_args {
+	struct device *dev;
+	size_t size;
+	void *cpu_addr;
+	struct page *page;
+	bool want_vaddr;
+};
+
+struct arm_dma_allocator {
+	void *(*alloc)(struct arm_dma_alloc_args *args,
+		       struct page **ret_page);
+	void (*free)(struct arm_dma_free_args *args);
+};
+
+struct arm_dma_buffer {
+	struct list_head list;
+	void *virt;
+	struct arm_dma_allocator *allocator;
+};
+
+static LIST_HEAD(arm_dma_bufs);
+static DEFINE_SPINLOCK(arm_dma_bufs_lock);
+
+static struct arm_dma_buffer *arm_dma_buffer_find(void *virt)
+{
+	struct arm_dma_buffer *buf, *found = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&arm_dma_bufs_lock, flags);
+	list_for_each_entry(buf, &arm_dma_bufs, list) {
+		if (buf->virt == virt) {
+			list_del(&buf->list);
+			found = buf;
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&arm_dma_bufs_lock, flags);
+	return found;
+}
+
 /*
  * The DMA API is built upon the notion of "buffer ownership". A buffer
  * is either exclusively owned by the CPU (and therefore may be accessed
@@ -592,7 +641,7 @@ static inline pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot)
 #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv)	NULL
 #define __alloc_from_pool(size, ret_page)			NULL
 #define __alloc_from_contiguous(dev, size, prot, ret, c, wv)	NULL
-#define __free_from_pool(cpu_addr, size)			0
+#define __free_from_pool(cpu_addr, size)			do { } while (0)
 #define __free_from_contiguous(dev, page, cpu_addr, size, wv)	do { } while (0)
 #define __dma_free_remap(cpu_addr, size)			do { } while (0)
 
@@ -610,7 +659,78 @@ static void *__alloc_simple_buffer(struct device *dev, size_t size, gfp_t gfp,
 	return page_address(page);
 }
 
+static void *simple_allocator_alloc(struct arm_dma_alloc_args *args,
+				    struct page **ret_page)
+{
+	return __alloc_simple_buffer(args->dev, args->size, args->gfp,
+				     ret_page);
+}
+
+static void simple_allocator_free(struct arm_dma_free_args *args)
+{
+	__dma_free_buffer(args->page, args->size);
+}
+
+static struct arm_dma_allocator simple_allocator = {
+	.alloc = simple_allocator_alloc,
+	.free = simple_allocator_free,
+};
+
+static void *cma_allocator_alloc(struct arm_dma_alloc_args *args,
+				 struct page **ret_page)
+{
+	return __alloc_from_contiguous(args->dev, args->size, args->prot,
+				       ret_page, args->caller,
+				       args->want_vaddr);
+}
+
+static void cma_allocator_free(struct arm_dma_free_args *args)
+{
+	__free_from_contiguous(args->dev, args->page, args->cpu_addr,
+			       args->size, args->want_vaddr);
+}
+
+static struct arm_dma_allocator cma_allocator = {
+	.alloc = cma_allocator_alloc,
+	.free = cma_allocator_free,
+};
+
+static void *pool_allocator_alloc(struct arm_dma_alloc_args *args,
+				  struct page **ret_page)
+{
+	return __alloc_from_pool(args->size, ret_page);
+}
+
+static void pool_allocator_free(struct arm_dma_free_args *args)
+{
+	__free_from_pool(args->cpu_addr, args->size);
+}
+
+static struct arm_dma_allocator pool_allocator = {
+	.alloc = pool_allocator_alloc,
+	.free = pool_allocator_free,
+};
+
+static void *remap_allocator_alloc(struct arm_dma_alloc_args *args,
+				   struct page **ret_page)
+{
+	return __alloc_remap_buffer(args->dev, args->size, args->gfp,
+				    args->prot, ret_page, args->caller,
+				    args->want_vaddr);
+}
+
+static void remap_allocator_free(struct arm_dma_free_args *args)
+{
+	if (args->want_vaddr)
+		__dma_free_remap(args->cpu_addr, args->size);
+
+	__dma_free_buffer(args->page, args->size);
+}
+
+static struct arm_dma_allocator remap_allocator = {
+	.alloc = remap_allocator_alloc,
+	.free = remap_allocator_free,
+};
+
 static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
			 gfp_t gfp, pgprot_t prot, bool is_coherent,
@@ -619,7 +739,16 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 	u64 mask = get_coherent_dma_mask(dev);
 	struct page *page = NULL;
 	void *addr;
-	bool want_vaddr;
+	bool allowblock, cma;
+	struct arm_dma_buffer *buf;
+	struct arm_dma_alloc_args args = {
+		.dev = dev,
+		.size = PAGE_ALIGN(size),
+		.gfp = gfp,
+		.prot = prot,
+		.caller = caller,
+		.want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs),
+	};
 
 #ifdef CONFIG_DMA_API_DEBUG
 	u64 limit = (mask + 1) & ~mask;
@@ -633,6 +762,10 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 	if (!mask)
 		return NULL;
 
+	buf = kzalloc(sizeof(*buf), gfp);
+	if (!buf)
+		return NULL;
+
 	if (mask < 0xffffffffULL)
 		gfp |= GFP_DMA;
 
@@ -644,28 +777,37 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle,
 	 * platform; see CONFIG_HUGETLBFS.
 	 */
 	gfp &= ~(__GFP_COMP);
+	args.gfp = gfp;
 
 	*handle = DMA_ERROR_CODE;
-	size = PAGE_ALIGN(size);
-	want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
-
-	if (nommu())
-		addr = __alloc_simple_buffer(dev, size, gfp, &page);
-	else if (dev_get_cma_area(dev) && (gfp & __GFP_DIRECT_RECLAIM))
-		addr = __alloc_from_contiguous(dev, size, prot, &page,
-					       caller, want_vaddr);
-	else if (is_coherent)
-		addr = __alloc_simple_buffer(dev, size, gfp, &page);
-	else if (!gfpflags_allow_blocking(gfp))
-		addr = __alloc_from_pool(size, &page);
+	allowblock = gfpflags_allow_blocking(gfp);
+	cma = allowblock ? dev_get_cma_area(dev) : false;
+
+	if (cma)
+		buf->allocator = &cma_allocator;
+	else if (nommu() || is_coherent)
+		buf->allocator = &simple_allocator;
+	else if (allowblock)
+		buf->allocator = &remap_allocator;
 	else
-		addr = __alloc_remap_buffer(dev, size, gfp, prot, &page,
-					    caller, want_vaddr);
+		buf->allocator = &pool_allocator;
+
+	addr = buf->allocator->alloc(&args, &page);
+
+	if (page) {
+		unsigned long flags;
 
-	if (page)
 		*handle = pfn_to_dma(dev, page_to_pfn(page));
+		buf->virt = args.want_vaddr ? addr : page;
+
+		spin_lock_irqsave(&arm_dma_bufs_lock, flags);
+		list_add(&buf->list, &arm_dma_bufs);
+		spin_unlock_irqrestore(&arm_dma_bufs_lock, flags);
+	} else {
+		kfree(buf);
+	}
 
-	return want_vaddr ? addr : page;
+	return args.want_vaddr ? addr : page;
 }
 
 /*
@@ -741,25 +883,21 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
			   bool is_coherent)
 {
 	struct page *page = pfn_to_page(dma_to_pfn(dev, handle));
-	bool want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs);
-
-	size = PAGE_ALIGN(size);
-
-	if (nommu()) {
-		__dma_free_buffer(page, size);
-	} else if (!is_coherent && __free_from_pool(cpu_addr, size)) {
+	struct arm_dma_buffer *buf;
+	struct arm_dma_free_args args = {
+		.dev = dev,
+		.size = PAGE_ALIGN(size),
+		.cpu_addr = cpu_addr,
+		.page = page,
+		.want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs),
+	};
+
+	buf = arm_dma_buffer_find(cpu_addr);
+	if (WARN(!buf, "Freeing invalid buffer %p\n", cpu_addr))
 		return;
-	} else if (!dev_get_cma_area(dev)) {
-		if (want_vaddr && !is_coherent)
-			__dma_free_remap(cpu_addr, size);
-		__dma_free_buffer(page, size);
-	} else {
-		/*
-		 * Non-atomic allocations cannot be freed with IRQs disabled
-		 */
-		WARN_ON(irqs_disabled());
-		__free_from_contiguous(dev, page, cpu_addr, size, want_vaddr);
-	}
+
+	buf->allocator->free(&args);
+	kfree(buf);
 }
 
 void arm_dma_free(struct device *dev, size_t size, void *cpu_addr,
@@ -1122,6 +1260,9 @@ static inline void __free_iova(struct dma_iommu_mapping *mapping,
 	spin_unlock_irqrestore(&mapping->lock, flags);
 }
 
+/* We'll try 2M, 1M, 64K, and finally 4K; array must end with 0! */
+static const int iommu_order_array[] = { 9, 8, 4, 0 };
+
 static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
					   gfp_t gfp, struct dma_attrs *attrs)
 {
@@ -1129,6 +1270,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 	int count = size >> PAGE_SHIFT;
 	int array_size = count * sizeof(struct page *);
 	int i = 0;
+	int order_idx = 0;
 
 	if (array_size <= PAGE_SIZE)
 		pages = kzalloc(array_size, GFP_KERNEL);
@@ -1154,6 +1296,10 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 		return pages;
 	}
 
+	/* Go straight to 4K chunks if caller says it's OK. */
+	if (dma_get_attr(DMA_ATTR_ALLOC_SINGLE_PAGES, attrs))
+		order_idx = ARRAY_SIZE(iommu_order_array) - 1;
+
 	/*
 	 * IOMMU can map any pages, so himem can also be used here
 	 */
@@ -1162,22 +1308,24 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size,
 	while (count) {
 		int j, order;
 
-		for (order = __fls(count); order > 0; --order) {
-			/*
-			 * We do not want OOM killer to be invoked as long
-			 * as we can fall back to single pages, so we force
-			 * __GFP_NORETRY for orders higher than zero.
-			 */
-			pages[i] = alloc_pages(gfp | __GFP_NORETRY, order);
-			if (pages[i])
-				break;
+		order = iommu_order_array[order_idx];
+
+		/* Drop down when we get small */
+		if (__fls(count) < order) {
+			order_idx++;
+			continue;
 		}
 
-		if (!pages[i]) {
-			/*
-			 * Fall back to single page allocation.
-			 * Might invoke OOM killer as last resort.
-			 */
+		if (order) {
+			/* See if it's easy to allocate a high-order chunk */
+			pages[i] = alloc_pages(gfp | __GFP_NORETRY, order);
+
+			/* Go down a notch at first sign of pressure */
+			if (!pages[i]) {
+				order_idx++;
+				continue;
+			}
+		} else {
 			pages[i] = alloc_pages(gfp, 0);
 			if (!pages[i])
 				goto error;