
arm64: Implement coherent DMA API based on swiotlb

This patch adds support for DMA API cache maintenance on SoCs without
hardware device cache coherency.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Catalin Marinas 12 years ago
parent
commit
7363590d2c

+ 7 - 0
arch/arm64/include/asm/cacheflush.h

@@ -84,6 +84,13 @@ static inline void flush_cache_page(struct vm_area_struct *vma,
 {
 }
 
+/*
+ * Cache maintenance functions used by the DMA API. Not to be used directly.
+ */
+extern void __dma_map_area(const void *, size_t, int);
+extern void __dma_unmap_area(const void *, size_t, int);
+extern void __dma_flush_range(const void *, const void *);
+
 /*
  * Copy user data from/to a page which is mapped into a different
 * process's address space.  Really, we want to allow our "user

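For reference, the three hooks declared above take the following arguments; the parameter names here are descriptive additions (the header leaves them unnamed), and dir carries an enum dma_data_direction value passed as int:

/* Illustrative, named signatures for the hooks above (sketch only;
 * the names are not from the patch). */
extern void __dma_map_area(const void *start, size_t size, int dir);
extern void __dma_unmap_area(const void *start, size_t size, int dir);
extern void __dma_flush_range(const void *start, const void *end);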
+ 7 - 0
arch/arm64/include/asm/dma-mapping.h

@@ -30,6 +30,8 @@
 
 #define DMA_ERROR_CODE	(~(dma_addr_t)0)
 extern struct dma_map_ops *dma_ops;
+extern struct dma_map_ops coherent_swiotlb_dma_ops;
+extern struct dma_map_ops noncoherent_swiotlb_dma_ops;
 
 static inline struct dma_map_ops *__generic_dma_ops(struct device *dev)
 {
@@ -47,6 +49,11 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev)
 		return __generic_dma_ops(dev);
 }
 
+static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops)
+{
+	dev->archdata.dma_ops = ops;
+}
+
 #include <asm-generic/dma-mapping-common.h>
 
 static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)

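Exporting both ops structures lets platform or bus code choose per device between the coherent and non-coherent paths via the set_dma_ops() helper added above. A minimal selection sketch, assuming a hypothetical device_is_coherent() predicate (the patch itself only provides the ops; the selection policy is left to platform code):

/* Sketch: per-device DMA ops selection. device_is_coherent() is a
 * hypothetical stand-in for platform-specific coherency knowledge. */
static void example_setup_dma_ops(struct device *dev)
{
	if (device_is_coherent(dev))
		set_dma_ops(dev, &coherent_swiotlb_dma_ops);
	else
		set_dma_ops(dev, &noncoherent_swiotlb_dma_ops);
}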
+ 78 - 0
arch/arm64/mm/cache.S

@@ -166,3 +166,81 @@ ENTRY(__flush_dcache_area)
 	dsb	sy
 	ret
 ENDPROC(__flush_dcache_area)
+
+/*
+ *	__dma_inv_range(start, end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+__dma_inv_range:
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	tst	x1, x3				// end cache line aligned?
+	bic	x1, x1, x3
+	b.eq	1f
+	dc	civac, x1			// clean & invalidate D / U line
+1:	tst	x0, x3				// start cache line aligned?
+	bic	x0, x0, x3
+	b.eq	2f
+	dc	civac, x0			// clean & invalidate D / U line
+	b	3f
+2:	dc	ivac, x0			// invalidate D / U line
+3:	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	2b
+	dsb	sy
+	ret
+ENDPROC(__dma_inv_range)
+
+/*
+ *	__dma_clean_range(start, end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+__dma_clean_range:
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	cvac, x0			// clean D / U line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__dma_clean_range)
+
+/*
+ *	__dma_flush_range(start, end)
+ *	- start   - virtual start address of region
+ *	- end     - virtual end address of region
+ */
+ENTRY(__dma_flush_range)
+	dcache_line_size x2, x3
+	sub	x3, x2, #1
+	bic	x0, x0, x3
+1:	dc	civac, x0			// clean & invalidate D / U line
+	add	x0, x0, x2
+	cmp	x0, x1
+	b.lo	1b
+	dsb	sy
+	ret
+ENDPROC(__dma_flush_range)
+
+/*
+ *	__dma_map_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(__dma_map_area)
+	add	x1, x1, x0
+	cmp	w2, #DMA_FROM_DEVICE
+	b.eq	__dma_inv_range
+	b	__dma_clean_range
+ENDPROC(__dma_map_area)
+
+/*
+ *	__dma_unmap_area(start, size, dir)
+ *	- start	- kernel virtual start address
+ *	- size	- size of region
+ *	- dir	- DMA direction
+ */
+ENTRY(__dma_unmap_area)
+	add	x1, x1, x0
+	cmp	w2, #DMA_TO_DEVICE
+	b.ne	__dma_inv_range
+	ret
+ENDPROC(__dma_unmap_area)

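In C terms, the direction dispatch implemented by __dma_map_area and __dma_unmap_area above reduces to the following sketch (it merely restates the assembly; the two range helpers are local to cache.S and have no C prototypes):

/* Sketch of the dispatch above, not code from the patch. */
static void dma_map_area_sketch(const void *start, size_t size, int dir)
{
	if (dir == DMA_FROM_DEVICE)
		__dma_inv_range(start, start + size);	/* device will write: drop stale CPU lines */
	else
		__dma_clean_range(start, start + size);	/* device will read: push dirty lines to RAM */
}

static void dma_unmap_area_sketch(const void *start, size_t size, int dir)
{
	if (dir != DMA_TO_DEVICE)
		__dma_inv_range(start, start + size);	/* drop lines speculatively fetched while
							   the device owned the buffer */
}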
+ 161 - 1
arch/arm64/mm/dma-mapping.c

@@ -78,7 +78,166 @@ static void __dma_free_coherent(struct device *dev, size_t size,
 	}
 }
 
-static struct dma_map_ops coherent_swiotlb_dma_ops = {
+static void *__dma_alloc_noncoherent(struct device *dev, size_t size,
+				     dma_addr_t *dma_handle, gfp_t flags,
+				     struct dma_attrs *attrs)
+{
+	struct page *page, **map;
+	void *ptr, *coherent_ptr;
+	int order, i;
+
+	size = PAGE_ALIGN(size);
+	order = get_order(size);
+
+	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
+	if (!ptr)
+		goto no_mem;
+	map = kmalloc(sizeof(struct page *) << order, flags & ~GFP_DMA);
+	if (!map)
+		goto no_map;
+
+	/* remove any dirty cache lines on the kernel alias */
+	__dma_flush_range(ptr, ptr + size);
+
+	/* create a coherent mapping */
+	page = virt_to_page(ptr);
+	for (i = 0; i < (size >> PAGE_SHIFT); i++)
+		map[i] = page + i;
+	coherent_ptr = vmap(map, size >> PAGE_SHIFT, VM_MAP,
+			    pgprot_dmacoherent(pgprot_default));
+	kfree(map);
+	if (!coherent_ptr)
+		goto no_map;
+
+	return coherent_ptr;
+
+no_map:
+	__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
+no_mem:
+	*dma_handle = DMA_ERROR_CODE;
+	return NULL;
+}
+
+static void __dma_free_noncoherent(struct device *dev, size_t size,
+				   void *vaddr, dma_addr_t dma_handle,
+				   struct dma_attrs *attrs)
+{
+	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
+
+	vunmap(vaddr);
+	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
+}
+
+static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction dir,
+				     struct dma_attrs *attrs)
+{
+	dma_addr_t dev_addr;
+
+	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
+	__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+
+	return dev_addr;
+}
+
+static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
+				 size_t size, enum dma_data_direction dir,
+				 struct dma_attrs *attrs)
+{
+	__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
+}
+
+static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
+				  int nelems, enum dma_data_direction dir,
+				  struct dma_attrs *attrs)
+{
+	struct scatterlist *sg;
+	int i, ret;
+
+	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
+	for_each_sg(sgl, sg, ret, i)
+		__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+			       sg->length, dir);
+
+	return ret;
+}
+
+static void __swiotlb_unmap_sg_attrs(struct device *dev,
+				     struct scatterlist *sgl, int nelems,
+				     enum dma_data_direction dir,
+				     struct dma_attrs *attrs)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nelems, i)
+		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+				 sg->length, dir);
+	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+}
+
+static void __swiotlb_sync_single_for_cpu(struct device *dev,
+					  dma_addr_t dev_addr, size_t size,
+					  enum dma_data_direction dir)
+{
+	__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
+}
+
+static void __swiotlb_sync_single_for_device(struct device *dev,
+					     dma_addr_t dev_addr, size_t size,
+					     enum dma_data_direction dir)
+{
+	swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
+	__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+}
+
+static void __swiotlb_sync_sg_for_cpu(struct device *dev,
+				      struct scatterlist *sgl, int nelems,
+				      enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	for_each_sg(sgl, sg, nelems, i)
+		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+				 sg->length, dir);
+	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
+}
+
+static void __swiotlb_sync_sg_for_device(struct device *dev,
+					 struct scatterlist *sgl, int nelems,
+					 enum dma_data_direction dir)
+{
+	struct scatterlist *sg;
+	int i;
+
+	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
+	for_each_sg(sgl, sg, nelems, i)
+		__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
+			       sg->length, dir);
+}
+
+struct dma_map_ops noncoherent_swiotlb_dma_ops = {
+	.alloc = __dma_alloc_noncoherent,
+	.free = __dma_free_noncoherent,
+	.map_page = __swiotlb_map_page,
+	.unmap_page = __swiotlb_unmap_page,
+	.map_sg = __swiotlb_map_sg_attrs,
+	.unmap_sg = __swiotlb_unmap_sg_attrs,
+	.sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
+	.sync_single_for_device = __swiotlb_sync_single_for_device,
+	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
+	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
+	.dma_supported = swiotlb_dma_supported,
+	.mapping_error = swiotlb_dma_mapping_error,
+};
+EXPORT_SYMBOL(noncoherent_swiotlb_dma_ops);
+
+struct dma_map_ops coherent_swiotlb_dma_ops = {
 	.alloc = __dma_alloc_coherent,
 	.free = __dma_free_coherent,
 	.map_page = swiotlb_map_page,
@@ -92,6 +251,7 @@ static struct dma_map_ops coherent_swiotlb_dma_ops = {
 	.dma_supported = swiotlb_dma_supported,
 	.mapping_error = swiotlb_dma_mapping_error,
 };
+EXPORT_SYMBOL(coherent_swiotlb_dma_ops);
 
 extern int swiotlb_late_init_with_default_size(size_t default_size);
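
From a driver's perspective nothing changes: once noncoherent_swiotlb_dma_ops is installed for a device, the standard DMA API calls route through the wrappers above. An illustrative flow (dev, data and len are placeholders, not from the patch):

/* Illustrative driver-side flow with the non-coherent ops installed. */
dma_addr_t handle;
void *buf = dma_alloc_coherent(dev, SZ_4K, &handle, GFP_KERNEL);
/* -> __dma_alloc_noncoherent(): kernel alias flushed, pages remapped
 *    via vmap() with pgprot_dmacoherent() attributes */

dma_addr_t addr = dma_map_single(dev, data, len, DMA_TO_DEVICE);
/* -> __swiotlb_map_page(): optional swiotlb bounce, then __dma_map_area()
 *    cleans the CPU cache so the device sees up-to-date data */
dma_unmap_single(dev, addr, len, DMA_TO_DEVICE);
/* -> __swiotlb_unmap_page(): cache invalidation skipped for DMA_TO_DEVICE */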