dma-mapping.c

/*
 * SWIOTLB-based DMA API implementation
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/gfp.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>

#include <asm/cacheflush.h>

static int swiotlb __read_mostly;

static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
				 bool coherent)
{
	if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
		return pgprot_writecombine(prot);
	return prot;
}

static struct gen_pool *atomic_pool;

#define DEFAULT_DMA_COHERENT_POOL_SIZE	SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;

static int __init early_coherent_pool(char *p)
{
	atomic_pool_size = memparse(p, &p);
	return 0;
}
early_param("coherent_pool", early_coherent_pool);
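
/*
 * For example, booting with "coherent_pool=2M" grows the atomic pool from the
 * default 256 KiB to 2 MiB; memparse() accepts the usual K/M/G suffixes.
 */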

static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
{
	unsigned long val;
	void *ptr = NULL;

	if (!atomic_pool) {
		WARN(1, "coherent pool not initialised!\n");
		return NULL;
	}

	val = gen_pool_alloc(atomic_pool, size);
	if (val) {
		phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);

		*ret_page = phys_to_page(phys);
		ptr = (void *)val;
		memset(ptr, 0, size);
	}

	return ptr;
}

static bool __in_atomic_pool(void *start, size_t size)
{
	return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
}

static int __free_from_pool(void *start, size_t size)
{
	if (!__in_atomic_pool(start, size))
		return 0;

	gen_pool_free(atomic_pool, (unsigned long)start, size);

	return 1;
}

static void *__dma_alloc_coherent(struct device *dev, size_t size,
				  dma_addr_t *dma_handle, gfp_t flags,
				  unsigned long attrs)
{
	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return NULL;
	}

	if (IS_ENABLED(CONFIG_ZONE_DMA) &&
	    dev->coherent_dma_mask <= DMA_BIT_MASK(32))
		flags |= GFP_DMA;
	if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
		struct page *page;
		void *addr;

		page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
						 get_order(size));
		if (!page)
			return NULL;

		*dma_handle = phys_to_dma(dev, page_to_phys(page));
		addr = page_address(page);
		memset(addr, 0, size);
		return addr;
	} else {
		return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
	}
}

static void __dma_free_coherent(struct device *dev, size_t size,
				void *vaddr, dma_addr_t dma_handle,
				unsigned long attrs)
{
	bool freed;
	phys_addr_t paddr = dma_to_phys(dev, dma_handle);

	if (dev == NULL) {
		WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
		return;
	}

	freed = dma_release_from_contiguous(dev,
					phys_to_page(paddr),
					size >> PAGE_SHIFT);
	if (!freed)
		swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}

static void *__dma_alloc(struct device *dev, size_t size,
			 dma_addr_t *dma_handle, gfp_t flags,
			 unsigned long attrs)
{
	struct page *page;
	void *ptr, *coherent_ptr;
	bool coherent = is_device_dma_coherent(dev);
	pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);

	size = PAGE_ALIGN(size);

	if (!coherent && !gfpflags_allow_blocking(flags)) {
		struct page *page = NULL;
		void *addr = __alloc_from_pool(size, &page, flags);

		if (addr)
			*dma_handle = phys_to_dma(dev, page_to_phys(page));

		return addr;
	}

	ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
	if (!ptr)
		goto no_mem;

	/* no need for non-cacheable mapping if coherent */
	if (coherent)
		return ptr;

	/* remove any dirty cache lines on the kernel alias */
	__dma_flush_range(ptr, ptr + size);

	/* create a coherent mapping */
	page = virt_to_page(ptr);
	coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
						   prot, NULL);
	if (!coherent_ptr)
		goto no_map;

	return coherent_ptr;

no_map:
	__dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
no_mem:
	*dma_handle = DMA_ERROR_CODE;
	return NULL;
}

static void __dma_free(struct device *dev, size_t size,
		       void *vaddr, dma_addr_t dma_handle,
		       unsigned long attrs)
{
	void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));

	size = PAGE_ALIGN(size);

	if (!is_device_dma_coherent(dev)) {
		if (__free_from_pool(vaddr, size))
			return;
		vunmap(vaddr);
	}
	__dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
}
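
/*
 * The streaming wrappers below bracket the generic swiotlb calls with the
 * arch cache maintenance helpers for non-coherent devices: __dma_map_area()
 * makes a buffer visible to the device (on map and sync_*_for_device), while
 * __dma_unmap_area() makes the device's writes visible to the CPU (on unmap
 * and sync_*_for_cpu). Fully coherent devices need neither.
 */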

static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	dma_addr_t dev_addr;

	dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);

	return dev_addr;
}

static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
}

static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				  int nelems, enum dma_data_direction dir,
				  unsigned long attrs)
{
	struct scatterlist *sg;
	int i, ret;

	ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, ret, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);

	return ret;
}

static void __swiotlb_unmap_sg_attrs(struct device *dev,
				     struct scatterlist *sgl, int nelems,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
}

static void __swiotlb_sync_single_for_cpu(struct device *dev,
					  dma_addr_t dev_addr, size_t size,
					  enum dma_data_direction dir)
{
	if (!is_device_dma_coherent(dev))
		__dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
	swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
}

static void __swiotlb_sync_single_for_device(struct device *dev,
					     dma_addr_t dev_addr, size_t size,
					     enum dma_data_direction dir)
{
	swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
	if (!is_device_dma_coherent(dev))
		__dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
}

static void __swiotlb_sync_sg_for_cpu(struct device *dev,
				      struct scatterlist *sgl, int nelems,
				      enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
					 sg->length, dir);
	swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
}

static void __swiotlb_sync_sg_for_device(struct device *dev,
					 struct scatterlist *sgl, int nelems,
					 enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
	if (!is_device_dma_coherent(dev))
		for_each_sg(sgl, sg, nelems, i)
			__dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
				       sg->length, dir);
}

static int __swiotlb_mmap(struct device *dev,
			  struct vm_area_struct *vma,
			  void *cpu_addr, dma_addr_t dma_addr, size_t size,
			  unsigned long attrs)
{
	int ret = -ENXIO;
	unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
					PAGE_SHIFT;
	unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
	unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
	unsigned long off = vma->vm_pgoff;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
		ret = remap_pfn_range(vma, vma->vm_start,
				      pfn + off,
				      vma->vm_end - vma->vm_start,
				      vma->vm_page_prot);
	}

	return ret;
}

static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
				 void *cpu_addr, dma_addr_t handle, size_t size,
				 unsigned long attrs)
{
	int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);

	if (!ret)
		sg_set_page(sgt->sgl, phys_to_page(dma_to_phys(dev, handle)),
			    PAGE_ALIGN(size), 0);

	return ret;
}

static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
	if (swiotlb)
		return swiotlb_dma_supported(hwdev, mask);
	return 1;
}

static struct dma_map_ops swiotlb_dma_ops = {
	.alloc = __dma_alloc,
	.free = __dma_free,
	.mmap = __swiotlb_mmap,
	.get_sgtable = __swiotlb_get_sgtable,
	.map_page = __swiotlb_map_page,
	.unmap_page = __swiotlb_unmap_page,
	.map_sg = __swiotlb_map_sg_attrs,
	.unmap_sg = __swiotlb_unmap_sg_attrs,
	.sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
	.sync_single_for_device = __swiotlb_sync_single_for_device,
	.sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
	.sync_sg_for_device = __swiotlb_sync_sg_for_device,
	.dma_supported = __swiotlb_dma_supported,
	.mapping_error = swiotlb_dma_mapping_error,
};
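
/*
 * These ops are reached through the generic DMA API once installed by
 * arch_setup_dma_ops() below. A sketch of typical driver-side usage
 * (illustrative only; "dev" is the driver's struct device):
 *
 *	dma_addr_t dma, addr;
 *	void *buf = dma_alloc_coherent(dev, SZ_4K, &dma, GFP_KERNEL);
 *						-> __dma_alloc()
 *	addr = dma_map_single(dev, ptr, len, DMA_TO_DEVICE);
 *						-> __swiotlb_map_page()
 *	dma_unmap_single(dev, addr, len, DMA_TO_DEVICE);
 *						-> __swiotlb_unmap_page()
 *	dma_free_coherent(dev, SZ_4K, buf, dma);
 *						-> __dma_free()
 */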

static int __init atomic_pool_init(void)
{
	pgprot_t prot = __pgprot(PROT_NORMAL_NC);
	unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
	struct page *page;
	void *addr;
	unsigned int pool_size_order = get_order(atomic_pool_size);

	if (dev_get_cma_area(NULL))
		page = dma_alloc_from_contiguous(NULL, nr_pages,
						 pool_size_order);
	else
		page = alloc_pages(GFP_DMA, pool_size_order);

	if (page) {
		int ret;
		void *page_addr = page_address(page);

		memset(page_addr, 0, atomic_pool_size);
		__dma_flush_range(page_addr, page_addr + atomic_pool_size);

		atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
		if (!atomic_pool)
			goto free_page;

		addr = dma_common_contiguous_remap(page, atomic_pool_size,
					VM_USERMAP, prot, atomic_pool_init);

		if (!addr)
			goto destroy_genpool;

		ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
					page_to_phys(page),
					atomic_pool_size, -1);
		if (ret)
			goto remove_mapping;

		gen_pool_set_algo(atomic_pool,
				  gen_pool_first_fit_order_align,
				  (void *)PAGE_SHIFT);

		pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
			atomic_pool_size / 1024);
		return 0;
	}
	goto out;

remove_mapping:
	dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
destroy_genpool:
	gen_pool_destroy(atomic_pool);
	atomic_pool = NULL;
free_page:
	if (!dma_release_from_contiguous(NULL, page, nr_pages))
		__free_pages(page, pool_size_order);
out:
	pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
	       atomic_pool_size / 1024);
	return -ENOMEM;
}
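
/*
 * The pool created above backs __alloc_from_pool(): it serves coherent
 * allocations made from atomic context by non-coherent devices, both in
 * __dma_alloc() above and in __iommu_alloc_attrs() below, since remapping
 * pages as non-cacheable is not possible without sleeping.
 */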

/********************************************
 * The following APIs are for dummy DMA ops *
 ********************************************/

static void *__dummy_alloc(struct device *dev, size_t size,
			   dma_addr_t *dma_handle, gfp_t flags,
			   unsigned long attrs)
{
	return NULL;
}

static void __dummy_free(struct device *dev, size_t size,
			 void *vaddr, dma_addr_t dma_handle,
			 unsigned long attrs)
{
}

static int __dummy_mmap(struct device *dev,
			struct vm_area_struct *vma,
			void *cpu_addr, dma_addr_t dma_addr, size_t size,
			unsigned long attrs)
{
	return -ENXIO;
}

static dma_addr_t __dummy_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	return DMA_ERROR_CODE;
}

static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
}

static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
			  int nelems, enum dma_data_direction dir,
			  unsigned long attrs)
{
	return 0;
}

static void __dummy_unmap_sg(struct device *dev,
			     struct scatterlist *sgl, int nelems,
			     enum dma_data_direction dir,
			     unsigned long attrs)
{
}

static void __dummy_sync_single(struct device *dev,
				dma_addr_t dev_addr, size_t size,
				enum dma_data_direction dir)
{
}

static void __dummy_sync_sg(struct device *dev,
			    struct scatterlist *sgl, int nelems,
			    enum dma_data_direction dir)
{
}

static int __dummy_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
	return 1;
}

static int __dummy_dma_supported(struct device *hwdev, u64 mask)
{
	return 0;
}

struct dma_map_ops dummy_dma_ops = {
	.alloc = __dummy_alloc,
	.free = __dummy_free,
	.mmap = __dummy_mmap,
	.map_page = __dummy_map_page,
	.unmap_page = __dummy_unmap_page,
	.map_sg = __dummy_map_sg,
	.unmap_sg = __dummy_unmap_sg,
	.sync_single_for_cpu = __dummy_sync_single,
	.sync_single_for_device = __dummy_sync_single,
	.sync_sg_for_cpu = __dummy_sync_sg,
	.sync_sg_for_device = __dummy_sync_sg,
	.mapping_error = __dummy_mapping_error,
	.dma_supported = __dummy_dma_supported,
};
EXPORT_SYMBOL(dummy_dma_ops);
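
/*
 * dummy_dma_ops is intended for devices that must not perform DMA at all
 * (for example, masters whose ACPI _CCA attribute indicates DMA is not
 * supported). Every operation fails cleanly, so a driver that checks
 * dma_mapping_error() and map_sg() return values sees an ordinary failure
 * rather than silently corrupting memory.
 */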

static int __init arm64_dma_init(void)
{
	if (swiotlb_force || max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
		swiotlb = 1;

	return atomic_pool_init();
}
arch_initcall(arm64_dma_init);

#define PREALLOC_DMA_DEBUG_ENTRIES	4096

static int __init dma_debug_do_init(void)
{
	dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
	return 0;
}
fs_initcall(dma_debug_do_init);
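
/*
 * dma_debug_init() preallocates tracking entries for the DMA API debugging
 * facility; it is a no-op stub when CONFIG_DMA_API_DEBUG is not enabled.
 */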

#ifdef CONFIG_IOMMU_DMA
#include <linux/dma-iommu.h>
#include <linux/platform_device.h>
#include <linux/amba/bus.h>

/* Thankfully, all cache ops are by VA so we can ignore phys here */
static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
{
	__dma_flush_range(virt, virt + PAGE_SIZE);
}

static void *__iommu_alloc_attrs(struct device *dev, size_t size,
				 dma_addr_t *handle, gfp_t gfp,
				 unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
	size_t iosize = size;
	void *addr;

	if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
		return NULL;

	size = PAGE_ALIGN(size);

	/*
	 * Some drivers rely on this, and we probably don't want the
	 * possibility of stale kernel data being read by devices anyway.
	 */
	gfp |= __GFP_ZERO;

	if (gfpflags_allow_blocking(gfp)) {
		struct page **pages;
		pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);

		pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
					handle, flush_page);
		if (!pages)
			return NULL;

		addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
					      __builtin_return_address(0));
		if (!addr)
			iommu_dma_free(dev, pages, iosize, handle);
	} else {
		struct page *page;
		/*
		 * In atomic context we can't remap anything, so we'll only
		 * get the virtually contiguous buffer we need by way of a
		 * physically contiguous allocation.
		 */
		if (coherent) {
			page = alloc_pages(gfp, get_order(size));
			addr = page ? page_address(page) : NULL;
		} else {
			addr = __alloc_from_pool(size, &page, gfp);
		}
		if (!addr)
			return NULL;

		*handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
		if (iommu_dma_mapping_error(dev, *handle)) {
			if (coherent)
				__free_pages(page, get_order(size));
			else
				__free_from_pool(addr, size);
			addr = NULL;
		}
	}
	return addr;
}

static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
			       dma_addr_t handle, unsigned long attrs)
{
	size_t iosize = size;

	size = PAGE_ALIGN(size);
	/*
	 * @cpu_addr will be one of 3 things depending on how it was allocated:
	 * - A remapped array of pages from iommu_dma_alloc(), for all
	 *   non-atomic allocations.
	 * - A non-cacheable alias from the atomic pool, for atomic
	 *   allocations by non-coherent devices.
	 * - A normal lowmem address, for atomic allocations by
	 *   coherent devices.
	 * Hence how dodgy the below logic looks...
	 */
	if (__in_atomic_pool(cpu_addr, size)) {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		__free_from_pool(cpu_addr, size);
	} else if (is_vmalloc_addr(cpu_addr)) {
		struct vm_struct *area = find_vm_area(cpu_addr);

		if (WARN_ON(!area || !area->pages))
			return;
		iommu_dma_free(dev, area->pages, iosize, &handle);
		dma_common_free_remap(cpu_addr, size, VM_USERMAP);
	} else {
		iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
		__free_pages(virt_to_page(cpu_addr), get_order(size));
	}
}

static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
			      void *cpu_addr, dma_addr_t dma_addr, size_t size,
			      unsigned long attrs)
{
	struct vm_struct *area;
	int ret;

	vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
					     is_device_dma_coherent(dev));

	if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
		return ret;

	area = find_vm_area(cpu_addr);
	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return iommu_dma_mmap(area->pages, size, vma);
}

static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
			       void *cpu_addr, dma_addr_t dma_addr,
			       size_t size, unsigned long attrs)
{
	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
	struct vm_struct *area = find_vm_area(cpu_addr);

	if (WARN_ON(!area || !area->pages))
		return -ENXIO;

	return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
					 GFP_KERNEL);
}

static void __iommu_sync_single_for_cpu(struct device *dev,
					dma_addr_t dev_addr, size_t size,
					enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_unmap_area(phys_to_virt(phys), size, dir);
}

static void __iommu_sync_single_for_device(struct device *dev,
					   dma_addr_t dev_addr, size_t size,
					   enum dma_data_direction dir)
{
	phys_addr_t phys;

	if (is_device_dma_coherent(dev))
		return;

	phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
	__dma_map_area(phys_to_virt(phys), size, dir);
}

static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
				   unsigned long offset, size_t size,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);
	int prot = dma_direction_to_prot(dir, coherent);
	dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);

	if (!iommu_dma_mapping_error(dev, dev_addr) &&
	    (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_single_for_device(dev, dev_addr, size, dir);

	return dev_addr;
}

static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
			       size_t size, enum dma_data_direction dir,
			       unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_single_for_cpu(dev, dev_addr, size, dir);

	iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
}

static void __iommu_sync_sg_for_cpu(struct device *dev,
				    struct scatterlist *sgl, int nelems,
				    enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_unmap_area(sg_virt(sg), sg->length, dir);
}

static void __iommu_sync_sg_for_device(struct device *dev,
				       struct scatterlist *sgl, int nelems,
				       enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	if (is_device_dma_coherent(dev))
		return;

	for_each_sg(sgl, sg, nelems, i)
		__dma_map_area(sg_virt(sg), sg->length, dir);
}

static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
				int nelems, enum dma_data_direction dir,
				unsigned long attrs)
{
	bool coherent = is_device_dma_coherent(dev);

	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_device(dev, sgl, nelems, dir);

	return iommu_dma_map_sg(dev, sgl, nelems,
				dma_direction_to_prot(dir, coherent));
}

static void __iommu_unmap_sg_attrs(struct device *dev,
				   struct scatterlist *sgl, int nelems,
				   enum dma_data_direction dir,
				   unsigned long attrs)
{
	if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
		__iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);

	iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
}

static struct dma_map_ops iommu_dma_ops = {
	.alloc = __iommu_alloc_attrs,
	.free = __iommu_free_attrs,
	.mmap = __iommu_mmap_attrs,
	.get_sgtable = __iommu_get_sgtable,
	.map_page = __iommu_map_page,
	.unmap_page = __iommu_unmap_page,
	.map_sg = __iommu_map_sg_attrs,
	.unmap_sg = __iommu_unmap_sg_attrs,
	.sync_single_for_cpu = __iommu_sync_single_for_cpu,
	.sync_single_for_device = __iommu_sync_single_for_device,
	.sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
	.sync_sg_for_device = __iommu_sync_sg_for_device,
	.dma_supported = iommu_dma_supported,
	.mapping_error = iommu_dma_mapping_error,
};
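
/*
 * Once do_iommu_attach() below installs iommu_dma_ops on a device, the same
 * generic DMA API calls illustrated for swiotlb_dma_ops are routed through
 * the common IOMMU-DMA layer instead: buffers are given an IOVA from the
 * device's default DMA domain, with the cache maintenance above applied for
 * non-coherent masters.
 */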

/*
 * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
 * everything it needs to - the device is only partially created and the
 * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
 * need this delayed attachment dance. Once IOMMU probe ordering is sorted
 * to move the arch_setup_dma_ops() call later, all the notifier bits below
 * become unnecessary, and will go away.
 */
struct iommu_dma_notifier_data {
	struct list_head list;
	struct device *dev;
	const struct iommu_ops *ops;
	u64 dma_base;
	u64 size;
};
static LIST_HEAD(iommu_dma_masters);
static DEFINE_MUTEX(iommu_dma_notifier_lock);

static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			    u64 dma_base, u64 size)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	/*
	 * If the IOMMU driver has the DMA domain support that we require,
	 * then the IOMMU core will have already configured a group for this
	 * device, and allocated the default domain for that group.
	 */
	if (!domain || iommu_dma_init_domain(domain, dma_base, size)) {
		pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
			dev_name(dev));
		return false;
	}

	dev->archdata.dma_ops = &iommu_dma_ops;
	return true;
}

static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
			       u64 dma_base, u64 size)
{
	struct iommu_dma_notifier_data *iommudata;

	iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL);
	if (!iommudata)
		return;

	iommudata->dev = dev;
	iommudata->ops = ops;
	iommudata->dma_base = dma_base;
	iommudata->size = size;

	mutex_lock(&iommu_dma_notifier_lock);
	list_add(&iommudata->list, &iommu_dma_masters);
	mutex_unlock(&iommu_dma_notifier_lock);
}

static int __iommu_attach_notifier(struct notifier_block *nb,
				   unsigned long action, void *data)
{
	struct iommu_dma_notifier_data *master, *tmp;

	if (action != BUS_NOTIFY_BIND_DRIVER)
		return 0;

	mutex_lock(&iommu_dma_notifier_lock);
	list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
		if (data == master->dev && do_iommu_attach(master->dev,
				master->ops, master->dma_base, master->size)) {
			list_del(&master->list);
			kfree(master);
			break;
		}
	}
	mutex_unlock(&iommu_dma_notifier_lock);
	return 0;
}

static int __init register_iommu_dma_ops_notifier(struct bus_type *bus)
{
	struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
	int ret;

	if (!nb)
		return -ENOMEM;

	nb->notifier_call = __iommu_attach_notifier;

	ret = bus_register_notifier(bus, nb);
	if (ret) {
		pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n",
			bus->name);
		kfree(nb);
	}

	return ret;
}

static int __init __iommu_dma_init(void)
{
	int ret;

	ret = iommu_dma_init();
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&platform_bus_type);
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&amba_bustype);
#ifdef CONFIG_PCI
	if (!ret)
		ret = register_iommu_dma_ops_notifier(&pci_bus_type);
#endif
	return ret;
}
arch_initcall(__iommu_dma_init);

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *ops)
{
	struct iommu_group *group;

	if (!ops)
		return;
	/*
	 * TODO: As a concession to the future, we're ready to handle being
	 * called both early and late (i.e. after bus_add_device). Once all
	 * the platform bus code is reworked to call us late and the notifier
	 * junk above goes away, move the body of do_iommu_attach here.
	 */
	group = iommu_group_get(dev);
	if (group) {
		do_iommu_attach(dev, ops, dma_base, size);
		iommu_group_put(group);
	} else {
		queue_iommu_attach(dev, ops, dma_base, size);
	}
}

void arch_teardown_dma_ops(struct device *dev)
{
	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

	if (WARN_ON(domain))
		iommu_detach_device(domain, dev);

	dev->archdata.dma_ops = NULL;
}

#else

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
				  const struct iommu_ops *iommu)
{ }

#endif	/* CONFIG_IOMMU_DMA */

void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
			const struct iommu_ops *iommu, bool coherent)
{
	if (!dev->archdata.dma_ops)
		dev->archdata.dma_ops = &swiotlb_dma_ops;

	dev->archdata.dma_coherent = coherent;
	__iommu_setup_dma_ops(dev, dma_base, size, iommu);
}
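
/*
 * arch_setup_dma_ops() is normally invoked by bus/firmware parsing code
 * rather than by drivers: for example, of_dma_configure() derives "coherent"
 * from a "dma-coherent" DT property and "iommu" from an "iommus" phandle,
 * while ACPI-based setup derives coherency from the device's _CCA attribute.
 * A master that is never configured this way keeps NULL archdata.dma_ops and
 * is handed dummy_dma_ops by the get_dma_ops() helper in asm/dma-mapping.h.
 */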