dma-mapping.c

/*
 * SWIOTLB-based DMA API implementation
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <linux/gfp.h>
#include <linux/acpi.h>
#include <linux/bootmem.h>
#include <linux/cache.h>
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/dma-mapping.h>
#include <linux/dma-contiguous.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>

#include <asm/cacheflush.h>

static int swiotlb __ro_after_init;
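
/*
 * Choose the page protection for a DMA buffer mapping: coherent devices can
 * keep the cacheable attributes they were given, while non-coherent devices
 * (or callers passing DMA_ATTR_WRITE_COMBINE) get a write-combine, i.e.
 * Normal non-cacheable, mapping.
 */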
static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
                                 bool coherent)
{
        if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
                return pgprot_writecombine(prot);
        return prot;
}
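
/*
 * Small gen_pool-backed reserve of non-cacheable memory used to satisfy
 * coherent allocations from atomic (non-blocking) context, where the CMA
 * and remapping paths below cannot be used.  The size defaults to 256 KiB
 * and can be overridden with the "coherent_pool=" kernel parameter.
 */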
static struct gen_pool *atomic_pool;

#define DEFAULT_DMA_COHERENT_POOL_SIZE  SZ_256K
static size_t atomic_pool_size __initdata = DEFAULT_DMA_COHERENT_POOL_SIZE;

static int __init early_coherent_pool(char *p)
{
        atomic_pool_size = memparse(p, &p);
        return 0;
}
early_param("coherent_pool", early_coherent_pool);
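
/*
 * Carve a zeroed buffer out of the atomic pool.  Returns the pool's
 * non-cacheable virtual address and hands back the underlying struct page
 * via @ret_page so the caller can derive the DMA address.
 */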
static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
{
        unsigned long val;
        void *ptr = NULL;

        if (!atomic_pool) {
                WARN(1, "coherent pool not initialised!\n");
                return NULL;
        }

        val = gen_pool_alloc(atomic_pool, size);
        if (val) {
                phys_addr_t phys = gen_pool_virt_to_phys(atomic_pool, val);

                *ret_page = phys_to_page(phys);
                ptr = (void *)val;
                memset(ptr, 0, size);
        }

        return ptr;
}

static bool __in_atomic_pool(void *start, size_t size)
{
        return addr_in_gen_pool(atomic_pool, (unsigned long)start, size);
}

static int __free_from_pool(void *start, size_t size)
{
        if (!__in_atomic_pool(start, size))
                return 0;

        gen_pool_free(atomic_pool, (unsigned long)start, size);

        return 1;
}
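
/*
 * Back-end coherent allocator: prefer the device's CMA area when blocking is
 * allowed, otherwise fall back to swiotlb_alloc_coherent().  ZONE_DMA is
 * requested explicitly for devices whose coherent DMA mask is 32 bits or
 * narrower.
 */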
static void *__dma_alloc_coherent(struct device *dev, size_t size,
                                  dma_addr_t *dma_handle, gfp_t flags,
                                  unsigned long attrs)
{
        if (dev == NULL) {
                WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
                return NULL;
        }

        if (IS_ENABLED(CONFIG_ZONE_DMA) &&
            dev->coherent_dma_mask <= DMA_BIT_MASK(32))
                flags |= GFP_DMA;
        if (dev_get_cma_area(dev) && gfpflags_allow_blocking(flags)) {
                struct page *page;
                void *addr;

                page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
                                                 get_order(size));
                if (!page)
                        return NULL;

                *dma_handle = phys_to_dma(dev, page_to_phys(page));
                addr = page_address(page);
                memset(addr, 0, size);
                return addr;
        } else {
                return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
        }
}

static void __dma_free_coherent(struct device *dev, size_t size,
                                void *vaddr, dma_addr_t dma_handle,
                                unsigned long attrs)
{
        bool freed;
        phys_addr_t paddr = dma_to_phys(dev, dma_handle);

        if (dev == NULL) {
                WARN_ONCE(1, "Use an actual device structure for DMA allocation\n");
                return;
        }

        freed = dma_release_from_contiguous(dev,
                                            phys_to_page(paddr),
                                            size >> PAGE_SHIFT);
        if (!freed)
                swiotlb_free_coherent(dev, size, vaddr, dma_handle);
}
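
/*
 * .alloc hook for swiotlb_dma_ops.  Coherent devices can use the linear-map
 * address returned by __dma_alloc_coherent() directly.  Non-coherent devices
 * need a second, non-cacheable alias: the kernel alias is flushed from the
 * caches and the buffer remapped with the pgprot chosen above.  In atomic
 * context the pre-mapped atomic pool is used instead.
 */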
static void *__dma_alloc(struct device *dev, size_t size,
                         dma_addr_t *dma_handle, gfp_t flags,
                         unsigned long attrs)
{
        struct page *page;
        void *ptr, *coherent_ptr;
        bool coherent = is_device_dma_coherent(dev);
        pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);

        size = PAGE_ALIGN(size);

        if (!coherent && !gfpflags_allow_blocking(flags)) {
                struct page *page = NULL;
                void *addr = __alloc_from_pool(size, &page, flags);

                if (addr)
                        *dma_handle = phys_to_dma(dev, page_to_phys(page));

                return addr;
        }

        ptr = __dma_alloc_coherent(dev, size, dma_handle, flags, attrs);
        if (!ptr)
                goto no_mem;

        /* no need for non-cacheable mapping if coherent */
        if (coherent)
                return ptr;

        /* remove any dirty cache lines on the kernel alias */
        __dma_flush_area(ptr, size);

        /* create a coherent mapping */
        page = virt_to_page(ptr);
        coherent_ptr = dma_common_contiguous_remap(page, size, VM_USERMAP,
                                                   prot, NULL);
        if (!coherent_ptr)
                goto no_map;

        return coherent_ptr;

no_map:
        __dma_free_coherent(dev, size, ptr, *dma_handle, attrs);
no_mem:
        *dma_handle = DMA_ERROR_CODE;
        return NULL;
}

static void __dma_free(struct device *dev, size_t size,
                       void *vaddr, dma_addr_t dma_handle,
                       unsigned long attrs)
{
        void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));

        size = PAGE_ALIGN(size);

        if (!is_device_dma_coherent(dev)) {
                if (__free_from_pool(vaddr, size))
                        return;
                vunmap(vaddr);
        }
        __dma_free_coherent(dev, size, swiotlb_addr, dma_handle, attrs);
}
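
/*
 * Streaming DMA wrappers around the generic swiotlb implementation.  For
 * non-coherent devices each map/unmap/sync operation additionally performs
 * the required cache maintenance (__dma_map_area()/__dma_unmap_area()) on
 * the kernel linear-map alias of the buffer.
 */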
static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
                                     unsigned long offset, size_t size,
                                     enum dma_data_direction dir,
                                     unsigned long attrs)
{
        dma_addr_t dev_addr;

        dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
        if (!is_device_dma_coherent(dev))
                __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);

        return dev_addr;
}

static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
                                 size_t size, enum dma_data_direction dir,
                                 unsigned long attrs)
{
        if (!is_device_dma_coherent(dev))
                __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
        swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
}

static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
                                  int nelems, enum dma_data_direction dir,
                                  unsigned long attrs)
{
        struct scatterlist *sg;
        int i, ret;

        ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
        if (!is_device_dma_coherent(dev))
                for_each_sg(sgl, sg, ret, i)
                        __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
                                       sg->length, dir);

        return ret;
}

static void __swiotlb_unmap_sg_attrs(struct device *dev,
                                     struct scatterlist *sgl, int nelems,
                                     enum dma_data_direction dir,
                                     unsigned long attrs)
{
        struct scatterlist *sg;
        int i;

        if (!is_device_dma_coherent(dev))
                for_each_sg(sgl, sg, nelems, i)
                        __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
                                         sg->length, dir);
        swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
}

static void __swiotlb_sync_single_for_cpu(struct device *dev,
                                          dma_addr_t dev_addr, size_t size,
                                          enum dma_data_direction dir)
{
        if (!is_device_dma_coherent(dev))
                __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
        swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
}

static void __swiotlb_sync_single_for_device(struct device *dev,
                                             dma_addr_t dev_addr, size_t size,
                                             enum dma_data_direction dir)
{
        swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
        if (!is_device_dma_coherent(dev))
                __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
}

static void __swiotlb_sync_sg_for_cpu(struct device *dev,
                                      struct scatterlist *sgl, int nelems,
                                      enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        if (!is_device_dma_coherent(dev))
                for_each_sg(sgl, sg, nelems, i)
                        __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
                                         sg->length, dir);
        swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
}

static void __swiotlb_sync_sg_for_device(struct device *dev,
                                         struct scatterlist *sgl, int nelems,
                                         enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
        if (!is_device_dma_coherent(dev))
                for_each_sg(sgl, sg, nelems, i)
                        __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
                                       sg->length, dir);
}
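
/*
 * mmap() and sg_table export for coherent buffers: translate the DMA handle
 * back to a PFN/page and expose it to userspace (or a scatterlist) with the
 * page protection chosen by __get_dma_pgprot().
 */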
static int __swiotlb_mmap(struct device *dev,
                          struct vm_area_struct *vma,
                          void *cpu_addr, dma_addr_t dma_addr, size_t size,
                          unsigned long attrs)
{
        int ret = -ENXIO;
        unsigned long nr_vma_pages = (vma->vm_end - vma->vm_start) >>
                                        PAGE_SHIFT;
        unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
        unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
        unsigned long off = vma->vm_pgoff;

        vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
                                             is_device_dma_coherent(dev));

        if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
                return ret;

        if (off < nr_pages && nr_vma_pages <= (nr_pages - off)) {
                ret = remap_pfn_range(vma, vma->vm_start,
                                      pfn + off,
                                      vma->vm_end - vma->vm_start,
                                      vma->vm_page_prot);
        }

        return ret;
}

static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
                                 void *cpu_addr, dma_addr_t handle, size_t size,
                                 unsigned long attrs)
{
        int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);

        if (!ret)
                sg_set_page(sgt->sgl, phys_to_page(dma_to_phys(dev, handle)),
                            PAGE_ALIGN(size), 0);

        return ret;
}

static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
        if (swiotlb)
                return swiotlb_dma_supported(hwdev, mask);
        return 1;
}
static struct dma_map_ops swiotlb_dma_ops = {
        .alloc = __dma_alloc,
        .free = __dma_free,
        .mmap = __swiotlb_mmap,
        .get_sgtable = __swiotlb_get_sgtable,
        .map_page = __swiotlb_map_page,
        .unmap_page = __swiotlb_unmap_page,
        .map_sg = __swiotlb_map_sg_attrs,
        .unmap_sg = __swiotlb_unmap_sg_attrs,
        .sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
        .sync_single_for_device = __swiotlb_sync_single_for_device,
        .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
        .sync_sg_for_device = __swiotlb_sync_sg_for_device,
        .dma_supported = __swiotlb_dma_supported,
        .mapping_error = swiotlb_dma_mapping_error,
};
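
/*
 * Set up the atomic pool at boot: allocate atomic_pool_size bytes (from CMA
 * if available, otherwise from ZONE_DMA pages), zero and flush them, remap
 * them as Normal non-cacheable, and hand the region to a gen_pool allocator
 * with page-granular alignment.
 */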
static int __init atomic_pool_init(void)
{
        pgprot_t prot = __pgprot(PROT_NORMAL_NC);
        unsigned long nr_pages = atomic_pool_size >> PAGE_SHIFT;
        struct page *page;
        void *addr;
        unsigned int pool_size_order = get_order(atomic_pool_size);

        if (dev_get_cma_area(NULL))
                page = dma_alloc_from_contiguous(NULL, nr_pages,
                                                 pool_size_order);
        else
                page = alloc_pages(GFP_DMA, pool_size_order);

        if (page) {
                int ret;
                void *page_addr = page_address(page);

                memset(page_addr, 0, atomic_pool_size);
                __dma_flush_area(page_addr, atomic_pool_size);

                atomic_pool = gen_pool_create(PAGE_SHIFT, -1);
                if (!atomic_pool)
                        goto free_page;

                addr = dma_common_contiguous_remap(page, atomic_pool_size,
                                        VM_USERMAP, prot, atomic_pool_init);

                if (!addr)
                        goto destroy_genpool;

                ret = gen_pool_add_virt(atomic_pool, (unsigned long)addr,
                                        page_to_phys(page),
                                        atomic_pool_size, -1);
                if (ret)
                        goto remove_mapping;

                gen_pool_set_algo(atomic_pool,
                                  gen_pool_first_fit_order_align,
                                  (void *)PAGE_SHIFT);

                pr_info("DMA: preallocated %zu KiB pool for atomic allocations\n",
                        atomic_pool_size / 1024);
                return 0;
        }
        goto out;

remove_mapping:
        dma_common_free_remap(addr, atomic_pool_size, VM_USERMAP);
destroy_genpool:
        gen_pool_destroy(atomic_pool);
        atomic_pool = NULL;
free_page:
        if (!dma_release_from_contiguous(NULL, page, nr_pages))
                __free_pages(page, pool_size_order);
out:
        pr_err("DMA: failed to allocate %zu KiB pool for atomic coherent allocation\n",
               atomic_pool_size / 1024);
        return -ENOMEM;
}
/********************************************
 * The following APIs are for dummy DMA ops *
 ********************************************/

static void *__dummy_alloc(struct device *dev, size_t size,
                           dma_addr_t *dma_handle, gfp_t flags,
                           unsigned long attrs)
{
        return NULL;
}

static void __dummy_free(struct device *dev, size_t size,
                         void *vaddr, dma_addr_t dma_handle,
                         unsigned long attrs)
{
}

static int __dummy_mmap(struct device *dev,
                        struct vm_area_struct *vma,
                        void *cpu_addr, dma_addr_t dma_addr, size_t size,
                        unsigned long attrs)
{
        return -ENXIO;
}

static dma_addr_t __dummy_map_page(struct device *dev, struct page *page,
                                   unsigned long offset, size_t size,
                                   enum dma_data_direction dir,
                                   unsigned long attrs)
{
        return DMA_ERROR_CODE;
}

static void __dummy_unmap_page(struct device *dev, dma_addr_t dev_addr,
                               size_t size, enum dma_data_direction dir,
                               unsigned long attrs)
{
}

static int __dummy_map_sg(struct device *dev, struct scatterlist *sgl,
                          int nelems, enum dma_data_direction dir,
                          unsigned long attrs)
{
        return 0;
}

static void __dummy_unmap_sg(struct device *dev,
                             struct scatterlist *sgl, int nelems,
                             enum dma_data_direction dir,
                             unsigned long attrs)
{
}

static void __dummy_sync_single(struct device *dev,
                                dma_addr_t dev_addr, size_t size,
                                enum dma_data_direction dir)
{
}

static void __dummy_sync_sg(struct device *dev,
                            struct scatterlist *sgl, int nelems,
                            enum dma_data_direction dir)
{
}

static int __dummy_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
{
        return 1;
}

static int __dummy_dma_supported(struct device *hwdev, u64 mask)
{
        return 0;
}

struct dma_map_ops dummy_dma_ops = {
        .alloc = __dummy_alloc,
        .free = __dummy_free,
        .mmap = __dummy_mmap,
        .map_page = __dummy_map_page,
        .unmap_page = __dummy_unmap_page,
        .map_sg = __dummy_map_sg,
        .unmap_sg = __dummy_unmap_sg,
        .sync_single_for_cpu = __dummy_sync_single,
        .sync_single_for_device = __dummy_sync_single,
        .sync_sg_for_cpu = __dummy_sync_sg,
        .sync_sg_for_device = __dummy_sync_sg,
        .mapping_error = __dummy_mapping_error,
        .dma_supported = __dummy_dma_supported,
};
EXPORT_SYMBOL(dummy_dma_ops);
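
/*
 * Decide at boot whether swiotlb bounce buffering is actually needed: only
 * when it is forced on the command line or when memory extends beyond the
 * DMA-able physical limit.  The atomic pool is initialised unconditionally.
 */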
static int __init arm64_dma_init(void)
{
        if (swiotlb_force == SWIOTLB_FORCE ||
            max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
                swiotlb = 1;

        return atomic_pool_init();
}
arch_initcall(arm64_dma_init);

#define PREALLOC_DMA_DEBUG_ENTRIES      4096

static int __init dma_debug_do_init(void)
{
        dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
        return 0;
}
fs_initcall(dma_debug_do_init);
#ifdef CONFIG_IOMMU_DMA
#include <linux/dma-iommu.h>
#include <linux/platform_device.h>
#include <linux/amba/bus.h>

/* Thankfully, all cache ops are by VA so we can ignore phys here */
static void flush_page(struct device *dev, const void *virt, phys_addr_t phys)
{
        __dma_flush_area(virt, PAGE_SIZE);
}
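
/*
 * IOMMU-backed coherent allocation.  When blocking is allowed, the buffer is
 * built from scattered pages by iommu_dma_alloc() and remapped contiguously
 * in the vmalloc area; in atomic context a physically contiguous buffer is
 * taken from the page allocator (coherent devices) or the atomic pool
 * (non-coherent devices) and mapped through the IOMMU as a single range.
 */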
static void *__iommu_alloc_attrs(struct device *dev, size_t size,
                                 dma_addr_t *handle, gfp_t gfp,
                                 unsigned long attrs)
{
        bool coherent = is_device_dma_coherent(dev);
        int ioprot = dma_direction_to_prot(DMA_BIDIRECTIONAL, coherent);
        size_t iosize = size;
        void *addr;

        if (WARN(!dev, "cannot create IOMMU mapping for unknown device\n"))
                return NULL;

        size = PAGE_ALIGN(size);

        /*
         * Some drivers rely on this, and we probably don't want the
         * possibility of stale kernel data being read by devices anyway.
         */
        gfp |= __GFP_ZERO;

        if (gfpflags_allow_blocking(gfp)) {
                struct page **pages;
                pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);

                pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
                                        handle, flush_page);
                if (!pages)
                        return NULL;

                addr = dma_common_pages_remap(pages, size, VM_USERMAP, prot,
                                              __builtin_return_address(0));
                if (!addr)
                        iommu_dma_free(dev, pages, iosize, handle);
        } else {
                struct page *page;
                /*
                 * In atomic context we can't remap anything, so we'll only
                 * get the virtually contiguous buffer we need by way of a
                 * physically contiguous allocation.
                 */
                if (coherent) {
                        page = alloc_pages(gfp, get_order(size));
                        addr = page ? page_address(page) : NULL;
                } else {
                        addr = __alloc_from_pool(size, &page, gfp);
                }
                if (!addr)
                        return NULL;

                *handle = iommu_dma_map_page(dev, page, 0, iosize, ioprot);
                if (iommu_dma_mapping_error(dev, *handle)) {
                        if (coherent)
                                __free_pages(page, get_order(size));
                        else
                                __free_from_pool(addr, size);
                        addr = NULL;
                }
        }
        return addr;
}
static void __iommu_free_attrs(struct device *dev, size_t size, void *cpu_addr,
                               dma_addr_t handle, unsigned long attrs)
{
        size_t iosize = size;

        size = PAGE_ALIGN(size);
        /*
         * @cpu_addr will be one of 3 things depending on how it was allocated:
         * - A remapped array of pages from iommu_dma_alloc(), for all
         *   non-atomic allocations.
         * - A non-cacheable alias from the atomic pool, for atomic
         *   allocations by non-coherent devices.
         * - A normal lowmem address, for atomic allocations by
         *   coherent devices.
         * Hence how dodgy the below logic looks...
         */
        if (__in_atomic_pool(cpu_addr, size)) {
                iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
                __free_from_pool(cpu_addr, size);
        } else if (is_vmalloc_addr(cpu_addr)) {
                struct vm_struct *area = find_vm_area(cpu_addr);

                if (WARN_ON(!area || !area->pages))
                        return;
                iommu_dma_free(dev, area->pages, iosize, &handle);
                dma_common_free_remap(cpu_addr, size, VM_USERMAP);
        } else {
                iommu_dma_unmap_page(dev, handle, iosize, 0, 0);
                __free_pages(virt_to_page(cpu_addr), get_order(size));
        }
}
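
/*
 * Userspace mapping and scatterlist export for IOMMU-backed buffers: both
 * rely on the page array stashed in the vmalloc area descriptor by
 * dma_common_pages_remap(), so they only apply to non-atomic allocations.
 */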
static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
                              void *cpu_addr, dma_addr_t dma_addr, size_t size,
                              unsigned long attrs)
{
        struct vm_struct *area;
        int ret;

        vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
                                             is_device_dma_coherent(dev));

        if (dma_mmap_from_coherent(dev, vma, cpu_addr, size, &ret))
                return ret;

        area = find_vm_area(cpu_addr);
        if (WARN_ON(!area || !area->pages))
                return -ENXIO;

        return iommu_dma_mmap(area->pages, size, vma);
}

static int __iommu_get_sgtable(struct device *dev, struct sg_table *sgt,
                               void *cpu_addr, dma_addr_t dma_addr,
                               size_t size, unsigned long attrs)
{
        unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
        struct vm_struct *area = find_vm_area(cpu_addr);

        if (WARN_ON(!area || !area->pages))
                return -ENXIO;

        return sg_alloc_table_from_pages(sgt, area->pages, count, 0, size,
                                         GFP_KERNEL);
}
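
/*
 * Streaming DMA for IOMMU-mapped devices.  Cache maintenance still operates
 * on CPU addresses, so the single-buffer sync helpers translate the IOVA
 * back to a physical address via the device's IOMMU domain before cleaning
 * or invalidating the corresponding linear-map alias.
 */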
static void __iommu_sync_single_for_cpu(struct device *dev,
                                        dma_addr_t dev_addr, size_t size,
                                        enum dma_data_direction dir)
{
        phys_addr_t phys;

        if (is_device_dma_coherent(dev))
                return;

        phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
        __dma_unmap_area(phys_to_virt(phys), size, dir);
}

static void __iommu_sync_single_for_device(struct device *dev,
                                           dma_addr_t dev_addr, size_t size,
                                           enum dma_data_direction dir)
{
        phys_addr_t phys;

        if (is_device_dma_coherent(dev))
                return;

        phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
        __dma_map_area(phys_to_virt(phys), size, dir);
}

static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
                                   unsigned long offset, size_t size,
                                   enum dma_data_direction dir,
                                   unsigned long attrs)
{
        bool coherent = is_device_dma_coherent(dev);
        int prot = dma_direction_to_prot(dir, coherent);
        dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);

        if (!iommu_dma_mapping_error(dev, dev_addr) &&
            (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
                __iommu_sync_single_for_device(dev, dev_addr, size, dir);

        return dev_addr;
}

static void __iommu_unmap_page(struct device *dev, dma_addr_t dev_addr,
                               size_t size, enum dma_data_direction dir,
                               unsigned long attrs)
{
        if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
                __iommu_sync_single_for_cpu(dev, dev_addr, size, dir);

        iommu_dma_unmap_page(dev, dev_addr, size, dir, attrs);
}

static void __iommu_sync_sg_for_cpu(struct device *dev,
                                    struct scatterlist *sgl, int nelems,
                                    enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        if (is_device_dma_coherent(dev))
                return;

        for_each_sg(sgl, sg, nelems, i)
                __dma_unmap_area(sg_virt(sg), sg->length, dir);
}

static void __iommu_sync_sg_for_device(struct device *dev,
                                       struct scatterlist *sgl, int nelems,
                                       enum dma_data_direction dir)
{
        struct scatterlist *sg;
        int i;

        if (is_device_dma_coherent(dev))
                return;

        for_each_sg(sgl, sg, nelems, i)
                __dma_map_area(sg_virt(sg), sg->length, dir);
}

static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
                                int nelems, enum dma_data_direction dir,
                                unsigned long attrs)
{
        bool coherent = is_device_dma_coherent(dev);

        if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
                __iommu_sync_sg_for_device(dev, sgl, nelems, dir);

        return iommu_dma_map_sg(dev, sgl, nelems,
                                dma_direction_to_prot(dir, coherent));
}

static void __iommu_unmap_sg_attrs(struct device *dev,
                                   struct scatterlist *sgl, int nelems,
                                   enum dma_data_direction dir,
                                   unsigned long attrs)
{
        if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
                __iommu_sync_sg_for_cpu(dev, sgl, nelems, dir);

        iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs);
}
static struct dma_map_ops iommu_dma_ops = {
        .alloc = __iommu_alloc_attrs,
        .free = __iommu_free_attrs,
        .mmap = __iommu_mmap_attrs,
        .get_sgtable = __iommu_get_sgtable,
        .map_page = __iommu_map_page,
        .unmap_page = __iommu_unmap_page,
        .map_sg = __iommu_map_sg_attrs,
        .unmap_sg = __iommu_unmap_sg_attrs,
        .sync_single_for_cpu = __iommu_sync_single_for_cpu,
        .sync_single_for_device = __iommu_sync_single_for_device,
        .sync_sg_for_cpu = __iommu_sync_sg_for_cpu,
        .sync_sg_for_device = __iommu_sync_sg_for_device,
        .map_resource = iommu_dma_map_resource,
        .unmap_resource = iommu_dma_unmap_resource,
        .dma_supported = iommu_dma_supported,
        .mapping_error = iommu_dma_mapping_error,
};
/*
 * TODO: Right now __iommu_setup_dma_ops() gets called too early to do
 * everything it needs to - the device is only partially created and the
 * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we
 * need this delayed attachment dance. Once IOMMU probe ordering is sorted
 * to move the arch_setup_dma_ops() call later, all the notifier bits below
 * become unnecessary, and will go away.
 */
struct iommu_dma_notifier_data {
        struct list_head list;
        struct device *dev;
        const struct iommu_ops *ops;
        u64 dma_base;
        u64 size;
};
static LIST_HEAD(iommu_dma_masters);
static DEFINE_MUTEX(iommu_dma_notifier_lock);
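
/*
 * Switch a device over to iommu_dma_ops once its default DMA domain exists.
 * Devices that cannot be attached immediately are queued on
 * iommu_dma_masters and picked up later by the bus notifier when their
 * driver binds.
 */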
static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops,
                            u64 dma_base, u64 size)
{
        struct iommu_domain *domain = iommu_get_domain_for_dev(dev);

        /*
         * If the IOMMU driver has the DMA domain support that we require,
         * then the IOMMU core will have already configured a group for this
         * device, and allocated the default domain for that group.
         */
        if (!domain || iommu_dma_init_domain(domain, dma_base, size, dev)) {
                pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
                        dev_name(dev));
                return false;
        }

        dev->archdata.dma_ops = &iommu_dma_ops;
        return true;
}

static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops,
                               u64 dma_base, u64 size)
{
        struct iommu_dma_notifier_data *iommudata;

        iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL);
        if (!iommudata)
                return;

        iommudata->dev = dev;
        iommudata->ops = ops;
        iommudata->dma_base = dma_base;
        iommudata->size = size;

        mutex_lock(&iommu_dma_notifier_lock);
        list_add(&iommudata->list, &iommu_dma_masters);
        mutex_unlock(&iommu_dma_notifier_lock);
}

static int __iommu_attach_notifier(struct notifier_block *nb,
                                   unsigned long action, void *data)
{
        struct iommu_dma_notifier_data *master, *tmp;

        if (action != BUS_NOTIFY_BIND_DRIVER)
                return 0;

        mutex_lock(&iommu_dma_notifier_lock);
        list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) {
                if (data == master->dev && do_iommu_attach(master->dev,
                                master->ops, master->dma_base, master->size)) {
                        list_del(&master->list);
                        kfree(master);
                        break;
                }
        }
        mutex_unlock(&iommu_dma_notifier_lock);
        return 0;
}

static int __init register_iommu_dma_ops_notifier(struct bus_type *bus)
{
        struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL);
        int ret;

        if (!nb)
                return -ENOMEM;

        nb->notifier_call = __iommu_attach_notifier;

        ret = bus_register_notifier(bus, nb);
        if (ret) {
                pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n",
                        bus->name);
                kfree(nb);
        }

        return ret;
}
static int __init __iommu_dma_init(void)
{
        int ret;

        ret = iommu_dma_init();
        if (!ret)
                ret = register_iommu_dma_ops_notifier(&platform_bus_type);
        if (!ret)
                ret = register_iommu_dma_ops_notifier(&amba_bustype);
#ifdef CONFIG_PCI
        if (!ret)
                ret = register_iommu_dma_ops_notifier(&pci_bus_type);
#endif
        return ret;
}
arch_initcall(__iommu_dma_init);
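
/*
 * Called from arch_setup_dma_ops(): if the device already has an IOMMU
 * group the attach can be done immediately, otherwise it is deferred to
 * the BUS_NOTIFY_BIND_DRIVER notifier registered above.
 */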
static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
                                  const struct iommu_ops *ops)
{
        struct iommu_group *group;

        if (!ops)
                return;
        /*
         * TODO: As a concession to the future, we're ready to handle being
         * called both early and late (i.e. after bus_add_device). Once all
         * the platform bus code is reworked to call us late and the notifier
         * junk above goes away, move the body of do_iommu_attach here.
         */
        group = iommu_group_get(dev);
        if (group) {
                do_iommu_attach(dev, ops, dma_base, size);
                iommu_group_put(group);
        } else {
                queue_iommu_attach(dev, ops, dma_base, size);
        }
}

void arch_teardown_dma_ops(struct device *dev)
{
        dev->archdata.dma_ops = NULL;
}

#else

static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
                                  const struct iommu_ops *iommu)
{ }

#endif  /* CONFIG_IOMMU_DMA */
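
/*
 * Per-device DMA configuration entry point: record coherency, default to the
 * swiotlb-based ops, and let __iommu_setup_dma_ops() upgrade the device to
 * iommu_dma_ops when an IOMMU is available.
 */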
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
                        const struct iommu_ops *iommu, bool coherent)
{
        if (!dev->archdata.dma_ops)
                dev->archdata.dma_ops = &swiotlb_dma_ops;

        dev->archdata.dma_coherent = coherent;
        __iommu_setup_dma_ops(dev, dma_base, size, iommu);
}