dma.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. // SPDX-License-Identifier: GPL-2.0
  2. // Copyright (C) 2005-2017 Andes Technology Corporation
  3. #include <linux/types.h>
  4. #include <linux/mm.h>
  5. #include <linux/export.h>
  6. #include <linux/string.h>
  7. #include <linux/scatterlist.h>
  8. #include <linux/dma-mapping.h>
  9. #include <linux/io.h>
  10. #include <linux/cache.h>
  11. #include <linux/highmem.h>
  12. #include <linux/slab.h>
  13. #include <asm/cacheflush.h>
  14. #include <asm/tlbflush.h>
  15. #include <asm/dma-mapping.h>
  16. #include <asm/proc-fns.h>
/*
 * This is the page table (2MB) covering uncached, DMA consistent allocations.
 * It stays NULL until consistent_init() has set it up; the coherent
 * allocator refuses to run while it is unset.
 */
static pte_t *consistent_pte;
/*
 * Taken (with interrupts disabled) around accesses to the consistent_head
 * region list in vm_region_alloc() and nds32_dma_free().
 */
static DEFINE_RAW_SPINLOCK(consistent_lock);
  22. enum master_type {
  23. FOR_CPU = 0,
  24. FOR_DEVICE = 1,
  25. };
  26. /*
  27. * VM region handling support.
  28. *
  29. * This should become something generic, handling VM region allocations for
  30. * vmalloc and similar (ioremap, module space, etc).
  31. *
  32. * I envisage vmalloc()'s supporting vm_struct becoming:
  33. *
  34. * struct vm_struct {
  35. * struct vm_region region;
  36. * unsigned long flags;
  37. * struct page **pages;
  38. * unsigned int nr_pages;
  39. * unsigned long phys_addr;
  40. * };
  41. *
  42. * get_vm_area() would then call vm_region_alloc with an appropriate
  43. * struct vm_region head (eg):
  44. *
  45. * struct vm_region vmalloc_head = {
  46. * .vm_list = LIST_HEAD_INIT(vmalloc_head.vm_list),
  47. * .vm_start = VMALLOC_START,
  48. * .vm_end = VMALLOC_END,
  49. * };
  50. *
  51. * However, vmalloc_head.vm_start is variable (typically, it is dependent on
  52. * the amount of RAM found at boot time.) I would imagine that get_vm_area()
  53. * would have to initialise this each time prior to calling vm_region_alloc().
  54. */
/*
 * One reserved virtual address range inside the consistent mapping window.
 * The same structure is also used as the list head describing the window
 * itself (see consistent_head).
 */
struct arch_vm_region {
	/* Link in the head's list; vm_region_alloc() keeps it in ascending address order. */
	struct list_head vm_list;
	/* First virtual address of the range. */
	unsigned long vm_start;
	/* One past the last virtual address of the range (vm_start + size). */
	unsigned long vm_end;
	/* First page backing the range; set by the coherent allocator. */
	struct page *vm_pages;
};
/*
 * Head of the list of regions handed out by vm_region_alloc(). Its
 * vm_start/vm_end delimit the whole consistent mapping window
 * [CONSISTENT_BASE, CONSISTENT_END); vm_pages is left unset.
 */
static struct arch_vm_region consistent_head = {
	.vm_list = LIST_HEAD_INIT(consistent_head.vm_list),
	.vm_start = CONSISTENT_BASE,
	.vm_end = CONSISTENT_END,
};
  66. static struct arch_vm_region *vm_region_alloc(struct arch_vm_region *head,
  67. size_t size, int gfp)
  68. {
  69. unsigned long addr = head->vm_start, end = head->vm_end - size;
  70. unsigned long flags;
  71. struct arch_vm_region *c, *new;
  72. new = kmalloc(sizeof(struct arch_vm_region), gfp);
  73. if (!new)
  74. goto out;
  75. raw_spin_lock_irqsave(&consistent_lock, flags);
  76. list_for_each_entry(c, &head->vm_list, vm_list) {
  77. if ((addr + size) < addr)
  78. goto nospc;
  79. if ((addr + size) <= c->vm_start)
  80. goto found;
  81. addr = c->vm_end;
  82. if (addr > end)
  83. goto nospc;
  84. }
  85. found:
  86. /*
  87. * Insert this entry _before_ the one we found.
  88. */
  89. list_add_tail(&new->vm_list, &c->vm_list);
  90. new->vm_start = addr;
  91. new->vm_end = addr + size;
  92. raw_spin_unlock_irqrestore(&consistent_lock, flags);
  93. return new;
  94. nospc:
  95. raw_spin_unlock_irqrestore(&consistent_lock, flags);
  96. kfree(new);
  97. out:
  98. return NULL;
  99. }
  100. static struct arch_vm_region *vm_region_find(struct arch_vm_region *head,
  101. unsigned long addr)
  102. {
  103. struct arch_vm_region *c;
  104. list_for_each_entry(c, &head->vm_list, vm_list) {
  105. if (c->vm_start == addr)
  106. goto out;
  107. }
  108. c = NULL;
  109. out:
  110. return c;
  111. }
  112. /* FIXME: attrs is not used. */
  113. static void *nds32_dma_alloc_coherent(struct device *dev, size_t size,
  114. dma_addr_t * handle, gfp_t gfp,
  115. unsigned long attrs)
  116. {
  117. struct page *page;
  118. struct arch_vm_region *c;
  119. unsigned long order;
  120. u64 mask = ~0ULL, limit;
  121. pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
  122. if (!consistent_pte) {
  123. pr_err("%s: not initialized\n", __func__);
  124. dump_stack();
  125. return NULL;
  126. }
  127. if (dev) {
  128. mask = dev->coherent_dma_mask;
  129. /*
  130. * Sanity check the DMA mask - it must be non-zero, and
  131. * must be able to be satisfied by a DMA allocation.
  132. */
  133. if (mask == 0) {
  134. dev_warn(dev, "coherent DMA mask is unset\n");
  135. goto no_page;
  136. }
  137. }
  138. /*
  139. * Sanity check the allocation size.
  140. */
  141. size = PAGE_ALIGN(size);
  142. limit = (mask + 1) & ~mask;
  143. if ((limit && size >= limit) ||
  144. size >= (CONSISTENT_END - CONSISTENT_BASE)) {
  145. pr_warn("coherent allocation too big "
  146. "(requested %#x mask %#llx)\n", size, mask);
  147. goto no_page;
  148. }
  149. order = get_order(size);
  150. if (mask != 0xffffffff)
  151. gfp |= GFP_DMA;
  152. page = alloc_pages(gfp, order);
  153. if (!page)
  154. goto no_page;
  155. /*
  156. * Invalidate any data that might be lurking in the
  157. * kernel direct-mapped region for device DMA.
  158. */
  159. {
  160. unsigned long kaddr = (unsigned long)page_address(page);
  161. memset(page_address(page), 0, size);
  162. cpu_dma_wbinval_range(kaddr, kaddr + size);
  163. }
  164. /*
  165. * Allocate a virtual address in the consistent mapping region.
  166. */
  167. c = vm_region_alloc(&consistent_head, size,
  168. gfp & ~(__GFP_DMA | __GFP_HIGHMEM));
  169. if (c) {
  170. pte_t *pte = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
  171. struct page *end = page + (1 << order);
  172. c->vm_pages = page;
  173. /*
  174. * Set the "dma handle"
  175. */
  176. *handle = page_to_phys(page);
  177. do {
  178. BUG_ON(!pte_none(*pte));
  179. /*
  180. * x86 does not mark the pages reserved...
  181. */
  182. SetPageReserved(page);
  183. set_pte(pte, mk_pte(page, prot));
  184. page++;
  185. pte++;
  186. } while (size -= PAGE_SIZE);
  187. /*
  188. * Free the otherwise unused pages.
  189. */
  190. while (page < end) {
  191. __free_page(page);
  192. page++;
  193. }
  194. return (void *)c->vm_start;
  195. }
  196. if (page)
  197. __free_pages(page, order);
  198. no_page:
  199. *handle = ~0;
  200. return NULL;
  201. }
  202. static void nds32_dma_free(struct device *dev, size_t size, void *cpu_addr,
  203. dma_addr_t handle, unsigned long attrs)
  204. {
  205. struct arch_vm_region *c;
  206. unsigned long flags, addr;
  207. pte_t *ptep;
  208. size = PAGE_ALIGN(size);
  209. raw_spin_lock_irqsave(&consistent_lock, flags);
  210. c = vm_region_find(&consistent_head, (unsigned long)cpu_addr);
  211. if (!c)
  212. goto no_area;
  213. if ((c->vm_end - c->vm_start) != size) {
  214. pr_err("%s: freeing wrong coherent size (%ld != %d)\n",
  215. __func__, c->vm_end - c->vm_start, size);
  216. dump_stack();
  217. size = c->vm_end - c->vm_start;
  218. }
  219. ptep = consistent_pte + CONSISTENT_OFFSET(c->vm_start);
  220. addr = c->vm_start;
  221. do {
  222. pte_t pte = ptep_get_and_clear(&init_mm, addr, ptep);
  223. unsigned long pfn;
  224. ptep++;
  225. addr += PAGE_SIZE;
  226. if (!pte_none(pte) && pte_present(pte)) {
  227. pfn = pte_pfn(pte);
  228. if (pfn_valid(pfn)) {
  229. struct page *page = pfn_to_page(pfn);
  230. /*
  231. * x86 does not mark the pages reserved...
  232. */
  233. ClearPageReserved(page);
  234. __free_page(page);
  235. continue;
  236. }
  237. }
  238. pr_crit("%s: bad page in kernel page table\n", __func__);
  239. } while (size -= PAGE_SIZE);
  240. flush_tlb_kernel_range(c->vm_start, c->vm_end);
  241. list_del(&c->vm_list);
  242. raw_spin_unlock_irqrestore(&consistent_lock, flags);
  243. kfree(c);
  244. return;
  245. no_area:
  246. raw_spin_unlock_irqrestore(&consistent_lock, flags);
  247. pr_err("%s: trying to free invalid coherent area: %p\n",
  248. __func__, cpu_addr);
  249. dump_stack();
  250. }
  251. /*
  252. * Initialise the consistent memory allocation.
  253. */
  254. static int __init consistent_init(void)
  255. {
  256. pgd_t *pgd;
  257. pmd_t *pmd;
  258. pte_t *pte;
  259. int ret = 0;
  260. do {
  261. pgd = pgd_offset(&init_mm, CONSISTENT_BASE);
  262. pmd = pmd_alloc(&init_mm, pgd, CONSISTENT_BASE);
  263. if (!pmd) {
  264. pr_err("%s: no pmd tables\n", __func__);
  265. ret = -ENOMEM;
  266. break;
  267. }
  268. /* The first level mapping may be created in somewhere.
  269. * It's not necessary to warn here. */
  270. /* WARN_ON(!pmd_none(*pmd)); */
  271. pte = pte_alloc_kernel(pmd, CONSISTENT_BASE);
  272. if (!pte) {
  273. ret = -ENOMEM;
  274. break;
  275. }
  276. consistent_pte = pte;
  277. } while (0);
  278. return ret;
  279. }
  280. core_initcall(consistent_init);
  281. static void consistent_sync(void *vaddr, size_t size, int direction, int master_type);
  282. static dma_addr_t nds32_dma_map_page(struct device *dev, struct page *page,
  283. unsigned long offset, size_t size,
  284. enum dma_data_direction dir,
  285. unsigned long attrs)
  286. {
  287. if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
  288. consistent_sync((void *)(page_address(page) + offset), size, dir, FOR_DEVICE);
  289. return page_to_phys(page) + offset;
  290. }
  291. static void nds32_dma_unmap_page(struct device *dev, dma_addr_t handle,
  292. size_t size, enum dma_data_direction dir,
  293. unsigned long attrs)
  294. {
  295. if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
  296. consistent_sync(phys_to_virt(handle), size, dir, FOR_CPU);
  297. }
  298. /*
  299. * Make an area consistent for devices.
  300. */
  301. static void consistent_sync(void *vaddr, size_t size, int direction, int master_type)
  302. {
  303. unsigned long start = (unsigned long)vaddr;
  304. unsigned long end = start + size;
  305. if (master_type == FOR_CPU) {
  306. switch (direction) {
  307. case DMA_TO_DEVICE:
  308. break;
  309. case DMA_FROM_DEVICE:
  310. case DMA_BIDIRECTIONAL:
  311. cpu_dma_inval_range(start, end);
  312. break;
  313. default:
  314. BUG();
  315. }
  316. } else {
  317. /* FOR_DEVICE */
  318. switch (direction) {
  319. case DMA_FROM_DEVICE:
  320. break;
  321. case DMA_TO_DEVICE:
  322. case DMA_BIDIRECTIONAL:
  323. cpu_dma_wb_range(start, end);
  324. break;
  325. default:
  326. BUG();
  327. }
  328. }
  329. }
  330. static int nds32_dma_map_sg(struct device *dev, struct scatterlist *sg,
  331. int nents, enum dma_data_direction dir,
  332. unsigned long attrs)
  333. {
  334. int i;
  335. for (i = 0; i < nents; i++, sg++) {
  336. void *virt;
  337. unsigned long pfn;
  338. struct page *page = sg_page(sg);
  339. sg->dma_address = sg_phys(sg);
  340. pfn = page_to_pfn(page) + sg->offset / PAGE_SIZE;
  341. page = pfn_to_page(pfn);
  342. if (PageHighMem(page)) {
  343. virt = kmap_atomic(page);
  344. consistent_sync(virt, sg->length, dir, FOR_CPU);
  345. kunmap_atomic(virt);
  346. } else {
  347. if (sg->offset > PAGE_SIZE)
  348. panic("sg->offset:%08x > PAGE_SIZE\n",
  349. sg->offset);
  350. virt = page_address(page) + sg->offset;
  351. consistent_sync(virt, sg->length, dir, FOR_CPU);
  352. }
  353. }
  354. return nents;
  355. }
  356. static void nds32_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
  357. int nhwentries, enum dma_data_direction dir,
  358. unsigned long attrs)
  359. {
  360. }
  361. static void
  362. nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle,
  363. size_t size, enum dma_data_direction dir)
  364. {
  365. consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_CPU);
  366. }
  367. static void
  368. nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle,
  369. size_t size, enum dma_data_direction dir)
  370. {
  371. consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_DEVICE);
  372. }
  373. static void
  374. nds32_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents,
  375. enum dma_data_direction dir)
  376. {
  377. int i;
  378. for (i = 0; i < nents; i++, sg++) {
  379. char *virt =
  380. page_address((struct page *)sg->page_link) + sg->offset;
  381. consistent_sync(virt, sg->length, dir, FOR_CPU);
  382. }
  383. }
  384. static void
  385. nds32_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
  386. int nents, enum dma_data_direction dir)
  387. {
  388. int i;
  389. for (i = 0; i < nents; i++, sg++) {
  390. char *virt =
  391. page_address((struct page *)sg->page_link) + sg->offset;
  392. consistent_sync(virt, sg->length, dir, FOR_DEVICE);
  393. }
  394. }
/*
 * DMA mapping operations for nds32: coherent allocation through the
 * uncached consistent mapping window, plus streaming map/unmap/sync
 * callbacks built on consistent_sync(). Exported via EXPORT_SYMBOL().
 */
struct dma_map_ops nds32_dma_ops = {
	.alloc = nds32_dma_alloc_coherent,
	.free = nds32_dma_free,
	.map_page = nds32_dma_map_page,
	.unmap_page = nds32_dma_unmap_page,
	.map_sg = nds32_dma_map_sg,
	.unmap_sg = nds32_dma_unmap_sg,
	.sync_single_for_device = nds32_dma_sync_single_for_device,
	.sync_single_for_cpu = nds32_dma_sync_single_for_cpu,
	.sync_sg_for_cpu = nds32_dma_sync_sg_for_cpu,
	.sync_sg_for_device = nds32_dma_sync_sg_for_device,
};
EXPORT_SYMBOL(nds32_dma_ops);