scif_rma.c

/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * Intel SCIF driver.
 *
 */
#include <linux/dma_remapping.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>

#include "scif_main.h"
#include "scif_map.h"

/* Used to skip ulimit checks for registrations with SCIF_MAP_KERNEL flag */
#define SCIF_MAP_ULIMIT 0x40

bool scif_ulimit_check = 1;

/**
 * scif_rma_ep_init:
 * @ep: end point
 *
 * Initialize RMA per EP data structures.
 */
void scif_rma_ep_init(struct scif_endpt *ep)
{
	struct scif_endpt_rma_info *rma = &ep->rma_info;

	mutex_init(&rma->rma_lock);
	init_iova_domain(&rma->iovad, PAGE_SIZE, SCIF_IOVA_START_PFN,
			 SCIF_DMA_64BIT_PFN);
	spin_lock_init(&rma->tc_lock);
	mutex_init(&rma->mmn_lock);
	INIT_LIST_HEAD(&rma->reg_list);
	INIT_LIST_HEAD(&rma->remote_reg_list);
	atomic_set(&rma->tw_refcount, 0);
	atomic_set(&rma->tcw_refcount, 0);
	atomic_set(&rma->tcw_total_pages, 0);
	atomic_set(&rma->fence_refcount, 0);
	rma->async_list_del = 0;
	rma->dma_chan = NULL;
	INIT_LIST_HEAD(&rma->mmn_list);
	INIT_LIST_HEAD(&rma->vma_list);
	init_waitqueue_head(&rma->markwq);
}

/**
 * scif_rma_ep_can_uninit:
 * @ep: end point
 *
 * Returns 1 if an endpoint can be uninitialized and 0 otherwise.
 */
int scif_rma_ep_can_uninit(struct scif_endpt *ep)
{
	int ret = 0;

	mutex_lock(&ep->rma_info.rma_lock);
	/* Destroy RMA Info only if both lists are empty */
	if (list_empty(&ep->rma_info.reg_list) &&
	    list_empty(&ep->rma_info.remote_reg_list) &&
	    list_empty(&ep->rma_info.mmn_list) &&
	    !atomic_read(&ep->rma_info.tw_refcount) &&
	    !atomic_read(&ep->rma_info.tcw_refcount) &&
	    !atomic_read(&ep->rma_info.fence_refcount))
		ret = 1;
	mutex_unlock(&ep->rma_info.rma_lock);
	return ret;
}

/**
 * scif_create_pinned_pages:
 * @nr_pages: number of pages in window
 * @prot: read/write protection
 *
 * Allocate and prepare a set of pinned pages.
 */
static struct scif_pinned_pages *
scif_create_pinned_pages(int nr_pages, int prot)
{
	struct scif_pinned_pages *pin;

	might_sleep();
	pin = scif_zalloc(sizeof(*pin));
	if (!pin)
		goto error;

	pin->pages = scif_zalloc(nr_pages * sizeof(*pin->pages));
	if (!pin->pages)
		goto error_free_pinned_pages;

	pin->prot = prot;
	pin->magic = SCIFEP_MAGIC;
	return pin;

error_free_pinned_pages:
	scif_free(pin, sizeof(*pin));
error:
	return NULL;
}

/**
 * scif_destroy_pinned_pages:
 * @pin: A set of pinned pages.
 *
 * Deallocate resources for pinned pages.
 */
static int scif_destroy_pinned_pages(struct scif_pinned_pages *pin)
{
	int j;
	int writeable = pin->prot & SCIF_PROT_WRITE;
	int kernel = SCIF_MAP_KERNEL & pin->map_flags;
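	/*
	 * Only pages pinned via get_user_pages() hold a page reference;
	 * SCIF_MAP_KERNEL pages were obtained with virt_to_page()/
	 * vmalloc_to_page() and must be neither dirtied nor released here.
	 */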
	for (j = 0; j < pin->nr_pages; j++) {
		if (pin->pages[j] && !kernel) {
			if (writeable)
				SetPageDirty(pin->pages[j]);
			put_page(pin->pages[j]);
		}
	}

	scif_free(pin->pages,
		  pin->nr_pages * sizeof(*pin->pages));
	scif_free(pin, sizeof(*pin));
	return 0;
}

/*
 * scif_create_window:
 * @ep: end point
 * @nr_pages: number of pages
 * @offset: registration offset
 * @temp: true if a temporary window is being created
 *
 * Allocate and prepare a self registration window.
 */
struct scif_window *scif_create_window(struct scif_endpt *ep, int nr_pages,
				       s64 offset, bool temp)
{
	struct scif_window *window;

	might_sleep();
	window = scif_zalloc(sizeof(*window));
	if (!window)
		goto error;

	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
	if (!window->dma_addr)
		goto error_free_window;

	window->num_pages = scif_zalloc(nr_pages * sizeof(*window->num_pages));
	if (!window->num_pages)
		goto error_free_window;

	window->offset = offset;
	window->ep = (u64)ep;
	window->magic = SCIFEP_MAGIC;
	window->reg_state = OP_IDLE;
	init_waitqueue_head(&window->regwq);
	window->unreg_state = OP_IDLE;
	init_waitqueue_head(&window->unregwq);
	INIT_LIST_HEAD(&window->list);
	window->type = SCIF_WINDOW_SELF;
	window->temp = temp;
	return window;

error_free_window:
	scif_free(window->dma_addr,
		  nr_pages * sizeof(*window->dma_addr));
	scif_free(window, sizeof(*window));
error:
	return NULL;
}

/**
 * scif_destroy_incomplete_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
static void scif_destroy_incomplete_window(struct scif_endpt *ep,
					   struct scif_window *window)
{
	int err;
	int nr_pages = window->nr_pages;
	struct scif_allocmsg *alloc = &window->alloc_handle;
	struct scifmsg msg;

retry:
	/* Wait for a SCIF_ALLOC_GNT/REJ message */
	err = wait_event_timeout(alloc->allocwq,
				 alloc->state != OP_IN_PROGRESS,
				 SCIF_NODE_ALIVE_TIMEOUT);
	if (!err && scifdev_alive(ep))
		goto retry;

	mutex_lock(&ep->rma_info.rma_lock);
	if (alloc->state == OP_COMPLETED) {
		msg.uop = SCIF_FREE_VIRT;
		msg.src = ep->port;
		msg.payload[0] = ep->remote_ep;
		msg.payload[1] = window->alloc_handle.vaddr;
		msg.payload[2] = (u64)window;
		msg.payload[3] = SCIF_REGISTER;
		_scif_nodeqp_send(ep->remote_dev, &msg);
	}
	mutex_unlock(&ep->rma_info.rma_lock);

	scif_free_window_offset(ep, window, window->offset);
	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
	scif_free(window, sizeof(*window));
}

/**
 * scif_unmap_window:
 * @remote_dev: SCIF remote device
 * @window: registration window
 *
 * Delete any DMA mappings created for a registered self window
 */
void scif_unmap_window(struct scif_dev *remote_dev, struct scif_window *window)
{
	int j;

	if (scif_is_iommu_enabled() && !scifdev_self(remote_dev)) {
		if (window->st) {
			dma_unmap_sg(&remote_dev->sdev->dev,
				     window->st->sgl, window->st->nents,
				     DMA_BIDIRECTIONAL);
			sg_free_table(window->st);
			kfree(window->st);
			window->st = NULL;
		}
	} else {
		for (j = 0; j < window->nr_contig_chunks; j++) {
			if (window->dma_addr[j]) {
				scif_unmap_single(window->dma_addr[j],
						  remote_dev,
						  window->num_pages[j] <<
						  PAGE_SHIFT);
				window->dma_addr[j] = 0x0;
			}
		}
	}
}

static inline struct mm_struct *__scif_acquire_mm(void)
{
	if (scif_ulimit_check)
		return get_task_mm(current);
	return NULL;
}

static inline void __scif_release_mm(struct mm_struct *mm)
{
	if (mm)
		mmput(mm);
}

static inline int
__scif_dec_pinned_vm_lock(struct mm_struct *mm,
			  int nr_pages, bool try_lock)
{
	if (!mm || !nr_pages || !scif_ulimit_check)
		return 0;
	if (try_lock) {
		if (!down_write_trylock(&mm->mmap_sem)) {
			dev_err(scif_info.mdev.this_device,
				"%s %d err\n", __func__, __LINE__);
			return -1;
		}
	} else {
		down_write(&mm->mmap_sem);
	}
	mm->pinned_vm -= nr_pages;
	up_write(&mm->mmap_sem);
	return 0;
}

static inline int __scif_check_inc_pinned_vm(struct mm_struct *mm,
					     int nr_pages)
{
	unsigned long locked, lock_limit;

	if (!mm || !nr_pages || !scif_ulimit_check)
		return 0;

	locked = nr_pages;
	locked += mm->pinned_vm;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
		dev_err(scif_info.mdev.this_device,
			"locked(%lu) > lock_limit(%lu)\n",
			locked, lock_limit);
		return -ENOMEM;
	}
	mm->pinned_vm = locked;
	return 0;
}
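
/*
 * Example (assuming 4 KB pages): with RLIMIT_MEMLOCK set to 64 KB,
 * lock_limit above works out to 16 pages, so a request that would push
 * mm->pinned_vm past 16 pages fails with -ENOMEM unless the caller
 * has CAP_IPC_LOCK.
 */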
/**
 * scif_destroy_window:
 * @ep: end point
 * @window: registration window
 *
 * Deallocate resources for self window.
 */
int scif_destroy_window(struct scif_endpt *ep, struct scif_window *window)
{
	int j;
	struct scif_pinned_pages *pinned_pages = window->pinned_pages;
	int nr_pages = window->nr_pages;

	might_sleep();
	if (!window->temp && window->mm) {
		__scif_dec_pinned_vm_lock(window->mm, window->nr_pages, 0);
		__scif_release_mm(window->mm);
		window->mm = NULL;
	}

	scif_free_window_offset(ep, window, window->offset);
	scif_unmap_window(ep->remote_dev, window);
	/*
	 * Decrement references for this set of pinned pages from
	 * this window.
	 */
	j = atomic_sub_return(1, &pinned_pages->ref_count);
	if (j < 0)
		dev_err(scif_info.mdev.this_device,
			"%s %d incorrect ref count %d\n",
			__func__, __LINE__, j);
	/*
	 * If the ref count for pinned_pages is zero then someone
	 * has already called scif_unpin_pages() for it and we should
	 * destroy the page cache.
	 */
	if (!j)
		scif_destroy_pinned_pages(window->pinned_pages);
	scif_free(window->dma_addr, nr_pages * sizeof(*window->dma_addr));
	scif_free(window->num_pages, nr_pages * sizeof(*window->num_pages));
	window->magic = 0;
	scif_free(window, sizeof(*window));
	return 0;
}

/**
 * scif_create_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Allocate and prepare lookup entries for the remote
 * end to copy over the physical addresses.
 * Returns 0 on success and appropriate errno on failure.
 */
static int scif_create_remote_lookup(struct scif_dev *remote_dev,
				     struct scif_window *window)
{
	int i, j, err = 0;
	int nr_pages = window->nr_pages;
	bool vmalloc_dma_phys, vmalloc_num_pages;

	might_sleep();
	/* Map window */
	err = scif_map_single(&window->mapped_offset,
			      window, remote_dev, sizeof(*window));
	if (err)
		goto error_window;

	/* Compute the number of lookup entries. 21 == 2MB Shift */
	window->nr_lookup = ALIGN(nr_pages * PAGE_SIZE,
				  ((2) * 1024 * 1024)) >> 21;
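	/*
	 * Worked example (assuming 4 KB pages, i.e. one lookup page holds
	 * SCIF_NR_ADDR_IN_PAGE == 512 u64 entries covering 2 MB of
	 * registered memory): nr_pages = 1000 -> 1000 * 4096 bytes rounds
	 * up to two 2 MB multiples, so nr_lookup = 4194304 >> 21 = 2.
	 */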

	window->dma_addr_lookup.lookup =
		scif_alloc_coherent(&window->dma_addr_lookup.offset,
				    remote_dev, window->nr_lookup *
				    sizeof(*window->dma_addr_lookup.lookup),
				    GFP_KERNEL | __GFP_ZERO);
	if (!window->dma_addr_lookup.lookup) {
		err = -ENOMEM;
		goto error_window;
	}

	window->num_pages_lookup.lookup =
		scif_alloc_coherent(&window->num_pages_lookup.offset,
				    remote_dev, window->nr_lookup *
				    sizeof(*window->num_pages_lookup.lookup),
				    GFP_KERNEL | __GFP_ZERO);
	if (!window->num_pages_lookup.lookup) {
		err = -ENOMEM;
		goto error_window;
	}

	vmalloc_dma_phys = is_vmalloc_addr(&window->dma_addr[0]);
	vmalloc_num_pages = is_vmalloc_addr(&window->num_pages[0]);
	/* Now map each of the pages containing physical addresses */
	for (i = 0, j = 0; i < nr_pages; i += SCIF_NR_ADDR_IN_PAGE, j++) {
		err = scif_map_page(&window->dma_addr_lookup.lookup[j],
				    vmalloc_dma_phys ?
				    vmalloc_to_page(&window->dma_addr[i]) :
				    virt_to_page(&window->dma_addr[i]),
				    remote_dev);
		if (err)
			goto error_window;

		err = scif_map_page(&window->num_pages_lookup.lookup[j],
				    vmalloc_num_pages ?
				    vmalloc_to_page(&window->num_pages[i]) :
				    virt_to_page(&window->num_pages[i]),
				    remote_dev);
		if (err)
			goto error_window;
	}
	return 0;

error_window:
	return err;
}
/**
 * scif_destroy_remote_lookup:
 * @remote_dev: SCIF remote device
 * @window: remote window
 *
 * Destroy lookup entries used for the remote
 * end to copy over the physical addresses.
 */
static void scif_destroy_remote_lookup(struct scif_dev *remote_dev,
				       struct scif_window *window)
{
	int i, j;

	if (window->nr_lookup) {
		struct scif_rma_lookup *lup = &window->dma_addr_lookup;
		struct scif_rma_lookup *npup = &window->num_pages_lookup;

		for (i = 0, j = 0; i < window->nr_pages;
		     i += SCIF_NR_ADDR_IN_PAGE, j++) {
			if (lup->lookup && lup->lookup[j])
				scif_unmap_single(lup->lookup[j],
						  remote_dev,
						  PAGE_SIZE);
			if (npup->lookup && npup->lookup[j])
				scif_unmap_single(npup->lookup[j],
						  remote_dev,
						  PAGE_SIZE);
		}
		if (lup->lookup)
			scif_free_coherent(lup->lookup, lup->offset,
					   remote_dev, window->nr_lookup *
					   sizeof(*lup->lookup));
		if (npup->lookup)
			scif_free_coherent(npup->lookup, npup->offset,
					   remote_dev, window->nr_lookup *
					   sizeof(*npup->lookup));
		if (window->mapped_offset)
			scif_unmap_single(window->mapped_offset,
					  remote_dev, sizeof(*window));
		window->nr_lookup = 0;
	}
}
/**
 * scif_create_remote_window:
 * @scifdev: SCIF device
 * @nr_pages: number of pages in window
 *
 * Allocate and prepare a remote registration window.
 */
static struct scif_window *
scif_create_remote_window(struct scif_dev *scifdev, int nr_pages)
{
	struct scif_window *window;

	might_sleep();
	window = scif_zalloc(sizeof(*window));
	if (!window)
		goto error_ret;

	window->magic = SCIFEP_MAGIC;
	window->nr_pages = nr_pages;

	window->dma_addr = scif_zalloc(nr_pages * sizeof(*window->dma_addr));
	if (!window->dma_addr)
		goto error_window;

	window->num_pages = scif_zalloc(nr_pages *
					sizeof(*window->num_pages));
	if (!window->num_pages)
		goto error_window;

	if (scif_create_remote_lookup(scifdev, window))
		goto error_window;

	window->type = SCIF_WINDOW_PEER;
	window->unreg_state = OP_IDLE;
	INIT_LIST_HEAD(&window->list);
	return window;

error_window:
	scif_destroy_remote_window(window);
error_ret:
	return NULL;
}
/**
 * scif_destroy_remote_window:
 * @window: remote registration window
 *
 * Deallocate resources for remote window.
 */
void
scif_destroy_remote_window(struct scif_window *window)
{
	scif_free(window->dma_addr, window->nr_pages *
		  sizeof(*window->dma_addr));
	scif_free(window->num_pages, window->nr_pages *
		  sizeof(*window->num_pages));
	window->magic = 0;
	scif_free(window, sizeof(*window));
}
/**
 * scif_iommu_map: create DMA mappings if the IOMMU is enabled
 * @remote_dev: SCIF remote device
 * @window: remote registration window
 *
 * Map the physical pages using dma_map_sg(..) and then detect the number
 * of contiguous DMA mappings allocated
 */
static int scif_iommu_map(struct scif_dev *remote_dev,
			  struct scif_window *window)
{
	struct scatterlist *sg;
	int i, err;
	scif_pinned_pages_t pin = window->pinned_pages;

	window->st = kzalloc(sizeof(*window->st), GFP_KERNEL);
	if (!window->st)
		return -ENOMEM;

	err = sg_alloc_table(window->st, window->nr_pages, GFP_KERNEL);
	if (err)
		return err;

	for_each_sg(window->st->sgl, sg, window->st->nents, i)
		sg_set_page(sg, pin->pages[i], PAGE_SIZE, 0x0);

	err = dma_map_sg(&remote_dev->sdev->dev, window->st->sgl,
			 window->st->nents, DMA_BIDIRECTIONAL);
	if (!err)
		return -ENOMEM;
	/* Detect contiguous ranges of DMA mappings */
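	/*
	 * Example: two 4 KB mappings at DMA addresses 0x1000 and 0x2000
	 * coalesce into one chunk with dma_addr = 0x1000 and num_pages = 2;
	 * any gap in the DMA addresses starts a new chunk.
	 */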
	sg = window->st->sgl;
	for (i = 0; sg; i++) {
		dma_addr_t last_da;

		window->dma_addr[i] = sg_dma_address(sg);
		window->num_pages[i] = sg_dma_len(sg) >> PAGE_SHIFT;
		last_da = sg_dma_address(sg) + sg_dma_len(sg);
		while ((sg = sg_next(sg)) && sg_dma_address(sg) == last_da) {
			window->num_pages[i] +=
				(sg_dma_len(sg) >> PAGE_SHIFT);
			last_da = sg_dma_address(sg) +
				sg_dma_len(sg);
		}
		window->nr_contig_chunks++;
	}
	return 0;
}
/**
 * scif_map_window:
 * @remote_dev: SCIF remote device
 * @window: self registration window
 *
 * Map pages of a window into the aperture/PCI.
 * Also determine addresses required for DMA.
 */
int
scif_map_window(struct scif_dev *remote_dev, struct scif_window *window)
{
	int i, j, k, err = 0, nr_contig_pages;
	scif_pinned_pages_t pin;
	phys_addr_t phys_prev, phys_curr;

	might_sleep();
	pin = window->pinned_pages;

	if (intel_iommu_enabled && !scifdev_self(remote_dev))
		return scif_iommu_map(remote_dev, window);

	for (i = 0, j = 0; i < window->nr_pages; i += nr_contig_pages, j++) {
		phys_prev = page_to_phys(pin->pages[i]);
		nr_contig_pages = 1;

		/* Detect physically contiguous chunks */
		for (k = i + 1; k < window->nr_pages; k++) {
			phys_curr = page_to_phys(pin->pages[k]);
			if (phys_curr != (phys_prev + PAGE_SIZE))
				break;
			phys_prev = phys_curr;
			nr_contig_pages++;
		}
		window->num_pages[j] = nr_contig_pages;
		window->nr_contig_chunks++;
		if (scif_is_mgmt_node()) {
			/*
			 * Management node has to deal with SMPT on X100 and
			 * hence the DMA mapping is required
			 */
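			/*
			 * (SMPT: the X100 card's System Memory Page Table,
			 * through which the card reaches host memory; host
			 * pages therefore need real DMA addresses here.)
			 */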
			err = scif_map_single(&window->dma_addr[j],
					      phys_to_virt(page_to_phys(
							   pin->pages[i])),
					      remote_dev,
					      nr_contig_pages << PAGE_SHIFT);
			if (err)
				return err;
		} else {
			window->dma_addr[j] = page_to_phys(pin->pages[i]);
		}
	}
	return err;
}

/**
 * scif_send_scif_unregister:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_UNREGISTER message.
 */
static int scif_send_scif_unregister(struct scif_endpt *ep,
				     struct scif_window *window)
{
	struct scifmsg msg;

	msg.uop = SCIF_UNREGISTER;
	msg.src = ep->port;
	msg.payload[0] = window->alloc_handle.vaddr;
	msg.payload[1] = (u64)window;
	return scif_nodeqp_send(ep->remote_dev, &msg);
}
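
/*
 * window->unreg_state drives scif_unregister_window() below: OP_IDLE sends
 * SCIF_UNREGISTER and waits for the SCIF_UNREGISTER_(N)ACK; re-entry while
 * OP_IN_PROGRESS returns -ENXIO; OP_FAILED and OP_COMPLETED handle a dead
 * peer and an already finished unregistration respectively.
 */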
/**
 * scif_unregister_window:
 * @window: self registration window
 *
 * Send an unregistration request and wait for a response.
 */
int scif_unregister_window(struct scif_window *window)
{
	int err = 0;
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;
	bool send_msg = false;

	might_sleep();
	switch (window->unreg_state) {
	case OP_IDLE:
	{
		window->unreg_state = OP_IN_PROGRESS;
		send_msg = true;
		/* fall through */
	}
	case OP_IN_PROGRESS:
	{
		scif_get_window(window, 1);
		mutex_unlock(&ep->rma_info.rma_lock);
		if (send_msg) {
			err = scif_send_scif_unregister(ep, window);
			if (err) {
				window->unreg_state = OP_COMPLETED;
				goto done;
			}
		} else {
			/* Return ENXIO since unregistration is in progress */
			mutex_lock(&ep->rma_info.rma_lock);
			return -ENXIO;
		}
retry:
		/* Wait for a SCIF_UNREGISTER_(N)ACK message */
		err = wait_event_timeout(window->unregwq,
					 window->unreg_state != OP_IN_PROGRESS,
					 SCIF_NODE_ALIVE_TIMEOUT);
		if (!err && scifdev_alive(ep))
			goto retry;
		if (!err) {
			err = -ENODEV;
			window->unreg_state = OP_COMPLETED;
			dev_err(scif_info.mdev.this_device,
				"%s %d err %d\n", __func__, __LINE__, err);
		}
		if (err > 0)
			err = 0;
done:
		mutex_lock(&ep->rma_info.rma_lock);
		scif_put_window(window, 1);
		break;
	}
	case OP_FAILED:
	{
		if (!scifdev_alive(ep)) {
			err = -ENODEV;
			window->unreg_state = OP_COMPLETED;
		}
		break;
	}
	case OP_COMPLETED:
		break;
	default:
		err = -ENODEV;
	}

	if (window->unreg_state == OP_COMPLETED && window->ref_count)
		scif_put_window(window, window->nr_pages);

	if (!window->ref_count) {
		atomic_inc(&ep->rma_info.tw_refcount);
		list_del_init(&window->list);
		scif_free_window_offset(ep, window, window->offset);
		mutex_unlock(&ep->rma_info.rma_lock);
		if ((!!(window->pinned_pages->map_flags & SCIF_MAP_KERNEL)) &&
		    scifdev_alive(ep)) {
			scif_drain_dma_intr(ep->remote_dev->sdev,
					    ep->rma_info.dma_chan);
		} else {
			if (!__scif_dec_pinned_vm_lock(window->mm,
						       window->nr_pages, 1)) {
				__scif_release_mm(window->mm);
				window->mm = NULL;
			}
		}
		scif_queue_for_cleanup(window, &scif_info.rma);
		mutex_lock(&ep->rma_info.rma_lock);
	}
	return err;
}
/**
 * scif_send_alloc_request:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request
 */
static int scif_send_alloc_request(struct scif_endpt *ep,
				   struct scif_window *window)
{
	struct scifmsg msg;
	struct scif_allocmsg *alloc = &window->alloc_handle;

	/* Set up the Alloc Handle */
	alloc->state = OP_IN_PROGRESS;
	init_waitqueue_head(&alloc->allocwq);

	/* Send out an allocation request */
	msg.uop = SCIF_ALLOC_REQ;
	msg.payload[1] = window->nr_pages;
	msg.payload[2] = (u64)&window->alloc_handle;
	return _scif_nodeqp_send(ep->remote_dev, &msg);
}
/**
 * scif_prep_remote_window:
 * @ep: end point
 * @window: self registration window
 *
 * Send a remote window allocation request, wait for an allocation response,
 * and prepare the remote window by copying over the page lists.
 */
static int scif_prep_remote_window(struct scif_endpt *ep,
				   struct scif_window *window)
{
	struct scifmsg msg;
	struct scif_window *remote_window;
	struct scif_allocmsg *alloc = &window->alloc_handle;
	dma_addr_t *dma_phys_lookup, *tmp, *num_pages_lookup, *tmp1;
	int i = 0, j = 0;
	int nr_contig_chunks, loop_nr_contig_chunks;
	int remaining_nr_contig_chunks, nr_lookup;
	int err, map_err;

	map_err = scif_map_window(ep->remote_dev, window);
	if (map_err)
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d map_err %d\n", __func__, __LINE__, map_err);

	remaining_nr_contig_chunks = window->nr_contig_chunks;
	nr_contig_chunks = window->nr_contig_chunks;
retry:
	/* Wait for a SCIF_ALLOC_GNT/REJ message */
	err = wait_event_timeout(alloc->allocwq,
				 alloc->state != OP_IN_PROGRESS,
				 SCIF_NODE_ALIVE_TIMEOUT);
	mutex_lock(&ep->rma_info.rma_lock);
	/* Synchronize with the thread waking up allocwq */
	mutex_unlock(&ep->rma_info.rma_lock);
	if (!err && scifdev_alive(ep))
		goto retry;

	if (!err)
		err = -ENODEV;

	if (err > 0)
		err = 0;
	else
		return err;

	/* Bail out. The remote end rejected this request */
	if (alloc->state == OP_FAILED)
		return -ENOMEM;

	if (map_err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, map_err);
		msg.uop = SCIF_FREE_VIRT;
		msg.src = ep->port;
		msg.payload[0] = ep->remote_ep;
		msg.payload[1] = window->alloc_handle.vaddr;
		msg.payload[2] = (u64)window;
		msg.payload[3] = SCIF_REGISTER;
		spin_lock(&ep->lock);
		if (ep->state == SCIFEP_CONNECTED)
			err = _scif_nodeqp_send(ep->remote_dev, &msg);
		else
			err = -ENOTCONN;
		spin_unlock(&ep->lock);
		return err;
	}

	remote_window = scif_ioremap(alloc->phys_addr, sizeof(*window),
				     ep->remote_dev);

	/* Compute the number of lookup entries. 21 == 2MB Shift */
	nr_lookup = ALIGN(nr_contig_chunks, SCIF_NR_ADDR_IN_PAGE)
			  >> ilog2(SCIF_NR_ADDR_IN_PAGE);

	dma_phys_lookup =
		scif_ioremap(remote_window->dma_addr_lookup.offset,
			     nr_lookup *
			     sizeof(*remote_window->dma_addr_lookup.lookup),
			     ep->remote_dev);
	num_pages_lookup =
		scif_ioremap(remote_window->num_pages_lookup.offset,
			     nr_lookup *
			     sizeof(*remote_window->num_pages_lookup.lookup),
			     ep->remote_dev);

	while (remaining_nr_contig_chunks) {
		loop_nr_contig_chunks = min_t(int, remaining_nr_contig_chunks,
					      (int)SCIF_NR_ADDR_IN_PAGE);
		/* #1/2 - Copy physical addresses over to the remote side */

		/* #2/2 - Copy DMA addresses (addresses that are fed into the
		 * DMA engine). We transfer bus addresses, which are then
		 * converted into MIC physical addresses on the remote side
		 * if it is a MIC; if the remote node is a mgmt node we
		 * transfer the MIC physical addresses directly.
		 */
		tmp = scif_ioremap(dma_phys_lookup[j],
				   loop_nr_contig_chunks *
				   sizeof(*window->dma_addr),
				   ep->remote_dev);
		tmp1 = scif_ioremap(num_pages_lookup[j],
				    loop_nr_contig_chunks *
				    sizeof(*window->num_pages),
				    ep->remote_dev);
		if (scif_is_mgmt_node()) {
			memcpy_toio((void __force __iomem *)tmp,
				    &window->dma_addr[i], loop_nr_contig_chunks
				    * sizeof(*window->dma_addr));
			memcpy_toio((void __force __iomem *)tmp1,
				    &window->num_pages[i], loop_nr_contig_chunks
				    * sizeof(*window->num_pages));
		} else {
			if (scifdev_is_p2p(ep->remote_dev)) {
				/*
				 * add remote node's base address for this node
				 * to convert it into a MIC address
				 */
				int m;
				dma_addr_t dma_addr;

				for (m = 0; m < loop_nr_contig_chunks; m++) {
					dma_addr = window->dma_addr[i + m] +
						ep->remote_dev->base_addr;
					writeq(dma_addr,
					       (void __force __iomem *)&tmp[m]);
				}
				memcpy_toio((void __force __iomem *)tmp1,
					    &window->num_pages[i],
					    loop_nr_contig_chunks
					    * sizeof(*window->num_pages));
			} else {
				/* Mgmt node or loopback - transfer DMA
				 * addresses as is, this is the same as a
				 * MIC physical address (we use the dma_addr
				 * and not the phys_addr array since the
				 * phys_addr is only setup if there is a mmap()
				 * request from the mgmt node)
				 */
				memcpy_toio((void __force __iomem *)tmp,
					    &window->dma_addr[i],
					    loop_nr_contig_chunks *
					    sizeof(*window->dma_addr));
				memcpy_toio((void __force __iomem *)tmp1,
					    &window->num_pages[i],
					    loop_nr_contig_chunks *
					    sizeof(*window->num_pages));
			}
		}
		remaining_nr_contig_chunks -= loop_nr_contig_chunks;
		i += loop_nr_contig_chunks;
		j++;
		scif_iounmap(tmp, loop_nr_contig_chunks *
			     sizeof(*window->dma_addr), ep->remote_dev);
		scif_iounmap(tmp1, loop_nr_contig_chunks *
			     sizeof(*window->num_pages), ep->remote_dev);
	}

	/* Prepare the remote window for the peer */
	remote_window->peer_window = (u64)window;
	remote_window->offset = window->offset;
	remote_window->prot = window->prot;
	remote_window->nr_contig_chunks = nr_contig_chunks;
	remote_window->ep = ep->remote_ep;
	scif_iounmap(num_pages_lookup,
		     nr_lookup *
		     sizeof(*remote_window->num_pages_lookup.lookup),
		     ep->remote_dev);
	scif_iounmap(dma_phys_lookup,
		     nr_lookup *
		     sizeof(*remote_window->dma_addr_lookup.lookup),
		     ep->remote_dev);
	scif_iounmap(remote_window, sizeof(*remote_window), ep->remote_dev);
	window->peer_window = alloc->vaddr;
	return err;
}
/**
 * scif_send_scif_register:
 * @ep: end point
 * @window: self registration window
 *
 * Send a SCIF_REGISTER message if EP is connected and wait for a
 * SCIF_REGISTER_(N)ACK message else send a SCIF_FREE_VIRT
 * message so that the peer can free its remote window allocated earlier.
 */
static int scif_send_scif_register(struct scif_endpt *ep,
				   struct scif_window *window)
{
	int err = 0;
	struct scifmsg msg;

	msg.src = ep->port;
	msg.payload[0] = ep->remote_ep;
	msg.payload[1] = window->alloc_handle.vaddr;
	msg.payload[2] = (u64)window;
	spin_lock(&ep->lock);
	if (ep->state == SCIFEP_CONNECTED) {
		msg.uop = SCIF_REGISTER;
		window->reg_state = OP_IN_PROGRESS;
		err = _scif_nodeqp_send(ep->remote_dev, &msg);
		spin_unlock(&ep->lock);
		if (!err) {
retry:
			/* Wait for a SCIF_REGISTER_(N)ACK message */
			err = wait_event_timeout(window->regwq,
						 window->reg_state !=
						 OP_IN_PROGRESS,
						 SCIF_NODE_ALIVE_TIMEOUT);
			if (!err && scifdev_alive(ep))
				goto retry;
			err = !err ? -ENODEV : 0;
			if (window->reg_state == OP_FAILED)
				err = -ENOTCONN;
		}
	} else {
		msg.uop = SCIF_FREE_VIRT;
		msg.payload[3] = SCIF_REGISTER;
		err = _scif_nodeqp_send(ep->remote_dev, &msg);
		spin_unlock(&ep->lock);
		if (!err)
			err = -ENOTCONN;
	}
	return err;
}

/**
 * scif_get_window_offset:
 * @ep: end point descriptor
 * @flags: flags
 * @offset: offset hint
 * @num_pages: number of pages
 * @out_offset: computed offset returned by reference.
 *
 * Compute/Claim a new offset for this EP.
 */
int scif_get_window_offset(struct scif_endpt *ep, int flags, s64 offset,
			   int num_pages, s64 *out_offset)
{
	s64 page_index;
	struct iova *iova_ptr;
	int err = 0;

	if (flags & SCIF_MAP_FIXED) {
		page_index = SCIF_IOVA_PFN(offset);
		iova_ptr = reserve_iova(&ep->rma_info.iovad, page_index,
					page_index + num_pages - 1);
		if (!iova_ptr)
			err = -EADDRINUSE;
	} else {
		iova_ptr = alloc_iova(&ep->rma_info.iovad, num_pages,
				      SCIF_DMA_63BIT_PFN - 1, 0);
		if (!iova_ptr)
			err = -ENOMEM;
	}
	if (!err)
		*out_offset = (iova_ptr->pfn_lo) << PAGE_SHIFT;
	return err;
}
/**
 * scif_free_window_offset:
 * @ep: end point descriptor
 * @window: registration window
 * @offset: Offset to be freed
 *
 * Free offset for this EP. The caller is expected to hold
 * the RMA mutex before calling this API.
 */
void scif_free_window_offset(struct scif_endpt *ep,
			     struct scif_window *window, s64 offset)
{
	if ((window && !window->offset_freed) || !window) {
		free_iova(&ep->rma_info.iovad, offset >> PAGE_SHIFT);
		if (window)
			window->offset_freed = true;
	}
}
/**
 * scif_alloc_req: Respond to SCIF_ALLOC_REQ interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Remote side is requesting a memory allocation.
 */
void scif_alloc_req(struct scif_dev *scifdev, struct scifmsg *msg)
{
	int err;
	struct scif_window *window = NULL;
	int nr_pages = msg->payload[1];

	window = scif_create_remote_window(scifdev, nr_pages);
	if (!window) {
		err = -ENOMEM;
		goto error;
	}

	/* The peer's allocation request is granted */
	msg->uop = SCIF_ALLOC_GNT;
	msg->payload[0] = (u64)window;
	msg->payload[1] = window->mapped_offset;
	err = scif_nodeqp_send(scifdev, msg);
	if (err)
		scif_destroy_remote_window(window);
	return;
error:
	/* The peer's allocation request is rejected */
	dev_err(&scifdev->sdev->dev,
		"%s %d error %d alloc_ptr %p nr_pages 0x%x\n",
		__func__, __LINE__, err, window, nr_pages);
	msg->uop = SCIF_ALLOC_REJ;
	scif_nodeqp_send(scifdev, msg);
}

/**
 * scif_alloc_gnt_rej: Respond to SCIF_ALLOC_GNT/REJ interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Remote side responded to a memory allocation.
 */
void scif_alloc_gnt_rej(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_allocmsg *handle = (struct scif_allocmsg *)msg->payload[2];
	struct scif_window *window = container_of(handle, struct scif_window,
						  alloc_handle);
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	handle->vaddr = msg->payload[0];
	handle->phys_addr = msg->payload[1];
	if (msg->uop == SCIF_ALLOC_GNT)
		handle->state = OP_COMPLETED;
	else
		handle->state = OP_FAILED;
	wake_up(&handle->allocwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_free_virt: Respond to SCIF_FREE_VIRT interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Free up memory kmalloc'd earlier.
 */
void scif_free_virt(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window = (struct scif_window *)msg->payload[1];

	scif_destroy_remote_window(window);
}
static void
scif_fixup_aper_base(struct scif_dev *dev, struct scif_window *window)
{
	int j;
	struct scif_hw_dev *sdev = dev->sdev;
	phys_addr_t apt_base = 0;

	/*
	 * Add the aperture base if the DMA address is not card relative
	 * since the DMA addresses need to be an offset into the bar
	 */
	if (!scifdev_self(dev) && window->type == SCIF_WINDOW_PEER &&
	    sdev->aper && !sdev->card_rel_da)
		apt_base = sdev->aper->pa;
	else
		return;

	for (j = 0; j < window->nr_contig_chunks; j++) {
		if (window->num_pages[j])
			window->dma_addr[j] += apt_base;
		else
			break;
	}
}

/**
 * scif_recv_reg: Respond to SCIF_REGISTER interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Update remote window list with a new registered window.
 */
void scif_recv_reg(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_endpt *ep = (struct scif_endpt *)msg->payload[0];
	struct scif_window *window =
		(struct scif_window *)msg->payload[1];

	mutex_lock(&ep->rma_info.rma_lock);
	spin_lock(&ep->lock);
	if (ep->state == SCIFEP_CONNECTED) {
		msg->uop = SCIF_REGISTER_ACK;
		scif_nodeqp_send(ep->remote_dev, msg);
		scif_fixup_aper_base(ep->remote_dev, window);
		/* No further failures expected. Insert new window */
		scif_insert_window(window, &ep->rma_info.remote_reg_list);
	} else {
		msg->uop = SCIF_REGISTER_NACK;
		scif_nodeqp_send(ep->remote_dev, msg);
	}
	spin_unlock(&ep->lock);
	mutex_unlock(&ep->rma_info.rma_lock);
	/* free up any lookup resources now that page lists are transferred */
	scif_destroy_remote_lookup(ep->remote_dev, window);
	/*
	 * We could not insert the window but we need to
	 * destroy the window.
	 */
	if (msg->uop == SCIF_REGISTER_NACK)
		scif_destroy_remote_window(window);
}

/**
 * scif_recv_unreg: Respond to SCIF_UNREGISTER interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Remove window from remote registration list.
 */
void scif_recv_unreg(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_rma_req req;
	struct scif_window *window = NULL;
	struct scif_window *recv_window =
		(struct scif_window *)msg->payload[0];
	struct scif_endpt *ep;
	int del_window = 0;

	ep = (struct scif_endpt *)recv_window->ep;
	req.out_window = &window;
	req.offset = recv_window->offset;
	req.prot = 0;
	req.nr_bytes = recv_window->nr_pages << PAGE_SHIFT;
	req.type = SCIF_WINDOW_FULL;
	req.head = &ep->rma_info.remote_reg_list;
	msg->payload[0] = ep->remote_ep;
	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	if (scif_query_window(&req)) {
		dev_err(&scifdev->sdev->dev,
			"%s %d -ENXIO\n", __func__, __LINE__);
		msg->uop = SCIF_UNREGISTER_ACK;
		goto error;
	}

	if (window) {
		if (window->ref_count)
			scif_put_window(window, window->nr_pages);
		else
			dev_err(&scifdev->sdev->dev,
				"%s %d ref count should be +ve\n",
				__func__, __LINE__);
		window->unreg_state = OP_COMPLETED;
		if (!window->ref_count) {
			msg->uop = SCIF_UNREGISTER_ACK;
			atomic_inc(&ep->rma_info.tw_refcount);
			ep->rma_info.async_list_del = 1;
			list_del_init(&window->list);
			del_window = 1;
		} else {
			/* NACK! There are valid references to this window */
			msg->uop = SCIF_UNREGISTER_NACK;
		}
	} else {
		/* The window did not make its way to the list at all. ACK */
		msg->uop = SCIF_UNREGISTER_ACK;
		scif_destroy_remote_window(recv_window);
	}
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	if (del_window)
		scif_drain_dma_intr(ep->remote_dev->sdev,
				    ep->rma_info.dma_chan);
	scif_nodeqp_send(ep->remote_dev, msg);
	if (del_window)
		scif_queue_for_cleanup(window, &scif_info.rma);
}
/**
 * scif_recv_reg_ack: Respond to SCIF_REGISTER_ACK interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Wake up the window waiting to complete registration.
 */
void scif_recv_reg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window =
		(struct scif_window *)msg->payload[2];
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	window->reg_state = OP_COMPLETED;
	wake_up(&window->regwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_reg_nack: Respond to SCIF_REGISTER_NACK interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Wake up the window waiting to inform it that registration
 * cannot be completed.
 */
void scif_recv_reg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window =
		(struct scif_window *)msg->payload[2];
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	window->reg_state = OP_FAILED;
	wake_up(&window->regwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_ack: Respond to SCIF_UNREGISTER_ACK interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Wake up the window waiting to complete unregistration.
 */
void scif_recv_unreg_ack(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window =
		(struct scif_window *)msg->payload[1];
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	window->unreg_state = OP_COMPLETED;
	wake_up(&window->unregwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}

/**
 * scif_recv_unreg_nack: Respond to SCIF_UNREGISTER_NACK interrupt message
 * @scifdev: SCIF device
 * @msg: Interrupt message
 *
 * Wake up the window waiting to inform it that unregistration
 * cannot be completed immediately.
 */
void scif_recv_unreg_nack(struct scif_dev *scifdev, struct scifmsg *msg)
{
	struct scif_window *window =
		(struct scif_window *)msg->payload[1];
	struct scif_endpt *ep = (struct scif_endpt *)window->ep;

	mutex_lock(&ep->rma_info.rma_lock);
	window->unreg_state = OP_FAILED;
	wake_up(&window->unregwq);
	mutex_unlock(&ep->rma_info.rma_lock);
}
int __scif_pin_pages(void *addr, size_t len, int *out_prot,
		     int map_flags, scif_pinned_pages_t *pages)
{
	struct scif_pinned_pages *pinned_pages;
	int nr_pages, err = 0, i;
	bool vmalloc_addr = false;
	bool try_upgrade = false;
	int prot = *out_prot;
	int ulimit = 0;
	struct mm_struct *mm = NULL;

	/* Unsupported flags */
	if (map_flags & ~(SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT))
		return -EINVAL;
	ulimit = !!(map_flags & SCIF_MAP_ULIMIT);

	/* Unsupported protection requested */
	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
		return -EINVAL;

	/* addr/len must be page aligned. len should be non zero */
	if (!len ||
	    (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
		return -EINVAL;

	might_sleep();

	nr_pages = len >> PAGE_SHIFT;

	/* Allocate a set of pinned pages */
	pinned_pages = scif_create_pinned_pages(nr_pages, prot);
	if (!pinned_pages)
		return -ENOMEM;

	if (map_flags & SCIF_MAP_KERNEL) {
		if (is_vmalloc_addr(addr))
			vmalloc_addr = true;

		for (i = 0; i < nr_pages; i++) {
			if (vmalloc_addr)
				pinned_pages->pages[i] =
					vmalloc_to_page(addr + (i * PAGE_SIZE));
			else
				pinned_pages->pages[i] =
					virt_to_page(addr + (i * PAGE_SIZE));
		}
		pinned_pages->nr_pages = nr_pages;
		pinned_pages->map_flags = SCIF_MAP_KERNEL;
	} else {
		/*
		 * SCIF supports registration caching. If a registration has
		 * been requested with read only permissions, then we try
		 * to pin the pages with RW permissions so that a subsequent
		 * transfer with RW permission can hit the cache instead of
		 * invalidating it. If the upgrade fails with RW then we
		 * revert back to R permission and retry
		 */
		if (prot == SCIF_PROT_READ)
			try_upgrade = true;
		prot |= SCIF_PROT_WRITE;
retry:
		mm = current->mm;
		down_write(&mm->mmap_sem);
		if (ulimit) {
			err = __scif_check_inc_pinned_vm(mm, nr_pages);
			if (err) {
				up_write(&mm->mmap_sem);
				pinned_pages->nr_pages = 0;
				goto error_unmap;
			}
		}

		pinned_pages->nr_pages = get_user_pages(
				(u64)addr,
				nr_pages,
				(prot & SCIF_PROT_WRITE) ? FOLL_WRITE : 0,
				pinned_pages->pages,
				NULL);
		up_write(&mm->mmap_sem);
		if (nr_pages != pinned_pages->nr_pages) {
			if (try_upgrade) {
				if (ulimit)
					__scif_dec_pinned_vm_lock(mm,
								  nr_pages, 0);
				/* Roll back any pinned pages */
				for (i = 0; i < pinned_pages->nr_pages; i++) {
					if (pinned_pages->pages[i])
						put_page(
						pinned_pages->pages[i]);
				}
				prot &= ~SCIF_PROT_WRITE;
				try_upgrade = false;
				goto retry;
			}
		}
		pinned_pages->map_flags = 0;
	}

	if (pinned_pages->nr_pages < nr_pages) {
		err = -EFAULT;
		pinned_pages->nr_pages = nr_pages;
		goto dec_pinned;
	}

	*out_prot = prot;
	atomic_set(&pinned_pages->ref_count, 1);
	*pages = pinned_pages;
	return err;
dec_pinned:
	if (ulimit)
		__scif_dec_pinned_vm_lock(mm, nr_pages, 0);
	/* Something went wrong! Rollback */
error_unmap:
	pinned_pages->nr_pages = nr_pages;
	scif_destroy_pinned_pages(pinned_pages);
	*pages = NULL;
	dev_dbg(scif_info.mdev.this_device,
		"%s %d err %d len 0x%lx\n", __func__, __LINE__, err, len);
	return err;
}

int scif_pin_pages(void *addr, size_t len, int prot,
		   int map_flags, scif_pinned_pages_t *pages)
{
	return __scif_pin_pages(addr, len, &prot, map_flags, pages);
}
EXPORT_SYMBOL_GPL(scif_pin_pages);
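
/*
 * Illustrative (hypothetical) kernel-client usage of the pinning API,
 * assuming a connected endpoint "epd" and a page-aligned buffer "buf":
 *
 *	scif_pinned_pages_t pin;
 *	off_t off;
 *	int err = scif_pin_pages(buf, PAGE_SIZE, SCIF_PROT_READ |
 *				 SCIF_PROT_WRITE, SCIF_MAP_KERNEL, &pin);
 *	if (!err) {
 *		off = scif_register_pinned_pages(epd, pin, 0, 0);
 *		...
 *		scif_unpin_pages(pin);
 *	}
 */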
int scif_unpin_pages(scif_pinned_pages_t pinned_pages)
{
	int err = 0, ret;

	if (!pinned_pages || SCIFEP_MAGIC != pinned_pages->magic)
		return -EINVAL;

	ret = atomic_sub_return(1, &pinned_pages->ref_count);
	if (ret < 0) {
		dev_err(scif_info.mdev.this_device,
			"%s %d scif_unpin_pages called without pinning? rc %d\n",
			__func__, __LINE__, ret);
		return -EINVAL;
	}
	/*
	 * Destroy the window if the ref count for this set of pinned
	 * pages has dropped to zero. If it is positive then there is
	 * a valid registered window which is backed by these pages and
	 * it will be destroyed once all such windows are unregistered.
	 */
	if (!ret)
		err = scif_destroy_pinned_pages(pinned_pages);

	return err;
}
EXPORT_SYMBOL_GPL(scif_unpin_pages);

static inline void
scif_insert_local_window(struct scif_window *window, struct scif_endpt *ep)
{
	mutex_lock(&ep->rma_info.rma_lock);
	scif_insert_window(window, &ep->rma_info.reg_list);
	mutex_unlock(&ep->rma_info.rma_lock);
}

off_t scif_register_pinned_pages(scif_epd_t epd,
				 scif_pinned_pages_t pinned_pages,
				 off_t offset, int map_flags)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	s64 computed_offset;
	struct scif_window *window;
	int err;
	size_t len;
	struct device *spdev;

	/* Unsupported flags */
	if (map_flags & ~SCIF_MAP_FIXED)
		return -EINVAL;

	len = pinned_pages->nr_pages << PAGE_SHIFT;

	/*
	 * Offset is not page aligned/negative or offset+len
	 * wraps around with SCIF_MAP_FIXED.
	 */
	if ((map_flags & SCIF_MAP_FIXED) &&
	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
	    (offset < 0) ||
	    (len > LONG_MAX - offset)))
		return -EINVAL;

	might_sleep();

	err = scif_verify_epd(ep);
	if (err)
		return err;
	/*
	 * It is an error to pass pinned_pages to scif_register_pinned_pages()
	 * after calling scif_unpin_pages().
	 */
	if (!atomic_add_unless(&pinned_pages->ref_count, 1, 0))
		return -EINVAL;

	/* Compute the offset for this registration */
	err = scif_get_window_offset(ep, map_flags, offset,
				     len >> PAGE_SHIFT, &computed_offset);
	if (err) {
		atomic_sub(1, &pinned_pages->ref_count);
		return err;
	}

	/* Allocate and prepare self registration window */
	window = scif_create_window(ep, pinned_pages->nr_pages,
				    computed_offset, false);
	if (!window) {
		atomic_sub(1, &pinned_pages->ref_count);
		scif_free_window_offset(ep, NULL, computed_offset);
		return -ENOMEM;
	}

	window->pinned_pages = pinned_pages;
	window->nr_pages = pinned_pages->nr_pages;
	window->prot = pinned_pages->prot;

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		scif_destroy_window(ep, window);
		return err;
	}
	err = scif_send_alloc_request(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	/* Prepare the remote registration window */
	err = scif_prep_remote_window(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	/* Tell the peer about the new window */
	err = scif_send_scif_register(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	scif_put_peer_dev(spdev);
	/* No further failures expected. Insert new window */
	scif_insert_local_window(window, ep);
	return computed_offset;
error_unmap:
	scif_destroy_window(ep, window);
	scif_put_peer_dev(spdev);
	dev_err(&ep->remote_dev->sdev->dev,
		"%s %d err %d\n", __func__, __LINE__, err);
	return err;
}
EXPORT_SYMBOL_GPL(scif_register_pinned_pages);

off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
		    int prot, int map_flags)
{
	scif_pinned_pages_t pinned_pages;
	off_t err;
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	s64 computed_offset;
	struct scif_window *window;
	struct mm_struct *mm = NULL;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI register: ep %p addr %p len 0x%lx offset 0x%lx prot 0x%x map_flags 0x%x\n",
		epd, addr, len, offset, prot, map_flags);
	/* Unsupported flags */
	if (map_flags & ~(SCIF_MAP_FIXED | SCIF_MAP_KERNEL))
		return -EINVAL;

	/*
	 * Offset is not page aligned/negative or offset+len
	 * wraps around with SCIF_MAP_FIXED.
	 */
	if ((map_flags & SCIF_MAP_FIXED) &&
	    ((ALIGN(offset, PAGE_SIZE) != offset) ||
	    (offset < 0) ||
	    (len > LONG_MAX - offset)))
		return -EINVAL;

	/* Unsupported protection requested */
	if (prot & ~(SCIF_PROT_READ | SCIF_PROT_WRITE))
		return -EINVAL;

	/* addr/len must be page aligned. len should be non zero */
	if (!len || (ALIGN((u64)addr, PAGE_SIZE) != (u64)addr) ||
	    (ALIGN(len, PAGE_SIZE) != len))
		return -EINVAL;

	might_sleep();

	err = scif_verify_epd(ep);
	if (err)
		return err;

	/* Compute the offset for this registration */
	err = scif_get_window_offset(ep, map_flags, offset,
				     len >> PAGE_SHIFT, &computed_offset);
	if (err)
		return err;

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		scif_free_window_offset(ep, NULL, computed_offset);
		return err;
	}
	/* Allocate and prepare self registration window */
	window = scif_create_window(ep, len >> PAGE_SHIFT,
				    computed_offset, false);
	if (!window) {
		scif_free_window_offset(ep, NULL, computed_offset);
		scif_put_peer_dev(spdev);
		return -ENOMEM;
	}

	window->nr_pages = len >> PAGE_SHIFT;

	err = scif_send_alloc_request(ep, window);
	if (err) {
		scif_destroy_incomplete_window(ep, window);
		scif_put_peer_dev(spdev);
		return err;
	}

	if (!(map_flags & SCIF_MAP_KERNEL)) {
		mm = __scif_acquire_mm();
		map_flags |= SCIF_MAP_ULIMIT;
	}
	/* Pin down the pages */
	err = __scif_pin_pages(addr, len, &prot,
			       map_flags & (SCIF_MAP_KERNEL | SCIF_MAP_ULIMIT),
			       &pinned_pages);
	if (err) {
		scif_destroy_incomplete_window(ep, window);
		__scif_release_mm(mm);
		goto error;
	}

	window->pinned_pages = pinned_pages;
	window->prot = pinned_pages->prot;
	window->mm = mm;

	/* Prepare the remote registration window */
	err = scif_prep_remote_window(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %ld\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	/* Tell the peer about the new window */
	err = scif_send_scif_register(ep, window);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %ld\n", __func__, __LINE__, err);
		goto error_unmap;
	}

	scif_put_peer_dev(spdev);
	/* No further failures expected. Insert new window */
	scif_insert_local_window(window, ep);
	dev_dbg(&ep->remote_dev->sdev->dev,
		"SCIFAPI register: ep %p addr %p len 0x%lx computed_offset 0x%llx\n",
		epd, addr, len, computed_offset);
	return computed_offset;
error_unmap:
	scif_destroy_window(ep, window);
error:
	scif_put_peer_dev(spdev);
	dev_err(&ep->remote_dev->sdev->dev,
		"%s %d err %ld\n", __func__, __LINE__, err);
	return err;
}
EXPORT_SYMBOL_GPL(scif_register);
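
/*
 * Illustrative (hypothetical) usage, assuming a connected endpoint "epd"
 * and a page-aligned buffer "buf" of "len" bytes:
 *
 *	off_t off = scif_register(epd, buf, len, 0,
 *				  SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
 *	if (off >= 0) {
 *		... RMA operations against [off, off + len) ...
 *		scif_unregister(epd, off, len);
 *	}
 */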
int
scif_unregister(scif_epd_t epd, off_t offset, size_t len)
{
	struct scif_endpt *ep = (struct scif_endpt *)epd;
	struct scif_window *window = NULL;
	struct scif_rma_req req;
	int nr_pages, err;
	struct device *spdev;

	dev_dbg(scif_info.mdev.this_device,
		"SCIFAPI unregister: ep %p offset 0x%lx len 0x%lx\n",
		ep, offset, len);
	/* len must be page aligned. len should be non zero */
	if (!len ||
	    (ALIGN((u64)len, PAGE_SIZE) != (u64)len))
		return -EINVAL;

	/* Offset is not page aligned or offset+len wraps around */
	if ((ALIGN(offset, PAGE_SIZE) != offset) ||
	    (offset < 0) ||
	    (len > LONG_MAX - offset))
		return -EINVAL;

	err = scif_verify_epd(ep);
	if (err)
		return err;

	might_sleep();
	nr_pages = len >> PAGE_SHIFT;

	req.out_window = &window;
	req.offset = offset;
	req.prot = 0;
	req.nr_bytes = len;
	req.type = SCIF_WINDOW_FULL;
	req.head = &ep->rma_info.reg_list;

	spdev = scif_get_peer_dev(ep->remote_dev);
	if (IS_ERR(spdev)) {
		err = PTR_ERR(spdev);
		return err;
	}
	mutex_lock(&ep->rma_info.rma_lock);
	/* Does a valid window exist? */
	err = scif_query_window(&req);
	if (err) {
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
		goto error;
	}
	/* Unregister all the windows in this range */
	err = scif_rma_list_unregister(window, offset, nr_pages);
	if (err)
		dev_err(&ep->remote_dev->sdev->dev,
			"%s %d err %d\n", __func__, __LINE__, err);
error:
	mutex_unlock(&ep->rma_info.rma_lock);
	scif_put_peer_dev(spdev);
	return err;
}
EXPORT_SYMBOL_GPL(scif_unregister);