user_sdma.c

/*
 * Copyright(c) 2015 - 2017 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/mm.h>
#include <linux/types.h>
#include <linux/device.h>
#include <linux/dmapool.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/highmem.h>
#include <linux/io.h>
#include <linux/uio.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/mmu_context.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/string.h>

#include "hfi.h"
#include "sdma.h"
#include "user_sdma.h"
#include "verbs.h"  /* for the headers */
#include "common.h" /* for struct hfi1_tid_info */
#include "trace.h"
#include "mmu_rb.h"
static uint hfi1_sdma_comp_ring_size = 128;
module_param_named(sdma_comp_size, hfi1_sdma_comp_ring_size, uint, S_IRUGO);
MODULE_PARM_DESC(sdma_comp_size, "Size of User SDMA completion ring. Default: 128");

/* The maximum number of Data io vectors per message/request */
#define MAX_VECTORS_PER_REQ 8

/*
 * Maximum number of packets to send from each message/request
 * before moving to the next one.
 */
#define MAX_PKTS_PER_QUEUE 16
#define num_pages(x) (1 + ((((x) - 1) & PAGE_MASK) >> PAGE_SHIFT))

#define req_opcode(x) \
	(((x) >> HFI1_SDMA_REQ_OPCODE_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK)
#define req_version(x) \
	(((x) >> HFI1_SDMA_REQ_VERSION_SHIFT) & HFI1_SDMA_REQ_OPCODE_MASK)
#define req_iovcnt(x) \
	(((x) >> HFI1_SDMA_REQ_IOVCNT_SHIFT) & HFI1_SDMA_REQ_IOVCNT_MASK)

/* Number of BTH.PSN bits used for sequence number in expected rcvs */
#define BTH_SEQ_MASK 0x7ffull

/*
 * Define fields in the KDETH header so we can update the header
 * template.
 */
#define KDETH_OFFSET_SHIFT 0
#define KDETH_OFFSET_MASK 0x7fff
#define KDETH_OM_SHIFT 15
#define KDETH_OM_MASK 0x1
#define KDETH_TID_SHIFT 16
#define KDETH_TID_MASK 0x3ff
#define KDETH_TIDCTRL_SHIFT 26
#define KDETH_TIDCTRL_MASK 0x3
#define KDETH_INTR_SHIFT 28
#define KDETH_INTR_MASK 0x1
#define KDETH_SH_SHIFT 29
#define KDETH_SH_MASK 0x1
#define KDETH_HCRC_UPPER_SHIFT 16
#define KDETH_HCRC_UPPER_MASK 0xff
#define KDETH_HCRC_LOWER_SHIFT 24
#define KDETH_HCRC_LOWER_MASK 0xff

#define AHG_KDETH_INTR_SHIFT 12
#define AHG_KDETH_SH_SHIFT 13

#define PBC2LRH(x) ((((x) & 0xfff) << 2) - 4)
#define LRH2PBC(x) ((((x) >> 2) + 1) & 0xfff)

#define KDETH_GET(val, field) \
	(((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
#define KDETH_SET(dw, field, val) do { \
		u32 dwval = le32_to_cpu(dw); \
		dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \
		dwval |= (((val) & KDETH_##field##_MASK) << \
			  KDETH_##field##_SHIFT); \
		dw = cpu_to_le32(dwval); \
	} while (0)

#define AHG_HEADER_SET(arr, idx, dw, bit, width, value) \
	do { \
		if ((idx) < ARRAY_SIZE((arr))) \
			(arr)[(idx++)] = sdma_build_ahg_descriptor( \
				(__force u16)(value), (dw), (bit), \
				(width)); \
		else \
			return -ERANGE; \
	} while (0)

/* KDETH OM multipliers and switch over point */
#define KDETH_OM_SMALL 4
#define KDETH_OM_SMALL_SHIFT 2
#define KDETH_OM_LARGE 64
#define KDETH_OM_LARGE_SHIFT 6
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))

/* Tx request flag bits */
#define TXREQ_FLAGS_REQ_ACK BIT(0)        /* Set the ACK bit in the header */
#define TXREQ_FLAGS_REQ_DISABLE_SH BIT(1) /* Disable header suppression */

/* SDMA request flag bits */
#define SDMA_REQ_FOR_THREAD 1
#define SDMA_REQ_SEND_DONE 2
#define SDMA_REQ_HAS_ERROR 3
#define SDMA_REQ_DONE_ERROR 4

#define SDMA_PKT_Q_INACTIVE BIT(0)
#define SDMA_PKT_Q_ACTIVE BIT(1)
#define SDMA_PKT_Q_DEFERRED BIT(2)

/*
 * Maximum retry attempts to submit a TX request
 * before putting the process to sleep.
 */
#define MAX_DEFER_RETRY_COUNT 1

static unsigned initial_pkt_count = 8;

#define SDMA_IOWAIT_TIMEOUT 1000 /* in milliseconds */
struct sdma_mmu_node;

struct user_sdma_iovec {
	struct list_head list;
	struct iovec iov;
	/* number of pages in this vector */
	unsigned npages;
	/* array of pinned pages for this vector */
	struct page **pages;
	/*
	 * offset into the virtual address space of the vector at
	 * which we last left off.
	 */
	u64 offset;
	struct sdma_mmu_node *node;
};

struct sdma_mmu_node {
	struct mmu_rb_node rb;
	struct hfi1_user_sdma_pkt_q *pq;
	atomic_t refcount;
	struct page **pages;
	unsigned npages;
};

/* evict operation argument */
struct evict_data {
	u32 cleared;	/* count evicted so far */
	u32 target;	/* target count to evict */
};
struct user_sdma_request {
	struct sdma_req_info info;
	struct hfi1_user_sdma_pkt_q *pq;
	struct hfi1_user_sdma_comp_q *cq;
	/* This is the original header from user space */
	struct hfi1_pkt_header hdr;
	/*
	 * Pointer to the SDMA engine for this request.
	 * Since different requests could be on different VLs,
	 * each request needs its own engine pointer.
	 */
	struct sdma_engine *sde;
	s8 ahg_idx;
	u32 ahg[9];
	/*
	 * KDETH.Offset (Eager) field
	 * We need to remember the initial value so the headers
	 * can be updated properly.
	 */
	u32 koffset;
	/*
	 * KDETH.OFFSET (TID) field
	 * The offset can cover multiple packets, depending on the
	 * size of the TID entry.
	 */
	u32 tidoffset;
	/*
	 * We copy the iovs for this request (based on
	 * info.iovcnt). These are only the data vectors.
	 */
	unsigned data_iovs;
	/* total length of the data in the request */
	u32 data_len;
	/* progress index moving along the iovs array */
	unsigned iov_idx;
	struct user_sdma_iovec iovs[MAX_VECTORS_PER_REQ];
	/* number of elements copied to the tids array */
	u16 n_tids;
	/* TID array values copied from the tid_iov vector */
	u32 *tids;
	u16 tididx;
	u32 sent;
	u64 seqnum;
	u64 seqcomp;
	u64 seqsubmitted;
	struct list_head txps;
	unsigned long flags;
	/* status of the last txreq completed */
	int status;
};
/*
 * A single txreq could span up to 3 physical pages when the MTU
 * is sufficiently large (> 4K). Each of the IOV pointers also
 * needs its own set of flags so the vectors can be handled
 * independently of each other.
 */
struct user_sdma_txreq {
	/* Packet header for the txreq */
	struct hfi1_pkt_header hdr;
	struct sdma_txreq txreq;
	struct list_head list;
	struct user_sdma_request *req;
	u16 flags;
	unsigned busycount;
	u64 seqnum;
};

#define SDMA_DBG(req, fmt, ...) \
	hfi1_cdbg(SDMA, "[%u:%u:%u:%u] " fmt, (req)->pq->dd->unit, \
		  (req)->pq->ctxt, (req)->pq->subctxt, (req)->info.comp_idx, \
		  ##__VA_ARGS__)
#define SDMA_Q_DBG(pq, fmt, ...) \
	hfi1_cdbg(SDMA, "[%u:%u:%u] " fmt, (pq)->dd->unit, (pq)->ctxt, \
		  (pq)->subctxt, ##__VA_ARGS__)
static int user_sdma_send_pkts(struct user_sdma_request *req,
			       unsigned maxpkts);
static int num_user_pages(const struct iovec *iov);
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status);
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq);
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin);
static int pin_vector_pages(struct user_sdma_request *req,
			    struct user_sdma_iovec *iovec);
static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
			       unsigned start, unsigned npages);
static int check_header_template(struct user_sdma_request *req,
				 struct hfi1_pkt_header *hdr, u32 lrhlen,
				 u32 datalen);
static int set_txreq_header(struct user_sdma_request *req,
			    struct user_sdma_txreq *tx, u32 datalen);
static int set_txreq_header_ahg(struct user_sdma_request *req,
				struct user_sdma_txreq *tx, u32 len);
static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
				  struct hfi1_user_sdma_comp_q *cq,
				  u16 idx, enum hfi1_sdma_comp_state state,
				  int ret);
static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags);
static inline u32 get_lrh_len(struct hfi1_pkt_header, u32 len);

static int defer_packet_queue(
	struct sdma_engine *sde,
	struct iowait *wait,
	struct sdma_txreq *txreq,
	unsigned int seq);
static void activate_packet_queue(struct iowait *wait, int reason);
static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
			   unsigned long len);
static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
			 void *arg2, bool *stop);
static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode);
static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode);

static struct mmu_rb_ops sdma_rb_ops = {
	.filter = sdma_rb_filter,
	.insert = sdma_rb_insert,
	.evict = sdma_rb_evict,
	.remove = sdma_rb_remove,
	.invalidate = sdma_rb_invalidate
};
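
/*
 * iowait 'sleep' callback, invoked when the SDMA engine cannot accept
 * more descriptors for this packet queue. If the ring has made progress
 * and the retry budget is not exhausted, ask the caller to try again
 * (-EAGAIN); otherwise mark the queue deferred, park it on the engine's
 * dmawait list, and return -EBUSY.
 */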
static int defer_packet_queue(
	struct sdma_engine *sde,
	struct iowait *wait,
	struct sdma_txreq *txreq,
	unsigned seq)
{
	struct hfi1_user_sdma_pkt_q *pq =
		container_of(wait, struct hfi1_user_sdma_pkt_q, busy);
	struct hfi1_ibdev *dev = &pq->dd->verbs_dev;
	struct user_sdma_txreq *tx =
		container_of(txreq, struct user_sdma_txreq, txreq);

	if (sdma_progress(sde, seq, txreq)) {
		if (tx->busycount++ < MAX_DEFER_RETRY_COUNT)
			goto eagain;
	}
	/*
	 * We are assuming that if the list is enqueued somewhere, it
	 * is to the dmawait list since that is the only place where
	 * it is supposed to be enqueued.
	 */
	xchg(&pq->state, SDMA_PKT_Q_DEFERRED);
	write_seqlock(&dev->iowait_lock);
	if (list_empty(&pq->busy.list))
		list_add_tail(&pq->busy.list, &sde->dmawait);
	write_sequnlock(&dev->iowait_lock);
	return -EBUSY;
eagain:
	return -EAGAIN;
}
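
/*
 * iowait 'wakeup' callback: descriptor space is available again, so mark
 * the packet queue active and wake any thread blocked in
 * hfi1_user_sdma_process_request().
 */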
static void activate_packet_queue(struct iowait *wait, int reason)
{
	struct hfi1_user_sdma_pkt_q *pq =
		container_of(wait, struct hfi1_user_sdma_pkt_q, busy);

	xchg(&pq->state, SDMA_PKT_Q_ACTIVE);
	wake_up(&wait->wait_dma);
}
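
/* kmem_cache constructor: start every txreq object zeroed */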
static void sdma_kmem_cache_ctor(void *obj)
{
	struct user_sdma_txreq *tx = obj;

	memset(tx, 0, sizeof(*tx));
}
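
/*
 * Allocate the per-open-file SDMA resources: the request array, the
 * in-use bitmap, the txreq kmem cache, the user-mappable completion
 * ring, and the MMU rb-tree handler used for the pinned-page cache.
 */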
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *uctxt,
				struct hfi1_filedata *fd)
{
	int ret = -ENOMEM;
	char buf[64];
	struct hfi1_devdata *dd;
	struct hfi1_user_sdma_comp_q *cq;
	struct hfi1_user_sdma_pkt_q *pq;
	unsigned long flags;

	if (!uctxt || !fd)
		return -EBADF;

	if (!hfi1_sdma_comp_ring_size)
		return -EINVAL;

	dd = uctxt->dd;

	pq = kzalloc(sizeof(*pq), GFP_KERNEL);
	if (!pq)
		return -ENOMEM;

	INIT_LIST_HEAD(&pq->list);
	pq->dd = dd;
	pq->ctxt = uctxt->ctxt;
	pq->subctxt = fd->subctxt;
	pq->n_max_reqs = hfi1_sdma_comp_ring_size;
	pq->state = SDMA_PKT_Q_INACTIVE;
	atomic_set(&pq->n_reqs, 0);
	init_waitqueue_head(&pq->wait);
	atomic_set(&pq->n_locked, 0);
	pq->mm = fd->mm;

	iowait_init(&pq->busy, 0, NULL, defer_packet_queue,
		    activate_packet_queue, NULL);
	pq->reqidx = 0;

	pq->reqs = kcalloc(hfi1_sdma_comp_ring_size,
			   sizeof(*pq->reqs),
			   GFP_KERNEL);
	if (!pq->reqs)
		goto pq_reqs_nomem;

	pq->req_in_use = kcalloc(BITS_TO_LONGS(hfi1_sdma_comp_ring_size),
				 sizeof(*pq->req_in_use),
				 GFP_KERNEL);
	if (!pq->req_in_use)
		goto pq_reqs_no_in_use;

	snprintf(buf, 64, "txreq-kmem-cache-%u-%u-%u", dd->unit, uctxt->ctxt,
		 fd->subctxt);
	pq->txreq_cache = kmem_cache_create(buf,
					    sizeof(struct user_sdma_txreq),
					    L1_CACHE_BYTES,
					    SLAB_HWCACHE_ALIGN,
					    sdma_kmem_cache_ctor);
	if (!pq->txreq_cache) {
		dd_dev_err(dd, "[%u] Failed to allocate TxReq cache\n",
			   uctxt->ctxt);
		goto pq_txreq_nomem;
	}

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		goto cq_nomem;

	cq->comps = vmalloc_user(PAGE_ALIGN(sizeof(*cq->comps)
				 * hfi1_sdma_comp_ring_size));
	if (!cq->comps)
		goto cq_comps_nomem;

	cq->nentries = hfi1_sdma_comp_ring_size;

	ret = hfi1_mmu_rb_register(pq, pq->mm, &sdma_rb_ops, dd->pport->hfi1_wq,
				   &pq->handler);
	if (ret) {
		dd_dev_err(dd, "Failed to register with MMU %d", ret);
		goto pq_mmu_fail;
	}

	fd->pq = pq;
	fd->cq = cq;

	spin_lock_irqsave(&uctxt->sdma_qlock, flags);
	list_add(&pq->list, &uctxt->sdma_queues);
	spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);

	return 0;

pq_mmu_fail:
	vfree(cq->comps);
cq_comps_nomem:
	kfree(cq);
cq_nomem:
	kmem_cache_destroy(pq->txreq_cache);
pq_txreq_nomem:
	kfree(pq->req_in_use);
pq_reqs_no_in_use:
	kfree(pq->reqs);
pq_reqs_nomem:
	kfree(pq);

	return ret;
}
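
/*
 * Tear down the per-open-file SDMA state: unregister the MMU handler,
 * drain in-flight descriptors, wait for all outstanding requests to
 * finish, then free the packet and completion queues.
 */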
int hfi1_user_sdma_free_queues(struct hfi1_filedata *fd)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_user_sdma_pkt_q *pq;
	unsigned long flags;

	hfi1_cdbg(SDMA, "[%u:%u:%u] Freeing user SDMA queues", uctxt->dd->unit,
		  uctxt->ctxt, fd->subctxt);
	pq = fd->pq;
	if (pq) {
		if (pq->handler)
			hfi1_mmu_rb_unregister(pq->handler);
		spin_lock_irqsave(&uctxt->sdma_qlock, flags);
		if (!list_empty(&pq->list))
			list_del_init(&pq->list);
		spin_unlock_irqrestore(&uctxt->sdma_qlock, flags);
		iowait_sdma_drain(&pq->busy);
		/* Wait until all requests have been freed. */
		wait_event_interruptible(
			pq->wait,
			(ACCESS_ONCE(pq->state) == SDMA_PKT_Q_INACTIVE));
		kfree(pq->reqs);
		kfree(pq->req_in_use);
		kmem_cache_destroy(pq->txreq_cache);
		kfree(pq);
		fd->pq = NULL;
	}
	if (fd->cq) {
		vfree(fd->cq->comps);
		kfree(fd->cq);
		fd->cq = NULL;
	}
	return 0;
}
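
/*
 * Map a DLID to a small selector value. DLIDs that hash to the same
 * bucket get the same selector, which keeps traffic to a given
 * destination on the same SDMA engine while spreading different
 * destinations across engines.
 */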
static u8 dlid_to_selector(u16 dlid)
{
	static u8 mapping[256];
	static int initialized;
	static u8 next;
	int hash;

	if (!initialized) {
		memset(mapping, 0xFF, 256);
		initialized = 1;
	}

	hash = ((dlid >> 8) ^ dlid) & 0xFF;
	if (mapping[hash] == 0xFF) {
		mapping[hash] = next;
		next = (next + 1) & 0x7F;
	}

	return mapping[hash];
}
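
/*
 * Entry point for a user SDMA write: validate the request info and
 * header template supplied in the first io vector, claim a
 * completion-ring slot, pin the data vectors, pick an SDMA engine, and
 * keep submitting packets until the whole request has been handed to
 * the engine.
 */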
int hfi1_user_sdma_process_request(struct hfi1_filedata *fd,
				   struct iovec *iovec, unsigned long dim,
				   unsigned long *count)
{
	int ret = 0, i;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_user_sdma_pkt_q *pq = fd->pq;
	struct hfi1_user_sdma_comp_q *cq = fd->cq;
	struct hfi1_devdata *dd = pq->dd;
	unsigned long idx = 0;
	u8 pcount = initial_pkt_count;
	struct sdma_req_info info;
	struct user_sdma_request *req;
	u8 opcode, sc, vl;
	int req_queued = 0;
	u16 dlid;
	u32 selector;

	if (iovec[idx].iov_len < sizeof(info) + sizeof(req->hdr)) {
		hfi1_cdbg(
		   SDMA,
		   "[%u:%u:%u] First vector not big enough for header %lu/%lu",
		   dd->unit, uctxt->ctxt, fd->subctxt,
		   iovec[idx].iov_len, sizeof(info) + sizeof(req->hdr));
		return -EINVAL;
	}
	ret = copy_from_user(&info, iovec[idx].iov_base, sizeof(info));
	if (ret) {
		hfi1_cdbg(SDMA, "[%u:%u:%u] Failed to copy info QW (%d)",
			  dd->unit, uctxt->ctxt, fd->subctxt, ret);
		return -EFAULT;
	}

	trace_hfi1_sdma_user_reqinfo(dd, uctxt->ctxt, fd->subctxt,
				     (u16 *)&info);

	if (info.comp_idx >= hfi1_sdma_comp_ring_size) {
		hfi1_cdbg(SDMA,
			  "[%u:%u:%u:%u] Invalid comp index",
			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
		return -EINVAL;
	}

	/*
	 * Sanity check the header io vector count. Need at least 1 vector
	 * (header) and cannot be larger than the actual io vector count.
	 */
	if (req_iovcnt(info.ctrl) < 1 || req_iovcnt(info.ctrl) > dim) {
		hfi1_cdbg(SDMA,
			  "[%u:%u:%u:%u] Invalid iov count %d, dim %ld",
			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx,
			  req_iovcnt(info.ctrl), dim);
		return -EINVAL;
	}

	if (!info.fragsize) {
		hfi1_cdbg(SDMA,
			  "[%u:%u:%u:%u] Request does not specify fragsize",
			  dd->unit, uctxt->ctxt, fd->subctxt, info.comp_idx);
		return -EINVAL;
	}

	/* Try to claim the request. */
	if (test_and_set_bit(info.comp_idx, pq->req_in_use)) {
		hfi1_cdbg(SDMA, "[%u:%u:%u] Entry %u is in use",
			  dd->unit, uctxt->ctxt, fd->subctxt,
			  info.comp_idx);
		return -EBADSLT;
	}
	/*
	 * All safety checks have been done and this request has been claimed.
	 */
	hfi1_cdbg(SDMA, "[%u:%u:%u] Using req/comp entry %u\n", dd->unit,
		  uctxt->ctxt, fd->subctxt, info.comp_idx);
	req = pq->reqs + info.comp_idx;
	memset(req, 0, sizeof(*req));
	req->data_iovs = req_iovcnt(info.ctrl) - 1; /* subtract header vector */
	req->pq = pq;
	req->cq = cq;
	req->status = -1;
	req->ahg_idx = -1;
	INIT_LIST_HEAD(&req->txps);

	memcpy(&req->info, &info, sizeof(info));

	if (req_opcode(info.ctrl) == EXPECTED) {
		/* expected must have a TID info and at least one data vector */
		if (req->data_iovs < 2) {
			SDMA_DBG(req,
				 "Not enough vectors for expected request");
			ret = -EINVAL;
			goto free_req;
		}
		req->data_iovs--;
	}

	if (!info.npkts || req->data_iovs > MAX_VECTORS_PER_REQ) {
		SDMA_DBG(req, "Too many vectors (%u/%u)", req->data_iovs,
			 MAX_VECTORS_PER_REQ);
		ret = -EINVAL;
		goto free_req;
	}
	/* Copy the header from the user buffer */
	ret = copy_from_user(&req->hdr, iovec[idx].iov_base + sizeof(info),
			     sizeof(req->hdr));
	if (ret) {
		SDMA_DBG(req, "Failed to copy header template (%d)", ret);
		ret = -EFAULT;
		goto free_req;
	}

	/* If Static rate control is not enabled, sanitize the header. */
	if (!HFI1_CAP_IS_USET(STATIC_RATE_CTRL))
		req->hdr.pbc[2] = 0;

	/* Validate the opcode. Do not trust packets from user space blindly. */
	opcode = (be32_to_cpu(req->hdr.bth[0]) >> 24) & 0xff;
	if ((opcode & USER_OPCODE_CHECK_MASK) !=
	     USER_OPCODE_CHECK_VAL) {
		SDMA_DBG(req, "Invalid opcode (%d)", opcode);
		ret = -EINVAL;
		goto free_req;
	}
	/*
	 * Validate the vl. Do not trust packets from user space blindly.
	 * VL comes from PBC, SC comes from LRH, and the VL needs to
	 * match the SC look up.
	 */
	vl = (le16_to_cpu(req->hdr.pbc[0]) >> 12) & 0xF;
	sc = (((be16_to_cpu(req->hdr.lrh[0]) >> 12) & 0xF) |
	      (((le16_to_cpu(req->hdr.pbc[1]) >> 14) & 0x1) << 4));
	if (vl >= dd->pport->vls_operational ||
	    vl != sc_to_vlt(dd, sc)) {
		SDMA_DBG(req, "Invalid SC(%u)/VL(%u)", sc, vl);
		ret = -EINVAL;
		goto free_req;
	}

	/* Checking P_KEY for requests from user-space */
	if (egress_pkey_check(dd->pport, req->hdr.lrh, req->hdr.bth, sc,
			      PKEY_CHECK_INVALID)) {
		ret = -EINVAL;
		goto free_req;
	}

	/*
	 * Also should check the BTH.lnh. If it says the next header is GRH then
	 * the RXE parsing will be off and will land in the middle of the KDETH
	 * or miss it entirely.
	 */
	if ((be16_to_cpu(req->hdr.lrh[0]) & 0x3) == HFI1_LRH_GRH) {
		SDMA_DBG(req, "User tried to pass in a GRH");
		ret = -EINVAL;
		goto free_req;
	}

	req->koffset = le32_to_cpu(req->hdr.kdeth.swdata[6]);
	/*
	 * Calculate the initial TID offset based on the values of
	 * KDETH.OFFSET and KDETH.OM that are passed in.
	 */
	req->tidoffset = KDETH_GET(req->hdr.kdeth.ver_tid_offset, OFFSET) *
		(KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
		 KDETH_OM_LARGE : KDETH_OM_SMALL);
	SDMA_DBG(req, "Initial TID offset %u", req->tidoffset);
	idx++;

	/* Save all the IO vector structures */
	for (i = 0; i < req->data_iovs; i++) {
		INIT_LIST_HEAD(&req->iovs[i].list);
		memcpy(&req->iovs[i].iov,
		       iovec + idx++,
		       sizeof(req->iovs[i].iov));
		ret = pin_vector_pages(req, &req->iovs[i]);
		if (ret) {
			req->status = ret;
			goto free_req;
		}
		req->data_len += req->iovs[i].iov.iov_len;
	}
	SDMA_DBG(req, "total data length %u", req->data_len);

	if (pcount > req->info.npkts)
		pcount = req->info.npkts;

	/*
	 * Copy any TID info
	 * User space will provide the TID info only when the
	 * request type is EXPECTED. This is true even if there is
	 * only one packet in the request and the header is already
	 * setup. The reason for the singular TID case is that the
	 * driver needs to perform safety checks.
	 */
	if (req_opcode(req->info.ctrl) == EXPECTED) {
		u16 ntids = iovec[idx].iov_len / sizeof(*req->tids);
		u32 *tmp;

		if (!ntids || ntids > MAX_TID_PAIR_ENTRIES) {
			ret = -EINVAL;
			goto free_req;
		}

		/*
		 * We have to copy all of the tids because they may vary
		 * in size and, therefore, the TID count might not be
		 * equal to the pkt count. However, there is no way to
		 * tell at this point.
		 */
		tmp = memdup_user(iovec[idx].iov_base,
				  ntids * sizeof(*req->tids));
		if (IS_ERR(tmp)) {
			ret = PTR_ERR(tmp);
			SDMA_DBG(req, "Failed to copy %d TIDs (%d)",
				 ntids, ret);
			goto free_req;
		}
		req->tids = tmp;
		req->n_tids = ntids;
		idx++;
	}

	dlid = be16_to_cpu(req->hdr.lrh[1]);
	selector = dlid_to_selector(dlid);
	selector += uctxt->ctxt + fd->subctxt;
	req->sde = sdma_select_user_engine(dd, selector, vl);

	if (!req->sde || !sdma_running(req->sde)) {
		ret = -ECOMM;
		goto free_req;
	}

	/* We don't need an AHG entry if the request contains only one packet */
	if (req->info.npkts > 1 && HFI1_CAP_IS_USET(SDMA_AHG))
		req->ahg_idx = sdma_ahg_alloc(req->sde);

	set_comp_state(pq, cq, info.comp_idx, QUEUED, 0);
	atomic_inc(&pq->n_reqs);
	req_queued = 1;
	/* Send the first N packets in the request to buy us some time */
	ret = user_sdma_send_pkts(req, pcount);
	if (unlikely(ret < 0 && ret != -EBUSY)) {
		req->status = ret;
		goto free_req;
	}

	/*
	 * It is possible that the SDMA engine would have processed all the
	 * submitted packets by the time we get here. Therefore, only set
	 * packet queue state to ACTIVE if there are still uncompleted
	 * requests.
	 */
	if (atomic_read(&pq->n_reqs))
		xchg(&pq->state, SDMA_PKT_Q_ACTIVE);

	/*
	 * This is a somewhat blocking send implementation.
	 * The driver will block the caller until all packets of the
	 * request have been submitted to the SDMA engine. However, it
	 * will not wait for send completions.
	 */
	while (!test_bit(SDMA_REQ_SEND_DONE, &req->flags)) {
		ret = user_sdma_send_pkts(req, pcount);
		if (ret < 0) {
			if (ret != -EBUSY) {
				req->status = ret;
				set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
				if (ACCESS_ONCE(req->seqcomp) ==
				    req->seqsubmitted - 1)
					goto free_req;
				return ret;
			}
			wait_event_interruptible_timeout(
				pq->busy.wait_dma,
				(pq->state == SDMA_PKT_Q_ACTIVE),
				msecs_to_jiffies(
					SDMA_IOWAIT_TIMEOUT));
		}
	}
	*count += idx;
	return 0;
free_req:
	user_sdma_free_request(req, true);
	if (req_queued)
		pq_update(pq);
	set_comp_state(pq, cq, info.comp_idx, ERROR, req->status);
	return ret;
}
static inline u32 compute_data_length(struct user_sdma_request *req,
				      struct user_sdma_txreq *tx)
{
	/*
	 * Determine the proper size of the packet data.
	 * The size of the data of the first packet is in the header
	 * template. However, it includes the header and ICRC, which need
	 * to be subtracted.
	 * The minimum representable packet data length in a header is 4 bytes,
	 * therefore, when the data length request is less than 4 bytes, there's
	 * only one packet, and the packet data length is equal to that of the
	 * request data length.
	 * The size of the remaining packets is the minimum of the frag
	 * size (MTU) or remaining data in the request.
	 */
	u32 len;

	if (!req->seqnum) {
		if (req->data_len < sizeof(u32))
			len = req->data_len;
		else
			len = ((be16_to_cpu(req->hdr.lrh[2]) << 2) -
			       (sizeof(tx->hdr) - 4));
	} else if (req_opcode(req->info.ctrl) == EXPECTED) {
		u32 tidlen = EXP_TID_GET(req->tids[req->tididx], LEN) *
			PAGE_SIZE;
		/*
		 * Get the data length based on the remaining space in the
		 * TID pair.
		 */
		len = min(tidlen - req->tidoffset, (u32)req->info.fragsize);
		/* If we've filled up the TID pair, move to the next one. */
		if (unlikely(!len) && ++req->tididx < req->n_tids &&
		    req->tids[req->tididx]) {
			tidlen = EXP_TID_GET(req->tids[req->tididx],
					     LEN) * PAGE_SIZE;
			req->tidoffset = 0;
			len = min_t(u32, tidlen, req->info.fragsize);
		}
		/*
		 * Since the TID pairs map entire pages, make sure that we
		 * are not going to try to send more data than we have
		 * remaining.
		 */
		len = min(len, req->data_len - req->sent);
	} else {
		len = min(req->data_len - req->sent, (u32)req->info.fragsize);
	}
	SDMA_DBG(req, "Data Length = %u", len);
	return len;
}
static inline u32 pad_len(u32 len)
{
	if (len & (sizeof(u32) - 1))
		len += sizeof(u32) - (len & (sizeof(u32) - 1));
	return len;
}

static inline u32 get_lrh_len(struct hfi1_pkt_header hdr, u32 len)
{
	/* (Size of complete header - size of PBC) + 4B ICRC + data length */
	return ((sizeof(hdr) - sizeof(hdr.pbc)) + 4 + len);
}
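
/*
 * Build and submit up to 'maxpkts' packets for this request: allocate a
 * txreq per packet, fill in (or AHG-update) the header, attach the data
 * pages, and hand the accumulated list to the SDMA engine via
 * sdma_send_txlist().
 */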
static int user_sdma_send_pkts(struct user_sdma_request *req, unsigned maxpkts)
{
	int ret = 0, count;
	unsigned npkts = 0;
	struct user_sdma_txreq *tx = NULL;
	struct hfi1_user_sdma_pkt_q *pq = NULL;
	struct user_sdma_iovec *iovec = NULL;

	if (!req->pq)
		return -EINVAL;

	pq = req->pq;

	/* If tx completion has reported an error, we are done. */
	if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
		set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
		return -EFAULT;
	}

	/*
	 * Check if we might have sent the entire request already
	 */
	if (unlikely(req->seqnum == req->info.npkts)) {
		if (!list_empty(&req->txps))
			goto dosend;
		return ret;
	}

	if (!maxpkts || maxpkts > req->info.npkts - req->seqnum)
		maxpkts = req->info.npkts - req->seqnum;

	while (npkts < maxpkts) {
		u32 datalen = 0, queued = 0, data_sent = 0;
		u64 iov_offset = 0;

		/*
		 * Check whether any of the completions have come back
		 * with errors. If so, we are not going to process any
		 * more packets from this request.
		 */
		if (test_bit(SDMA_REQ_HAS_ERROR, &req->flags)) {
			set_bit(SDMA_REQ_DONE_ERROR, &req->flags);
			return -EFAULT;
		}

		tx = kmem_cache_alloc(pq->txreq_cache, GFP_KERNEL);
		if (!tx)
			return -ENOMEM;

		tx->flags = 0;
		tx->req = req;
		tx->busycount = 0;
		INIT_LIST_HEAD(&tx->list);

		/*
		 * For the last packet set the ACK request
		 * and disable header suppression.
		 */
		if (req->seqnum == req->info.npkts - 1)
			tx->flags |= (TXREQ_FLAGS_REQ_ACK |
				      TXREQ_FLAGS_REQ_DISABLE_SH);

		/*
		 * Calculate the payload size - this is min of the fragment
		 * (MTU) size or the remaining bytes in the request but only
		 * if we have payload data.
		 */
		if (req->data_len) {
			iovec = &req->iovs[req->iov_idx];
			if (ACCESS_ONCE(iovec->offset) == iovec->iov.iov_len) {
				if (++req->iov_idx == req->data_iovs) {
					ret = -EFAULT;
					goto free_txreq;
				}
				iovec = &req->iovs[req->iov_idx];
				WARN_ON(iovec->offset);
			}

			datalen = compute_data_length(req, tx);

			/*
			 * Disable header suppression for the payload <= 8DWS.
			 * If there is an uncorrectable error in the receive
			 * data FIFO when the received payload size is less
			 * than or equal to 8DWS, then the
			 * RxDmaDataFifoRdUncErr is not reported. Instead,
			 * RHF.EccErr is set if the header is not suppressed.
			 */
			if (!datalen) {
				SDMA_DBG(req,
					 "Request has data but pkt len is 0");
				ret = -EFAULT;
				goto free_tx;
			} else if (datalen <= 32) {
				tx->flags |= TXREQ_FLAGS_REQ_DISABLE_SH;
			}
		}

		if (req->ahg_idx >= 0) {
			if (!req->seqnum) {
				u16 pbclen = le16_to_cpu(req->hdr.pbc[0]);
				u32 lrhlen = get_lrh_len(req->hdr,
							 pad_len(datalen));
				/*
				 * Copy the request header into the tx header
				 * because the HW needs a cacheline-aligned
				 * address.
				 * This copy can be optimized out if the hdr
				 * member of user_sdma_request were also
				 * cacheline aligned.
				 */
				memcpy(&tx->hdr, &req->hdr, sizeof(tx->hdr));
				if (PBC2LRH(pbclen) != lrhlen) {
					pbclen = (pbclen & 0xf000) |
						LRH2PBC(lrhlen);
					tx->hdr.pbc[0] = cpu_to_le16(pbclen);
				}
				ret = check_header_template(req, &tx->hdr,
							    lrhlen, datalen);
				if (ret)
					goto free_tx;
				ret = sdma_txinit_ahg(&tx->txreq,
						      SDMA_TXREQ_F_AHG_COPY,
						      sizeof(tx->hdr) + datalen,
						      req->ahg_idx, 0, NULL, 0,
						      user_sdma_txreq_cb);
				if (ret)
					goto free_tx;
				ret = sdma_txadd_kvaddr(pq->dd, &tx->txreq,
							&tx->hdr,
							sizeof(tx->hdr));
				if (ret)
					goto free_txreq;
			} else {
				int changes;

				changes = set_txreq_header_ahg(req, tx,
							       datalen);
				if (changes < 0)
					goto free_tx;
				sdma_txinit_ahg(&tx->txreq,
						SDMA_TXREQ_F_USE_AHG,
						datalen, req->ahg_idx, changes,
						req->ahg, sizeof(req->hdr),
						user_sdma_txreq_cb);
			}
		} else {
			ret = sdma_txinit(&tx->txreq, 0, sizeof(req->hdr) +
					  datalen, user_sdma_txreq_cb);
			if (ret)
				goto free_tx;
			/*
			 * Modify the header for this packet. This only needs
			 * to be done if we are not going to use AHG. Otherwise,
			 * the HW will do it based on the changes we gave it
			 * during sdma_txinit_ahg().
			 */
			ret = set_txreq_header(req, tx, datalen);
			if (ret)
				goto free_txreq;
		}

		/*
		 * If the request contains any data vectors, add up to
		 * fragsize bytes to the descriptor.
		 */
		while (queued < datalen &&
		       (req->sent + data_sent) < req->data_len) {
			unsigned long base, offset;
			unsigned pageidx, len;

			base = (unsigned long)iovec->iov.iov_base;
			offset = offset_in_page(base + iovec->offset +
						iov_offset);
			pageidx = (((iovec->offset + iov_offset +
				     base) - (base & PAGE_MASK)) >> PAGE_SHIFT);
			len = offset + req->info.fragsize > PAGE_SIZE ?
				PAGE_SIZE - offset : req->info.fragsize;
			len = min((datalen - queued), len);
			ret = sdma_txadd_page(pq->dd, &tx->txreq,
					      iovec->pages[pageidx],
					      offset, len);
			if (ret) {
				SDMA_DBG(req, "SDMA txreq add page failed %d\n",
					 ret);
				goto free_txreq;
			}
			iov_offset += len;
			queued += len;
			data_sent += len;
			if (unlikely(queued < datalen &&
				     pageidx == iovec->npages &&
				     req->iov_idx < req->data_iovs - 1)) {
				iovec->offset += iov_offset;
				iovec = &req->iovs[++req->iov_idx];
				iov_offset = 0;
			}
		}
		/*
		 * The txreq was submitted successfully so we can update
		 * the counters.
		 */
		req->koffset += datalen;
		if (req_opcode(req->info.ctrl) == EXPECTED)
			req->tidoffset += datalen;
		req->sent += data_sent;
		if (req->data_len)
			iovec->offset += iov_offset;
		list_add_tail(&tx->txreq.list, &req->txps);
		/*
		 * It is important to increment this here as it is used to
		 * generate the BTH.PSN and, therefore, can't be bulk-updated
		 * outside of the loop.
		 */
		tx->seqnum = req->seqnum++;
		npkts++;
	}
dosend:
	ret = sdma_send_txlist(req->sde, &pq->busy, &req->txps, &count);
	req->seqsubmitted += count;
	if (req->seqsubmitted == req->info.npkts) {
		set_bit(SDMA_REQ_SEND_DONE, &req->flags);
		/*
		 * The txreq has already been submitted to the HW queue
		 * so we can free the AHG entry now. Corruption will not
		 * happen due to the sequential manner in which
		 * descriptors are processed.
		 */
		if (req->ahg_idx >= 0)
			sdma_ahg_free(req->sde, req->ahg_idx);
	}
	return ret;

free_txreq:
	sdma_txclean(pq->dd, &tx->txreq);
free_tx:
	kmem_cache_free(pq->txreq_cache, tx);
	return ret;
}
/*
 * How many pages in this iovec element?
 */
static inline int num_user_pages(const struct iovec *iov)
{
	const unsigned long addr = (unsigned long)iov->iov_base;
	const unsigned long len = iov->iov_len;
	const unsigned long spage = addr & PAGE_MASK;
	const unsigned long epage = (addr + len - 1) & PAGE_MASK;

	return 1 + ((epage - spage) >> PAGE_SHIFT);
}
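
/*
 * Ask the pinned-page cache to release enough unused entries to cover
 * 'npages' new pages; returns how many pages were actually freed.
 */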
static u32 sdma_cache_evict(struct hfi1_user_sdma_pkt_q *pq, u32 npages)
{
	struct evict_data evict_data;

	evict_data.cleared = 0;
	evict_data.target = npages;
	hfi1_mmu_rb_evict(pq->handler, &evict_data);
	return evict_data.cleared;
}
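
/*
 * Pin the user pages backing one data io vector. Previously pinned
 * buffers are reused from the MMU rb-tree cache; otherwise the pages
 * are pinned (evicting cached entries if the pinned-page limit would be
 * exceeded) and the new node is inserted into the cache.
 */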
static int pin_vector_pages(struct user_sdma_request *req,
			    struct user_sdma_iovec *iovec)
{
	int ret = 0, pinned, npages, cleared;
	struct page **pages;
	struct hfi1_user_sdma_pkt_q *pq = req->pq;
	struct sdma_mmu_node *node = NULL;
	struct mmu_rb_node *rb_node;

	rb_node = hfi1_mmu_rb_extract(pq->handler,
				      (unsigned long)iovec->iov.iov_base,
				      iovec->iov.iov_len);
	if (rb_node)
		node = container_of(rb_node, struct sdma_mmu_node, rb);
	else
		rb_node = NULL;

	if (!node) {
		node = kzalloc(sizeof(*node), GFP_KERNEL);
		if (!node)
			return -ENOMEM;

		node->rb.addr = (unsigned long)iovec->iov.iov_base;
		node->pq = pq;
		atomic_set(&node->refcount, 0);
	}

	npages = num_user_pages(&iovec->iov);
	if (node->npages < npages) {
		pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
		if (!pages) {
			SDMA_DBG(req, "Failed page array alloc");
			ret = -ENOMEM;
			goto bail;
		}
		memcpy(pages, node->pages, node->npages * sizeof(*pages));

		npages -= node->npages;

retry:
		if (!hfi1_can_pin_pages(pq->dd, pq->mm,
					atomic_read(&pq->n_locked), npages)) {
			cleared = sdma_cache_evict(pq, npages);
			if (cleared >= npages)
				goto retry;
		}
		pinned = hfi1_acquire_user_pages(pq->mm,
			((unsigned long)iovec->iov.iov_base +
			 (node->npages * PAGE_SIZE)), npages, 0,
			pages + node->npages);
		if (pinned < 0) {
			kfree(pages);
			ret = pinned;
			goto bail;
		}
		if (pinned != npages) {
			unpin_vector_pages(pq->mm, pages, node->npages,
					   pinned);
			ret = -EFAULT;
			goto bail;
		}
		kfree(node->pages);
		node->rb.len = iovec->iov.iov_len;
		node->pages = pages;
		node->npages += pinned;
		npages = node->npages;
		atomic_add(pinned, &pq->n_locked);
	}
	iovec->pages = node->pages;
	iovec->npages = npages;
	iovec->node = node;

	ret = hfi1_mmu_rb_insert(req->pq->handler, &node->rb);
	if (ret) {
		atomic_sub(node->npages, &pq->n_locked);
		iovec->node = NULL;
		goto bail;
	}
	return 0;
bail:
	if (rb_node)
		unpin_vector_pages(pq->mm, node->pages, 0, node->npages);
	kfree(node);
	return ret;
}

static void unpin_vector_pages(struct mm_struct *mm, struct page **pages,
			       unsigned start, unsigned npages)
{
	hfi1_release_user_pages(mm, pages + start, npages, false);
	kfree(pages);
}
static int check_header_template(struct user_sdma_request *req,
				 struct hfi1_pkt_header *hdr, u32 lrhlen,
				 u32 datalen)
{
	/*
	 * Perform safety checks for any type of packet:
	 * - transfer size is multiple of 64bytes
	 * - packet length is multiple of 4 bytes
	 * - packet length is not larger than MTU size
	 *
	 * These checks are only done for the first packet of the
	 * transfer since the header is "given" to us by user space.
	 * For the remainder of the packets we compute the values.
	 */
	if (req->info.fragsize % PIO_BLOCK_SIZE || lrhlen & 0x3 ||
	    lrhlen > get_lrh_len(*hdr, req->info.fragsize))
		return -EINVAL;

	if (req_opcode(req->info.ctrl) == EXPECTED) {
		/*
		 * The header is checked only on the first packet. Furthermore,
		 * we ensure that at least one TID entry is copied when the
		 * request is submitted. Therefore, we don't have to verify that
		 * tididx points to something sane.
		 */
		u32 tidval = req->tids[req->tididx],
			tidlen = EXP_TID_GET(tidval, LEN) * PAGE_SIZE,
			tididx = EXP_TID_GET(tidval, IDX),
			tidctrl = EXP_TID_GET(tidval, CTRL),
			tidoff;
		__le32 kval = hdr->kdeth.ver_tid_offset;

		tidoff = KDETH_GET(kval, OFFSET) *
			  (KDETH_GET(req->hdr.kdeth.ver_tid_offset, OM) ?
			   KDETH_OM_LARGE : KDETH_OM_SMALL);
		/*
		 * Expected receive packets have the following
		 * additional checks:
		 * - offset is not larger than the TID size
		 * - TIDCtrl values match between header and TID array
		 * - TID indexes match between header and TID array
		 */
		if ((tidoff + datalen > tidlen) ||
		    KDETH_GET(kval, TIDCTRL) != tidctrl ||
		    KDETH_GET(kval, TID) != tididx)
			return -EINVAL;
	}
	return 0;
}

/*
 * Correctly set the BTH.PSN field based on type of
 * transfer - eager packets can just increment the PSN but
 * expected packets encode generation and sequence in the
 * BTH.PSN field so just incrementing will result in errors.
 */
static inline u32 set_pkt_bth_psn(__be32 bthpsn, u8 expct, u32 frags)
{
	u32 val = be32_to_cpu(bthpsn),
		mask = (HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffffull :
			0xffffffull),
		psn = val & mask;
	if (expct)
		psn = (psn & ~BTH_SEQ_MASK) | ((psn + frags) & BTH_SEQ_MASK);
	else
		psn = psn + frags;
	return psn & mask;
}
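
/*
 * Build the complete header for one packet from the user-supplied
 * template: fix up the PBC/LRH lengths, advance BTH.PSN and
 * KDETH.Offset, and (for expected receives) refresh the TID fields,
 * then attach the header to the txreq.
 */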
static int set_txreq_header(struct user_sdma_request *req,
			    struct user_sdma_txreq *tx, u32 datalen)
{
	struct hfi1_user_sdma_pkt_q *pq = req->pq;
	struct hfi1_pkt_header *hdr = &tx->hdr;
	u8 omfactor; /* KDETH.OM */
	u16 pbclen;
	int ret;
	u32 tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(datalen));

	/* Copy the header template to the request before modification */
	memcpy(hdr, &req->hdr, sizeof(*hdr));

	/*
	 * Check if the PBC and LRH length are mismatched. If so
	 * adjust both in the header.
	 */
	pbclen = le16_to_cpu(hdr->pbc[0]);
	if (PBC2LRH(pbclen) != lrhlen) {
		pbclen = (pbclen & 0xf000) | LRH2PBC(lrhlen);
		hdr->pbc[0] = cpu_to_le16(pbclen);
		hdr->lrh[2] = cpu_to_be16(lrhlen >> 2);
		/*
		 * Third packet
		 * This is the first packet in the sequence that has
		 * a "static" size that can be used for the rest of
		 * the packets (besides the last one).
		 */
		if (unlikely(req->seqnum == 2)) {
			/*
			 * From this point on the lengths in both the
			 * PBC and LRH are the same until the last
			 * packet.
			 * Adjust the template so we don't have to update
			 * every packet
			 */
			req->hdr.pbc[0] = hdr->pbc[0];
			req->hdr.lrh[2] = hdr->lrh[2];
		}
	}
	/*
	 * We only have to modify the header if this is not the
	 * first packet in the request. Otherwise, we use the
	 * header given to us.
	 */
	if (unlikely(!req->seqnum)) {
		ret = check_header_template(req, hdr, lrhlen, datalen);
		if (ret)
			return ret;
		goto done;
	}

	hdr->bth[2] = cpu_to_be32(
		set_pkt_bth_psn(hdr->bth[2],
				(req_opcode(req->info.ctrl) == EXPECTED),
				req->seqnum));

	/* Set ACK request on last packet */
	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
		hdr->bth[2] |= cpu_to_be32(1UL << 31);

	/* Set the new offset */
	hdr->kdeth.swdata[6] = cpu_to_le32(req->koffset);
	/* Expected packets have to fill in the new TID information */
	if (req_opcode(req->info.ctrl) == EXPECTED) {
		tidval = req->tids[req->tididx];
		/*
		 * If the offset puts us at the end of the current TID,
		 * advance everything.
		 */
		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
					 PAGE_SIZE)) {
			req->tidoffset = 0;
			/*
			 * Since we don't copy all the TIDs at once, we
			 * have to check again.
			 */
			if (++req->tididx > req->n_tids - 1 ||
			    !req->tids[req->tididx]) {
				return -EINVAL;
			}
			tidval = req->tids[req->tididx];
		}
		omfactor = EXP_TID_GET(tidval, LEN) * PAGE_SIZE >=
			KDETH_OM_MAX_SIZE ? KDETH_OM_LARGE_SHIFT :
			KDETH_OM_SMALL_SHIFT;
		/* Set KDETH.TIDCtrl based on value for this TID. */
		KDETH_SET(hdr->kdeth.ver_tid_offset, TIDCTRL,
			  EXP_TID_GET(tidval, CTRL));
		/* Set KDETH.TID based on value for this TID */
		KDETH_SET(hdr->kdeth.ver_tid_offset, TID,
			  EXP_TID_GET(tidval, IDX));
		/* Clear KDETH.SH when DISABLE_SH flag is set */
		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH))
			KDETH_SET(hdr->kdeth.ver_tid_offset, SH, 0);
		/*
		 * Set the KDETH.OFFSET and KDETH.OM based on size of
		 * transfer.
		 */
		SDMA_DBG(req, "TID offset %ubytes %uunits om%u",
			 req->tidoffset, req->tidoffset >> omfactor,
			 omfactor != KDETH_OM_SMALL_SHIFT);
		KDETH_SET(hdr->kdeth.ver_tid_offset, OFFSET,
			  req->tidoffset >> omfactor);
		KDETH_SET(hdr->kdeth.ver_tid_offset, OM,
			  omfactor != KDETH_OM_SMALL_SHIFT);
	}
done:
	trace_hfi1_sdma_user_header(pq->dd, pq->ctxt, pq->subctxt,
				    req->info.comp_idx, hdr, tidval);
	return sdma_txadd_kvaddr(pq->dd, &tx->txreq, hdr, sizeof(*hdr));
}
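
/*
 * Same header updates as set_txreq_header(), but expressed as AHG
 * update descriptors so the hardware patches the previously loaded
 * header instead of DMA-ing a full copy. Returns the number of AHG
 * entries written, or a negative errno.
 */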
static int set_txreq_header_ahg(struct user_sdma_request *req,
				struct user_sdma_txreq *tx, u32 len)
{
	int diff = 0;
	u8 omfactor; /* KDETH.OM */
	struct hfi1_user_sdma_pkt_q *pq = req->pq;
	struct hfi1_pkt_header *hdr = &req->hdr;
	u16 pbclen = le16_to_cpu(hdr->pbc[0]);
	u32 val32, tidval = 0, lrhlen = get_lrh_len(*hdr, pad_len(len));

	if (PBC2LRH(pbclen) != lrhlen) {
		/* PBC.PbcLengthDWs */
		AHG_HEADER_SET(req->ahg, diff, 0, 0, 12,
			       cpu_to_le16(LRH2PBC(lrhlen)));
		/* LRH.PktLen (we need the full 16 bits due to byte swap) */
		AHG_HEADER_SET(req->ahg, diff, 3, 0, 16,
			       cpu_to_be16(lrhlen >> 2));
	}

	/*
	 * Do the common updates
	 */
	/* BTH.PSN and BTH.A */
	val32 = (be32_to_cpu(hdr->bth[2]) + req->seqnum) &
		(HFI1_CAP_IS_KSET(EXTENDED_PSN) ? 0x7fffffff : 0xffffff);
	if (unlikely(tx->flags & TXREQ_FLAGS_REQ_ACK))
		val32 |= 1UL << 31;
	AHG_HEADER_SET(req->ahg, diff, 6, 0, 16, cpu_to_be16(val32 >> 16));
	AHG_HEADER_SET(req->ahg, diff, 6, 16, 16, cpu_to_be16(val32 & 0xffff));
	/* KDETH.Offset */
	AHG_HEADER_SET(req->ahg, diff, 15, 0, 16,
		       cpu_to_le16(req->koffset & 0xffff));
	AHG_HEADER_SET(req->ahg, diff, 15, 16, 16,
		       cpu_to_le16(req->koffset >> 16));
	if (req_opcode(req->info.ctrl) == EXPECTED) {
		__le16 val;

		tidval = req->tids[req->tididx];

		/*
		 * If the offset puts us at the end of the current TID,
		 * advance everything.
		 */
		if ((req->tidoffset) == (EXP_TID_GET(tidval, LEN) *
					 PAGE_SIZE)) {
			req->tidoffset = 0;
			/*
			 * Since we don't copy all the TIDs at once, we
			 * have to check again.
			 */
			if (++req->tididx > req->n_tids - 1 ||
			    !req->tids[req->tididx]) {
				return -EINVAL;
			}
			tidval = req->tids[req->tididx];
		}
		omfactor = ((EXP_TID_GET(tidval, LEN) *
			     PAGE_SIZE) >=
			    KDETH_OM_MAX_SIZE) ? KDETH_OM_LARGE_SHIFT :
			    KDETH_OM_SMALL_SHIFT;
		/* KDETH.OM and KDETH.OFFSET (TID) */
		AHG_HEADER_SET(req->ahg, diff, 7, 0, 16,
			       ((!!(omfactor - KDETH_OM_SMALL_SHIFT)) << 15 |
				((req->tidoffset >> omfactor)
				 & 0x7fff)));
		/* KDETH.TIDCtrl, KDETH.TID, KDETH.Intr, KDETH.SH */
		val = cpu_to_le16(((EXP_TID_GET(tidval, CTRL) & 0x3) << 10) |
				   (EXP_TID_GET(tidval, IDX) & 0x3ff));

		if (unlikely(tx->flags & TXREQ_FLAGS_REQ_DISABLE_SH)) {
			val |= cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
						      INTR) <<
					    AHG_KDETH_INTR_SHIFT));
		} else {
			val |= KDETH_GET(hdr->kdeth.ver_tid_offset, SH) ?
			       cpu_to_le16(0x1 << AHG_KDETH_SH_SHIFT) :
			       cpu_to_le16((KDETH_GET(hdr->kdeth.ver_tid_offset,
						      INTR) <<
					    AHG_KDETH_INTR_SHIFT));
		}

		AHG_HEADER_SET(req->ahg, diff, 7, 16, 14, val);
	}

	trace_hfi1_sdma_user_header_ahg(pq->dd, pq->ctxt, pq->subctxt,
					req->info.comp_idx, req->sde->this_idx,
					req->ahg_idx, req->ahg, diff, tidval);
	return diff;
}
/*
 * SDMA tx request completion callback. Called when the SDMA progress
 * state machine gets notification that the SDMA descriptors for this
 * tx request have been processed by the DMA engine. Called in
 * interrupt context.
 */
static void user_sdma_txreq_cb(struct sdma_txreq *txreq, int status)
{
	struct user_sdma_txreq *tx =
		container_of(txreq, struct user_sdma_txreq, txreq);
	struct user_sdma_request *req;
	struct hfi1_user_sdma_pkt_q *pq;
	struct hfi1_user_sdma_comp_q *cq;
	u16 idx;

	if (!tx->req)
		return;

	req = tx->req;
	pq = req->pq;
	cq = req->cq;

	if (status != SDMA_TXREQ_S_OK) {
		SDMA_DBG(req, "SDMA completion with error %d",
			 status);
		set_bit(SDMA_REQ_HAS_ERROR, &req->flags);
	}

	req->seqcomp = tx->seqnum;
	kmem_cache_free(pq->txreq_cache, tx);
	tx = NULL;

	idx = req->info.comp_idx;
	if (req->status == -1 && status == SDMA_TXREQ_S_OK) {
		if (req->seqcomp == req->info.npkts - 1) {
			req->status = 0;
			user_sdma_free_request(req, false);
			pq_update(pq);
			set_comp_state(pq, cq, idx, COMPLETE, 0);
		}
	} else {
		if (status != SDMA_TXREQ_S_OK)
			req->status = status;
		if (req->seqcomp == (ACCESS_ONCE(req->seqsubmitted) - 1) &&
		    (test_bit(SDMA_REQ_SEND_DONE, &req->flags) ||
		     test_bit(SDMA_REQ_DONE_ERROR, &req->flags))) {
			user_sdma_free_request(req, false);
			pq_update(pq);
			set_comp_state(pq, cq, idx, ERROR, req->status);
		}
	}
}
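
/*
 * Drop the packet queue's outstanding-request count; when it reaches
 * zero, mark the queue inactive and wake anyone waiting in
 * hfi1_user_sdma_free_queues().
 */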
static inline void pq_update(struct hfi1_user_sdma_pkt_q *pq)
{
	if (atomic_dec_and_test(&pq->n_reqs)) {
		xchg(&pq->state, SDMA_PKT_Q_INACTIVE);
		wake_up(&pq->wait);
	}
}
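
/*
 * Release everything a request still owns: clean and free any unsent
 * txreqs, drop (or unpin, when 'unpin' is set) the cached page nodes
 * for each data vector, free the copied TID array, and return the
 * completion-ring slot.
 */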
static void user_sdma_free_request(struct user_sdma_request *req, bool unpin)
{
	if (!list_empty(&req->txps)) {
		struct sdma_txreq *t, *p;

		list_for_each_entry_safe(t, p, &req->txps, list) {
			struct user_sdma_txreq *tx =
				container_of(t, struct user_sdma_txreq, txreq);
			list_del_init(&t->list);
			sdma_txclean(req->pq->dd, t);
			kmem_cache_free(req->pq->txreq_cache, tx);
		}
	}
	if (req->data_iovs) {
		struct sdma_mmu_node *node;
		int i;

		for (i = 0; i < req->data_iovs; i++) {
			node = req->iovs[i].node;
			if (!node)
				continue;

			if (unpin)
				hfi1_mmu_rb_remove(req->pq->handler,
						   &node->rb);
			else
				atomic_dec(&node->refcount);
		}
	}
	kfree(req->tids);
	clear_bit(req->info.comp_idx, req->pq->req_in_use);
}
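
/*
 * Publish the final state of a request to the user-mapped completion
 * ring. The error code is written before the status so user space never
 * observes an ERROR status with a stale errcode.
 */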
static inline void set_comp_state(struct hfi1_user_sdma_pkt_q *pq,
				  struct hfi1_user_sdma_comp_q *cq,
				  u16 idx, enum hfi1_sdma_comp_state state,
				  int ret)
{
	hfi1_cdbg(SDMA, "[%u:%u:%u:%u] Setting completion status %u %d",
		  pq->dd->unit, pq->ctxt, pq->subctxt, idx, state, ret);
	if (state == ERROR)
		cq->comps[idx].errcode = -ret;
	smp_wmb(); /* make sure errcode is visible first */
	cq->comps[idx].status = state;
	trace_hfi1_sdma_user_completion(pq->dd, pq->ctxt, pq->subctxt,
					idx, state, ret);
}

static bool sdma_rb_filter(struct mmu_rb_node *node, unsigned long addr,
			   unsigned long len)
{
	return (bool)(node->addr == addr);
}

static int sdma_rb_insert(void *arg, struct mmu_rb_node *mnode)
{
	struct sdma_mmu_node *node =
		container_of(mnode, struct sdma_mmu_node, rb);

	atomic_inc(&node->refcount);
	return 0;
}

/*
 * Return 1 to remove the node from the rb tree and call the remove op.
 *
 * Called with the rb tree lock held.
 */
static int sdma_rb_evict(void *arg, struct mmu_rb_node *mnode,
			 void *evict_arg, bool *stop)
{
	struct sdma_mmu_node *node =
		container_of(mnode, struct sdma_mmu_node, rb);
	struct evict_data *evict_data = evict_arg;

	/* is this node still being used? */
	if (atomic_read(&node->refcount))
		return 0; /* keep this node */

	/* this node will be evicted, add its pages to our count */
	evict_data->cleared += node->npages;

	/* have enough pages been cleared? */
	if (evict_data->cleared >= evict_data->target)
		*stop = true;

	return 1; /* remove this node */
}
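
/*
 * Called when a cached node is removed from the rb tree (eviction or
 * MMU invalidation): unpin its pages, subtract them from the
 * pinned-page accounting, and free the node.
 */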
static void sdma_rb_remove(void *arg, struct mmu_rb_node *mnode)
{
	struct sdma_mmu_node *node =
		container_of(mnode, struct sdma_mmu_node, rb);

	atomic_sub(node->npages, &node->pq->n_locked);

	unpin_vector_pages(node->pq->mm, node->pages, 0, node->npages);

	kfree(node);
}

static int sdma_rb_invalidate(void *arg, struct mmu_rb_node *mnode)
{
	struct sdma_mmu_node *node =
		container_of(mnode, struct sdma_mmu_node, rb);

	if (!atomic_read(&node->refcount))
		return 1;
	return 0;
}