
/*
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * verbs.c
 *
 * Encapsulates the major functions managing:
 *  o adapters
 *  o endpoints
 *  o connections
 *  o buffer memory
 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
#include <asm/bitops.h>
#include <linux/module.h> /* try_module_get()/module_put() */

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif

/*
 * internal functions
 */

static struct workqueue_struct *rpcrdma_receive_wq;

int
rpcrdma_alloc_wq(void)
{
        struct workqueue_struct *recv_wq;

        recv_wq = alloc_workqueue("xprtrdma_receive",
                                  WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI,
                                  0);
        if (!recv_wq)
                return -ENOMEM;

        rpcrdma_receive_wq = recv_wq;
        return 0;
}

void
rpcrdma_destroy_wq(void)
{
        struct workqueue_struct *wq;

        if (rpcrdma_receive_wq) {
                wq = rpcrdma_receive_wq;
                rpcrdma_receive_wq = NULL;
                destroy_workqueue(wq);
        }
}

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
        struct rpcrdma_ep *ep = context;

        pr_err("rpcrdma: %s on device %s ep %p\n",
               ib_event_msg(event->event), event->device->name, context);
        if (ep->rep_connected == 1) {
                ep->rep_connected = -EIO;
                rpcrdma_conn_func(ep);
                wake_up_all(&ep->rep_connect_wait);
        }
}

/**
 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 */
static void
rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
        /* WARNING: Only wr_cqe and status are reliable at this point */
        if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
                pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
                       ib_wc_status_msg(wc->status),
                       wc->status, wc->vendor_err);
}

/* Perform basic sanity checking to avoid using garbage
 * to update the credit grant value.
 */
static void
rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
{
        struct rpcrdma_msg *rmsgp = rdmab_to_msg(rep->rr_rdmabuf);
        struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
        u32 credits;

        if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
                return;

        credits = be32_to_cpu(rmsgp->rm_credit);
        if (credits == 0)
                credits = 1;    /* don't deadlock */
        else if (credits > buffer->rb_max_requests)
                credits = buffer->rb_max_requests;

        atomic_set(&buffer->rb_credits, credits);
}

/**
 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq: completion queue (ignored)
 * @wc: completed WR
 *
 */
static void
rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
        struct ib_cqe *cqe = wc->wr_cqe;
        struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
                                               rr_cqe);

        /* WARNING: Only wr_id and status are reliable at this point */
        if (wc->status != IB_WC_SUCCESS)
                goto out_fail;

        /* status == SUCCESS means all fields in wc are trustworthy */
        if (wc->opcode != IB_WC_RECV)
                return;

        dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
                __func__, rep, wc->byte_len);

        rep->rr_len = wc->byte_len;
        rep->rr_wc_flags = wc->wc_flags;
        rep->rr_inv_rkey = wc->ex.invalidate_rkey;

        ib_dma_sync_single_for_cpu(rep->rr_device,
                                   rdmab_addr(rep->rr_rdmabuf),
                                   rep->rr_len, DMA_FROM_DEVICE);

        rpcrdma_update_granted_credits(rep);

out_schedule:
        queue_work(rpcrdma_receive_wq, &rep->rr_work);
        return;

out_fail:
        if (wc->status != IB_WC_WR_FLUSH_ERR)
                pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
                       ib_wc_status_msg(wc->status),
                       wc->status, wc->vendor_err);
        rep->rr_len = RPCRDMA_BAD_LEN;
        goto out_schedule;
}

static void
rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
                               struct rdma_conn_param *param)
{
        struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
        const struct rpcrdma_connect_private *pmsg = param->private_data;
        unsigned int rsize, wsize;

        /* Default settings for RPC-over-RDMA Version One */
        r_xprt->rx_ia.ri_reminv_expected = false;
        rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
        wsize = RPCRDMA_V1_DEF_INLINE_SIZE;

        if (pmsg &&
            pmsg->cp_magic == rpcrdma_cmp_magic &&
            pmsg->cp_version == RPCRDMA_CMP_VERSION) {
                r_xprt->rx_ia.ri_reminv_expected = true;
                rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
                wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
        }

        if (rsize < cdata->inline_rsize)
                cdata->inline_rsize = rsize;
        if (wsize < cdata->inline_wsize)
                cdata->inline_wsize = wsize;
        dprintk("RPC: %s: max send %u, max recv %u\n",
                __func__, cdata->inline_wsize, cdata->inline_rsize);
        rpcrdma_set_max_header_sizes(r_xprt);
}

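/* CM event handler: records the result of address and route
 * resolution, tracks connection state changes, and wakes up
 * waiters on rep_connect_wait.
 */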
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
        struct rpcrdma_xprt *xprt = id->context;
        struct rpcrdma_ia *ia = &xprt->rx_ia;
        struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
        struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
#endif
        struct ib_qp_attr *attr = &ia->ri_qp_attr;
        struct ib_qp_init_attr *iattr = &ia->ri_qp_init_attr;
        int connstate = 0;

        switch (event->event) {
        case RDMA_CM_EVENT_ADDR_RESOLVED:
        case RDMA_CM_EVENT_ROUTE_RESOLVED:
                ia->ri_async_rc = 0;
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ADDR_ERROR:
                ia->ri_async_rc = -EHOSTUNREACH;
                dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
                        __func__, ep);
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ROUTE_ERROR:
                ia->ri_async_rc = -ENETUNREACH;
                dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
                        __func__, ep);
                complete(&ia->ri_done);
                break;
        case RDMA_CM_EVENT_ESTABLISHED:
                connstate = 1;
                ib_query_qp(ia->ri_id->qp, attr,
                            IB_QP_MAX_QP_RD_ATOMIC | IB_QP_MAX_DEST_RD_ATOMIC,
                            iattr);
                dprintk("RPC: %s: %d responder resources"
                        " (%d initiator)\n",
                        __func__, attr->max_dest_rd_atomic,
                        attr->max_rd_atomic);
                rpcrdma_update_connect_private(xprt, &event->param.conn);
                goto connected;
        case RDMA_CM_EVENT_CONNECT_ERROR:
                connstate = -ENOTCONN;
                goto connected;
        case RDMA_CM_EVENT_UNREACHABLE:
                connstate = -ENETDOWN;
                goto connected;
        case RDMA_CM_EVENT_REJECTED:
                connstate = -ECONNREFUSED;
                goto connected;
        case RDMA_CM_EVENT_DISCONNECTED:
                connstate = -ECONNABORTED;
                goto connected;
        case RDMA_CM_EVENT_DEVICE_REMOVAL:
                connstate = -ENODEV;
connected:
                dprintk("RPC: %s: %sconnected\n",
                        __func__, connstate > 0 ? "" : "dis");
                atomic_set(&xprt->rx_buf.rb_credits, 1);
                ep->rep_connected = connstate;
                rpcrdma_conn_func(ep);
                wake_up_all(&ep->rep_connect_wait);
                /*FALLTHROUGH*/
        default:
                dprintk("RPC: %s: %pIS:%u (ep 0x%p): %s\n",
                        __func__, sap, rpc_get_port(sap), ep,
                        rdma_event_msg(event->event));
                break;
        }

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
        if (connstate == 1) {
                int ird = attr->max_dest_rd_atomic;
                int tird = ep->rep_remote_cma.responder_resources;

                pr_info("rpcrdma: connection to %pIS:%u on %s, memreg '%s', %d credits, %d responders%s\n",
                        sap, rpc_get_port(sap),
                        ia->ri_device->name,
                        ia->ri_ops->ro_displayname,
                        xprt->rx_buf.rb_max_requests,
                        ird, ird < 4 && ird < tird / 2 ? " (low!)" : "");
        } else if (connstate < 0) {
                pr_info("rpcrdma: connection to %pIS:%u closed (%d)\n",
                        sap, rpc_get_port(sap), connstate);
        }
#endif

        return 0;
}

static void rpcrdma_destroy_id(struct rdma_cm_id *id)
{
        if (id) {
                module_put(id->device->owner);
                rdma_destroy_id(id);
        }
}

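/* Create an rdma_cm_id and use it to resolve the server's address
 * and route. On success the device module is pinned (see the FIXME
 * below); on failure an ERR_PTR is returned.
 */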
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
                  struct rpcrdma_ia *ia, struct sockaddr *addr)
{
        unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
        struct rdma_cm_id *id;
        int rc;

        init_completion(&ia->ri_done);

        id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
                            IB_QPT_RC);
        if (IS_ERR(id)) {
                rc = PTR_ERR(id);
                dprintk("RPC: %s: rdma_create_id() failed %i\n",
                        __func__, rc);
                return id;
        }

        ia->ri_async_rc = -ETIMEDOUT;
        rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
        if (rc) {
                dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
                        __func__, rc);
                goto out;
        }
        rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
        if (rc < 0) {
                dprintk("RPC: %s: wait() exited: %i\n",
                        __func__, rc);
                goto out;
        }

        /* FIXME:
         * Until xprtrdma supports DEVICE_REMOVAL, the provider must
         * be pinned while there are active NFS/RDMA mounts to prevent
         * hangs and crashes at umount time.
         */
        if (!ia->ri_async_rc && !try_module_get(id->device->owner)) {
                dprintk("RPC: %s: Failed to get device module\n",
                        __func__);
                ia->ri_async_rc = -ENODEV;
        }
        rc = ia->ri_async_rc;
        if (rc)
                goto out;

        ia->ri_async_rc = -ETIMEDOUT;
        rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
        if (rc) {
                dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
                        __func__, rc);
                goto put;
        }
        rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
        if (rc < 0) {
                dprintk("RPC: %s: wait() exited: %i\n",
                        __func__, rc);
                goto put;
        }
        rc = ia->ri_async_rc;
        if (rc)
                goto put;

        return id;

put:
        module_put(id->device->owner);
out:
        rdma_destroy_id(id);
        return ERR_PTR(rc);
}

/*
 * Exported functions.
 */

/*
 * Open and initialize an Interface Adapter.
 *  o initializes fields of struct rpcrdma_ia, including
 *    interface and provider attributes and protection zone.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
        struct rpcrdma_ia *ia = &xprt->rx_ia;
        int rc;

        ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
        if (IS_ERR(ia->ri_id)) {
                rc = PTR_ERR(ia->ri_id);
                goto out1;
        }
        ia->ri_device = ia->ri_id->device;

        ia->ri_pd = ib_alloc_pd(ia->ri_device, 0);
        if (IS_ERR(ia->ri_pd)) {
                rc = PTR_ERR(ia->ri_pd);
                pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
                goto out2;
        }

        switch (memreg) {
        case RPCRDMA_FRMR:
                if (frwr_is_supported(ia)) {
                        ia->ri_ops = &rpcrdma_frwr_memreg_ops;
                        break;
                }
                /*FALLTHROUGH*/
        case RPCRDMA_MTHCAFMR:
                if (fmr_is_supported(ia)) {
                        ia->ri_ops = &rpcrdma_fmr_memreg_ops;
                        break;
                }
                /*FALLTHROUGH*/
        default:
                pr_err("rpcrdma: Unsupported memory registration mode: %d\n",
                       memreg);
                rc = -EINVAL;
                goto out3;
        }

        return 0;

out3:
        ib_dealloc_pd(ia->ri_pd);
        ia->ri_pd = NULL;
out2:
        rpcrdma_destroy_id(ia->ri_id);
        ia->ri_id = NULL;
out1:
        return rc;
}

/*
 * Clean up/close an IA.
 *  o if event handles and PD have been initialized, free them.
 *  o close the IA
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
        dprintk("RPC: %s: entering\n", __func__);
        if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
                if (ia->ri_id->qp)
                        rdma_destroy_qp(ia->ri_id);
                rpcrdma_destroy_id(ia->ri_id);
                ia->ri_id = NULL;
        }

        /* If the pd is still busy, xprtrdma missed freeing a resource */
        if (ia->ri_pd && !IS_ERR(ia->ri_pd))
                ib_dealloc_pd(ia->ri_pd);
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
                  struct rpcrdma_create_data_internal *cdata)
{
        struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
        struct ib_cq *sendcq, *recvcq;
        unsigned int max_qp_wr;
        int rc;

        if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) {
                dprintk("RPC: %s: insufficient sge's available\n",
                        __func__);
                return -ENOMEM;
        }

        if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
                dprintk("RPC: %s: insufficient wqe's available\n",
                        __func__);
                return -ENOMEM;
        }
        max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1;

        /* check provider's send/recv wr limits */
        if (cdata->max_requests > max_qp_wr)
                cdata->max_requests = max_qp_wr;

        ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
        ep->rep_attr.qp_context = ep;
        ep->rep_attr.srq = NULL;
        ep->rep_attr.cap.max_send_wr = cdata->max_requests;
        ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
        ep->rep_attr.cap.max_send_wr += 1; /* drain cqe */
        rc = ia->ri_ops->ro_open(ia, ep, cdata);
        if (rc)
                return rc;
        ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
        ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
        ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
        ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES;
        ep->rep_attr.cap.max_recv_sge = 1;
        ep->rep_attr.cap.max_inline_data = 0;
        ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
        ep->rep_attr.qp_type = IB_QPT_RC;
        ep->rep_attr.port_num = ~0;

        dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
                "iovs: send %d recv %d\n",
                __func__,
                ep->rep_attr.cap.max_send_wr,
                ep->rep_attr.cap.max_recv_wr,
                ep->rep_attr.cap.max_send_sge,
                ep->rep_attr.cap.max_recv_sge);

        /* set trigger for requesting send completion */
        ep->rep_cqinit = ep->rep_attr.cap.max_send_wr / 2 - 1;
        if (ep->rep_cqinit <= 2)
                ep->rep_cqinit = 0; /* always signal? */
        rpcrdma_init_cqcount(ep, 0);
        init_waitqueue_head(&ep->rep_connect_wait);
        INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

        sendcq = ib_alloc_cq(ia->ri_device, NULL,
                             ep->rep_attr.cap.max_send_wr + 1,
                             0, IB_POLL_SOFTIRQ);
        if (IS_ERR(sendcq)) {
                rc = PTR_ERR(sendcq);
                dprintk("RPC: %s: failed to create send CQ: %i\n",
                        __func__, rc);
                goto out1;
        }

        recvcq = ib_alloc_cq(ia->ri_device, NULL,
                             ep->rep_attr.cap.max_recv_wr + 1,
                             0, IB_POLL_SOFTIRQ);
        if (IS_ERR(recvcq)) {
                rc = PTR_ERR(recvcq);
                dprintk("RPC: %s: failed to create recv CQ: %i\n",
                        __func__, rc);
                goto out2;
        }

        ep->rep_attr.send_cq = sendcq;
        ep->rep_attr.recv_cq = recvcq;

        /* Initialize cma parameters */
        memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma));

        /* Prepare RDMA-CM private message */
        pmsg->cp_magic = rpcrdma_cmp_magic;
        pmsg->cp_version = RPCRDMA_CMP_VERSION;
        pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok;
        pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
        pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
        ep->rep_remote_cma.private_data = pmsg;
        ep->rep_remote_cma.private_data_len = sizeof(*pmsg);

        /* Client offers RDMA Read but does not initiate */
        ep->rep_remote_cma.initiator_depth = 0;
        if (ia->ri_device->attrs.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
                ep->rep_remote_cma.responder_resources = 32;
        else
                ep->rep_remote_cma.responder_resources =
                        ia->ri_device->attrs.max_qp_rd_atom;

        /* Limit transport retries so client can detect server
         * GID changes quickly. RPC layer handles re-establishing
         * transport connection and retransmission.
         */
        ep->rep_remote_cma.retry_count = 6;

        /* RPC-over-RDMA handles its own flow control. In addition,
         * make all RNR NAKs visible so we know that RPC-over-RDMA
         * flow control is working correctly (no NAKs should be seen).
         */
        ep->rep_remote_cma.flow_control = 0;
        ep->rep_remote_cma.rnr_retry_count = 0;

        return 0;

out2:
        ib_free_cq(sendcq);
out1:
        return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
        dprintk("RPC: %s: entering, connected is %d\n",
                __func__, ep->rep_connected);

        cancel_delayed_work_sync(&ep->rep_connect_worker);

        if (ia->ri_id->qp) {
                rpcrdma_ep_disconnect(ep, ia);
                rdma_destroy_qp(ia->ri_id);
                ia->ri_id->qp = NULL;
        }

        ib_free_cq(ep->rep_attr.recv_cq);
        ib_free_cq(ep->rep_attr.send_cq);
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
        struct rdma_cm_id *id, *old;
        int rc = 0;
        int retry_count = 0;

        if (ep->rep_connected != 0) {
                struct rpcrdma_xprt *xprt;
retry:
                dprintk("RPC: %s: reconnecting...\n", __func__);

                rpcrdma_ep_disconnect(ep, ia);

                xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
                id = rpcrdma_create_id(xprt, ia,
                                       (struct sockaddr *)&xprt->rx_data.addr);
                if (IS_ERR(id)) {
                        rc = -EHOSTUNREACH;
                        goto out;
                }
                /* TEMP TEMP TEMP - fail if new device:
                 * Deregister/remarshal *all* requests!
                 * Close and recreate adapter, pd, etc!
                 * Re-determine all attributes still sane!
                 * More stuff I haven't thought of!
                 * Rrrgh!
                 */
                if (ia->ri_device != id->device) {
                        printk("RPC: %s: can't reconnect on "
                               "different device!\n", __func__);
                        rpcrdma_destroy_id(id);
                        rc = -ENETUNREACH;
                        goto out;
                }
                /* END TEMP */
                rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
                if (rc) {
                        dprintk("RPC: %s: rdma_create_qp failed %i\n",
                                __func__, rc);
                        rpcrdma_destroy_id(id);
                        rc = -ENETUNREACH;
                        goto out;
                }

                old = ia->ri_id;
                ia->ri_id = id;

                rdma_destroy_qp(old);
                rpcrdma_destroy_id(old);
        } else {
                dprintk("RPC: %s: connecting...\n", __func__);
                rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
                if (rc) {
                        dprintk("RPC: %s: rdma_create_qp failed %i\n",
                                __func__, rc);
                        /* do not update ep->rep_connected */
                        return -ENETUNREACH;
                }
        }

        ep->rep_connected = 0;

        rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
        if (rc) {
                dprintk("RPC: %s: rdma_connect() failed with %i\n",
                        __func__, rc);
                goto out;
        }

        wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);

        /*
         * Check state. A non-peer reject indicates no listener
         * (ECONNREFUSED), which may be a transient state. All
         * others indicate a transport condition which has already
         * undergone a best-effort.
         */
        if (ep->rep_connected == -ECONNREFUSED &&
            ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
                dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
                goto retry;
        }
        if (ep->rep_connected <= 0) {
                /* Sometimes, the only way to reliably connect to remote
                 * CMs is to use same nonzero values for ORD and IRD. */
                if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
                    (ep->rep_remote_cma.responder_resources == 0 ||
                     ep->rep_remote_cma.initiator_depth !=
                     ep->rep_remote_cma.responder_resources)) {
                        if (ep->rep_remote_cma.responder_resources == 0)
                                ep->rep_remote_cma.responder_resources = 1;
                        ep->rep_remote_cma.initiator_depth =
                                ep->rep_remote_cma.responder_resources;
                        goto retry;
                }
                rc = ep->rep_connected;
        } else {
                struct rpcrdma_xprt *r_xprt;
                unsigned int extras;

                dprintk("RPC: %s: connected\n", __func__);

                r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
                extras = r_xprt->rx_buf.rb_bc_srv_max_requests;

                if (extras) {
                        rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
                        if (rc) {
                                pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
                                        __func__, rc);
                                rc = 0;
                        }
                }
        }

out:
        if (rc)
                ep->rep_connected = rc;
        return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
        int rc;

        rc = rdma_disconnect(ia->ri_id);
        if (!rc) {
                /* returns without wait if not connected */
                wait_event_interruptible(ep->rep_connect_wait,
                                         ep->rep_connected != 1);
                dprintk("RPC: %s: after wait, %sconnected\n", __func__,
                        (ep->rep_connected == 1) ? "still " : "dis");
        } else {
                dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
                ep->rep_connected = rc;
        }

        ib_drain_qp(ia->ri_id->qp);
}

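/* Work handler: hand each MR on the stale list to the memory
 * registration ops' recovery method, dropping rb_recovery_lock
 * around each call.
 */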
static void
rpcrdma_mr_recovery_worker(struct work_struct *work)
{
        struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
                                                  rb_recovery_worker.work);
        struct rpcrdma_mw *mw;

        spin_lock(&buf->rb_recovery_lock);
        while (!list_empty(&buf->rb_stale_mrs)) {
                mw = list_first_entry(&buf->rb_stale_mrs,
                                      struct rpcrdma_mw, mw_list);
                list_del_init(&mw->mw_list);
                spin_unlock(&buf->rb_recovery_lock);

                dprintk("RPC: %s: recovering MR %p\n", __func__, mw);
                mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw);

                spin_lock(&buf->rb_recovery_lock);
        }
        spin_unlock(&buf->rb_recovery_lock);
}

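/* Queue an MR on the stale list and kick the recovery worker to
 * repair it asynchronously.
 */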
void
rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
{
        struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;

        spin_lock(&buf->rb_recovery_lock);
        list_add(&mw->mw_list, &buf->rb_stale_mrs);
        spin_unlock(&buf->rb_recovery_lock);

        schedule_delayed_work(&buf->rb_recovery_worker, 0);
}

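/* Allocate and initialize a batch of MRs (up to 32 at a time)
 * and splice them onto the buffer's free and all lists.
 */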
static void
rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        unsigned int count;
        LIST_HEAD(free);
        LIST_HEAD(all);

        for (count = 0; count < 32; count++) {
                struct rpcrdma_mw *mw;
                int rc;

                mw = kzalloc(sizeof(*mw), GFP_KERNEL);
                if (!mw)
                        break;

                rc = ia->ri_ops->ro_init_mr(ia, mw);
                if (rc) {
                        kfree(mw);
                        break;
                }

                mw->mw_xprt = r_xprt;

                list_add(&mw->mw_list, &free);
                list_add(&mw->mw_all, &all);
        }

        spin_lock(&buf->rb_mwlock);
        list_splice(&free, &buf->rb_mws);
        list_splice(&all, &buf->rb_all);
        r_xprt->rx_stats.mrs_allocated += count;
        spin_unlock(&buf->rb_mwlock);

        dprintk("RPC: %s: created %u MRs\n", __func__, count);
}

static void
rpcrdma_mr_refresh_worker(struct work_struct *work)
{
        struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
                                                  rb_refresh_worker.work);
        struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
                                                   rx_buf);

        rpcrdma_create_mrs(r_xprt);
}

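/* Allocate an rpcrdma_req, track it on rb_allreqs, and set up
 * its Send WR and completion handler.
 */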
struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
        struct rpcrdma_req *req;

        req = kzalloc(sizeof(*req), GFP_KERNEL);
        if (req == NULL)
                return ERR_PTR(-ENOMEM);

        INIT_LIST_HEAD(&req->rl_free);
        spin_lock(&buffer->rb_reqslock);
        list_add(&req->rl_all, &buffer->rb_allreqs);
        spin_unlock(&buffer->rb_reqslock);
        req->rl_cqe.done = rpcrdma_wc_send;
        req->rl_buffer = &r_xprt->rx_buf;
        INIT_LIST_HEAD(&req->rl_registered);
        req->rl_send_wr.next = NULL;
        req->rl_send_wr.wr_cqe = &req->rl_cqe;
        req->rl_send_wr.sg_list = req->rl_send_sge;
        req->rl_send_wr.opcode = IB_WR_SEND;
        return req;
}

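/* Allocate an rpcrdma_rep and its receive buffer, and initialize
 * the Receive WR used to post it.
 */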
struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct rpcrdma_rep *rep;
        int rc;

        rc = -ENOMEM;
        rep = kzalloc(sizeof(*rep), GFP_KERNEL);
        if (rep == NULL)
                goto out;

        rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize,
                                               DMA_FROM_DEVICE, GFP_KERNEL);
        if (IS_ERR(rep->rr_rdmabuf)) {
                rc = PTR_ERR(rep->rr_rdmabuf);
                goto out_free;
        }

        rep->rr_device = ia->ri_device;
        rep->rr_cqe.done = rpcrdma_wc_receive;
        rep->rr_rxprt = r_xprt;
        INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
        rep->rr_recv_wr.next = NULL;
        rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
        rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
        rep->rr_recv_wr.num_sge = 1;
        return rep;

out_free:
        kfree(rep);
out:
        return ERR_PTR(rc);
}

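/* Create the pools of request and reply buffers, and the initial
 * set of MRs, for a transport instance.
 */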
int
rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        int i, rc;

        buf->rb_max_requests = r_xprt->rx_data.max_requests;
        buf->rb_bc_srv_max_requests = 0;
        atomic_set(&buf->rb_credits, 1);
        spin_lock_init(&buf->rb_mwlock);
        spin_lock_init(&buf->rb_lock);
        spin_lock_init(&buf->rb_recovery_lock);
        INIT_LIST_HEAD(&buf->rb_mws);
        INIT_LIST_HEAD(&buf->rb_all);
        INIT_LIST_HEAD(&buf->rb_stale_mrs);
        INIT_DELAYED_WORK(&buf->rb_refresh_worker,
                          rpcrdma_mr_refresh_worker);
        INIT_DELAYED_WORK(&buf->rb_recovery_worker,
                          rpcrdma_mr_recovery_worker);

        rpcrdma_create_mrs(r_xprt);

        INIT_LIST_HEAD(&buf->rb_send_bufs);
        INIT_LIST_HEAD(&buf->rb_allreqs);
        spin_lock_init(&buf->rb_reqslock);
        for (i = 0; i < buf->rb_max_requests; i++) {
                struct rpcrdma_req *req;

                req = rpcrdma_create_req(r_xprt);
                if (IS_ERR(req)) {
                        dprintk("RPC: %s: request buffer %d alloc"
                                " failed\n", __func__, i);
                        rc = PTR_ERR(req);
                        goto out;
                }
                req->rl_backchannel = false;
                list_add(&req->rl_free, &buf->rb_send_bufs);
        }

        INIT_LIST_HEAD(&buf->rb_recv_bufs);
        for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) {
                struct rpcrdma_rep *rep;

                rep = rpcrdma_create_rep(r_xprt);
                if (IS_ERR(rep)) {
                        dprintk("RPC: %s: reply buffer %d alloc failed\n",
                                __func__, i);
                        rc = PTR_ERR(rep);
                        goto out;
                }
                list_add(&rep->rr_list, &buf->rb_recv_bufs);
        }

        return 0;
out:
        rpcrdma_buffer_destroy(buf);
        return rc;
}

static struct rpcrdma_req *
rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
{
        struct rpcrdma_req *req;

        req = list_first_entry(&buf->rb_send_bufs,
                               struct rpcrdma_req, rl_free);
        list_del(&req->rl_free);
        return req;
}

static struct rpcrdma_rep *
rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf)
{
        struct rpcrdma_rep *rep;

        rep = list_first_entry(&buf->rb_recv_bufs,
                               struct rpcrdma_rep, rr_list);
        list_del(&rep->rr_list);
        return rep;
}

static void
rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
{
        rpcrdma_free_regbuf(rep->rr_rdmabuf);
        kfree(rep);
}

void
rpcrdma_destroy_req(struct rpcrdma_req *req)
{
        rpcrdma_free_regbuf(req->rl_recvbuf);
        rpcrdma_free_regbuf(req->rl_sendbuf);
        rpcrdma_free_regbuf(req->rl_rdmabuf);
        kfree(req);
}

static void
rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf)
{
        struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
                                                   rx_buf);
        struct rpcrdma_ia *ia = rdmab_to_ia(buf);
        struct rpcrdma_mw *mw;
        unsigned int count;

        count = 0;
        spin_lock(&buf->rb_mwlock);
        while (!list_empty(&buf->rb_all)) {
                mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
                list_del(&mw->mw_all);

                spin_unlock(&buf->rb_mwlock);
                ia->ri_ops->ro_release_mr(mw);
                count++;
                spin_lock(&buf->rb_mwlock);
        }
        spin_unlock(&buf->rb_mwlock);
        r_xprt->rx_stats.mrs_allocated = 0;

        dprintk("RPC: %s: released %u MRs\n", __func__, count);
}

void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
        cancel_delayed_work_sync(&buf->rb_recovery_worker);

        while (!list_empty(&buf->rb_recv_bufs)) {
                struct rpcrdma_rep *rep;

                rep = rpcrdma_buffer_get_rep_locked(buf);
                rpcrdma_destroy_rep(rep);
        }
        buf->rb_send_count = 0;

        spin_lock(&buf->rb_reqslock);
        while (!list_empty(&buf->rb_allreqs)) {
                struct rpcrdma_req *req;

                req = list_first_entry(&buf->rb_allreqs,
                                       struct rpcrdma_req, rl_all);
                list_del(&req->rl_all);

                spin_unlock(&buf->rb_reqslock);
                rpcrdma_destroy_req(req);
                spin_lock(&buf->rb_reqslock);
        }
        spin_unlock(&buf->rb_reqslock);
        buf->rb_recv_count = 0;

        rpcrdma_destroy_mrs(buf);
}

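/* Get an unused MR from the free list. If the list is empty,
 * schedule the refresh worker and return NULL.
 */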
struct rpcrdma_mw *
rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        struct rpcrdma_mw *mw = NULL;

        spin_lock(&buf->rb_mwlock);
        if (!list_empty(&buf->rb_mws)) {
                mw = list_first_entry(&buf->rb_mws,
                                      struct rpcrdma_mw, mw_list);
                list_del_init(&mw->mw_list);
        }
        spin_unlock(&buf->rb_mwlock);

        if (!mw)
                goto out_nomws;
        return mw;

out_nomws:
        dprintk("RPC: %s: no MWs available\n", __func__);
        schedule_delayed_work(&buf->rb_refresh_worker, 0);

        /* Allow the reply handler and refresh worker to run */
        cond_resched();

        return NULL;
}

void
rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
{
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;

        spin_lock(&buf->rb_mwlock);
        list_add_tail(&mw->mw_list, &buf->rb_mws);
        spin_unlock(&buf->rb_mwlock);
}

static struct rpcrdma_rep *
rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers)
{
        /* If an RPC previously completed without a reply (say, a
         * credential problem or a soft timeout occurs) then hold off
         * on supplying more Receive buffers until the number of new
         * pending RPCs catches up to the number of posted Receives.
         */
        if (unlikely(buffers->rb_send_count < buffers->rb_recv_count))
                return NULL;
        if (unlikely(list_empty(&buffers->rb_recv_bufs)))
                return NULL;
        buffers->rb_recv_count++;
        return rpcrdma_buffer_get_rep_locked(buffers);
}

/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if available) is attached to send buffer upon return.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
        struct rpcrdma_req *req;

        spin_lock(&buffers->rb_lock);
        if (list_empty(&buffers->rb_send_bufs))
                goto out_reqbuf;
        buffers->rb_send_count++;
        req = rpcrdma_buffer_get_req_locked(buffers);
        req->rl_reply = rpcrdma_buffer_get_rep(buffers);
        spin_unlock(&buffers->rb_lock);
        return req;

out_reqbuf:
        spin_unlock(&buffers->rb_lock);
        pr_warn("RPC: %s: out of request buffers\n", __func__);
        return NULL;
}

/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
        struct rpcrdma_buffer *buffers = req->rl_buffer;
        struct rpcrdma_rep *rep = req->rl_reply;

        req->rl_send_wr.num_sge = 0;
        req->rl_reply = NULL;

        spin_lock(&buffers->rb_lock);
        buffers->rb_send_count--;
        list_add_tail(&req->rl_free, &buffers->rb_send_bufs);
        if (rep) {
                buffers->rb_recv_count--;
                list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
        }
        spin_unlock(&buffers->rb_lock);
}

/*
 * Recover reply buffers from pool.
 * This happens when recovering from disconnect.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
        struct rpcrdma_buffer *buffers = req->rl_buffer;

        spin_lock(&buffers->rb_lock);
        req->rl_reply = rpcrdma_buffer_get_rep(buffers);
        spin_unlock(&buffers->rb_lock);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
        struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;

        spin_lock(&buffers->rb_lock);
        buffers->rb_recv_count--;
        list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
        spin_unlock(&buffers->rb_lock);
}

/**
 * rpcrdma_alloc_regbuf - allocate and DMA-map memory for SEND/RECV buffers
 * @size: size of buffer to be allocated, in bytes
 * @direction: direction of data movement
 * @flags: GFP flags
 *
 * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that
 * can be persistently DMA-mapped for I/O.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. During Long Calls
 * or Replies they may be registered externally via ro_map.
 */
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
                     gfp_t flags)
{
        struct rpcrdma_regbuf *rb;

        rb = kmalloc(sizeof(*rb) + size, flags);
        if (rb == NULL)
                return ERR_PTR(-ENOMEM);

        rb->rg_device = NULL;
        rb->rg_direction = direction;
        rb->rg_iov.length = size;

        return rb;
}

/**
 * __rpcrdma_dma_map_regbuf - DMA-map a regbuf
 * @ia: controlling rpcrdma_ia
 * @rb: regbuf to be mapped
 */
bool
__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
        if (rb->rg_direction == DMA_NONE)
                return false;

        rb->rg_iov.addr = ib_dma_map_single(ia->ri_device,
                                            (void *)rb->rg_base,
                                            rdmab_length(rb),
                                            rb->rg_direction);
        if (ib_dma_mapping_error(ia->ri_device, rdmab_addr(rb)))
                return false;

        rb->rg_device = ia->ri_device;
        rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
        return true;
}

static void
rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
{
        if (!rpcrdma_regbuf_is_mapped(rb))
                return;

        ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb),
                            rdmab_length(rb), rb->rg_direction);
        rb->rg_device = NULL;
}

/**
 * rpcrdma_free_regbuf - deregister and free registered buffer
 * @rb: regbuf to be deregistered and freed
 */
void
rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
{
        if (!rb)
                return;

        rpcrdma_dma_unmap_regbuf(rb);
        kfree(rb);
}

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
                struct rpcrdma_ep *ep,
                struct rpcrdma_req *req)
{
        struct ib_send_wr *send_wr = &req->rl_send_wr;
        struct ib_send_wr *send_wr_fail;
        int rc;

        if (req->rl_reply) {
                rc = rpcrdma_ep_post_recv(ia, req->rl_reply);
                if (rc)
                        return rc;
                req->rl_reply = NULL;
        }

        dprintk("RPC: %s: posting %d s/g entries\n",
                __func__, send_wr->num_sge);

        rpcrdma_set_signaled(ep, send_wr);
        rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
        if (rc)
                goto out_postsend_err;
        return 0;

out_postsend_err:
        pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
        return -ENOTCONN;
}

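/* Post a single Receive WR, DMA-mapping the receive buffer
 * first if it has not been mapped yet.
 */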
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
                     struct rpcrdma_rep *rep)
{
        struct ib_recv_wr *recv_wr_fail;
        int rc;

        if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
                goto out_map;
        rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
        if (rc)
                goto out_postrecv;
        return 0;

out_map:
        pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
        return -EIO;

out_postrecv:
        pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
        return -ENOTCONN;
}

/**
 * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
 * @r_xprt: transport associated with these backchannel resources
 * @count: minimum number of incoming requests expected
 *
 * Returns zero if all requested buffers were posted, or a negative errno.
 */
int
rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
{
        struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct rpcrdma_rep *rep;
        int rc;

        while (count--) {
                spin_lock(&buffers->rb_lock);
                if (list_empty(&buffers->rb_recv_bufs))
                        goto out_reqbuf;
                rep = rpcrdma_buffer_get_rep_locked(buffers);
                spin_unlock(&buffers->rb_lock);

                rc = rpcrdma_ep_post_recv(ia, rep);
                if (rc)
                        goto out_rc;
        }

        return 0;

out_reqbuf:
        spin_unlock(&buffers->rb_lock);
        pr_warn("%s: no extra receive buffers\n", __func__);
        return -ENOMEM;

out_rc:
        rpcrdma_recv_buffer_put(rep);
        return rc;
}