/*
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided
 * with the distribution.
 *
 * Neither the name of the Network Appliance, Inc. nor the names of
 * its contributors may be used to endorse or promote products
 * derived from this software without specific prior written
 * permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * verbs.c
 *
 * Encapsulates the major functions managing:
 *  o adapters
 *  o endpoints
 *  o connections
 *  o buffer memory
 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/prefetch.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
#include <asm/bitops.h>
#include <rdma/ib_cm.h>

#include "xprt_rdma.h"

/*
 * Globals/Macros
 */

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/*
 * internal functions
 */

static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);

static struct workqueue_struct *rpcrdma_receive_wq __read_mostly;

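/* Receive completions are handed off to rpcrdma_receive_wq so that
 * reply processing (rpcrdma_reply_handler) runs in process context
 * rather than in the completion handler itself. WQ_MEM_RECLAIM is set
 * because this workqueue must make forward progress while the system
 * is reclaiming memory.
 */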
int
rpcrdma_alloc_wq(void)
{
	struct workqueue_struct *recv_wq;

	recv_wq = alloc_workqueue("xprtrdma_receive",
				  WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI,
				  0);
	if (!recv_wq)
		return -ENOMEM;

	rpcrdma_receive_wq = recv_wq;
	return 0;
}

void
rpcrdma_destroy_wq(void)
{
	struct workqueue_struct *wq;

	if (rpcrdma_receive_wq) {
		wq = rpcrdma_receive_wq;
		rpcrdma_receive_wq = NULL;
		destroy_workqueue(wq);
	}
}

static void
rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	pr_err("rpcrdma: %s on device %s ep %p\n",
	       ib_event_msg(event->event), event->device->name, context);

	if (ep->rep_connected == 1) {
		ep->rep_connected = -EIO;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
	}
}

/**
 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC
 * @cq:	completion queue (ignored)
 * @wc:	completed WR
 *
 */
static void
rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
	/* WARNING: Only wr_cqe and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: Send: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
}

/* Perform basic sanity checking to avoid using garbage
 * to update the credit grant value.
 */
static void
rpcrdma_update_granted_credits(struct rpcrdma_rep *rep)
{
	struct rpcrdma_msg *rmsgp = rdmab_to_msg(rep->rr_rdmabuf);
	struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf;
	u32 credits;

	if (rep->rr_len < RPCRDMA_HDRLEN_ERR)
		return;

	credits = be32_to_cpu(rmsgp->rm_credit);
	if (credits == 0)
		credits = 1;	/* don't deadlock */
	else if (credits > buffer->rb_max_requests)
		credits = buffer->rb_max_requests;

	atomic_set(&buffer->rb_credits, credits);
}

/**
 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq:	completion queue (ignored)
 * @wc:	completed WR
 *
 */
static void
rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
					       rr_cqe);

	/* WARNING: Only wr_id and status are reliable at this point */
	if (wc->status != IB_WC_SUCCESS)
		goto out_fail;

	/* status == SUCCESS means all fields in wc are trustworthy */
	if (wc->opcode != IB_WC_RECV)
		return;

	dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n",
		__func__, rep, wc->byte_len);

	rep->rr_len = wc->byte_len;
	rep->rr_wc_flags = wc->wc_flags;
	rep->rr_inv_rkey = wc->ex.invalidate_rkey;

	ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
				   rdmab_addr(rep->rr_rdmabuf),
				   rep->rr_len, DMA_FROM_DEVICE);

	rpcrdma_update_granted_credits(rep);

out_schedule:
	queue_work(rpcrdma_receive_wq, &rep->rr_work);
	return;

out_fail:
	if (wc->status != IB_WC_WR_FLUSH_ERR)
		pr_err("rpcrdma: Recv: %s (%u/0x%x)\n",
		       ib_wc_status_msg(wc->status),
		       wc->status, wc->vendor_err);
	rep->rr_len = RPCRDMA_BAD_LEN;
	goto out_schedule;
}

static void
rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
			       struct rdma_conn_param *param)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	const struct rpcrdma_connect_private *pmsg = param->private_data;
	unsigned int rsize, wsize;

	/* Default settings for RPC-over-RDMA Version One */
	r_xprt->rx_ia.ri_reminv_expected = false;
	r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;

	if (pmsg &&
	    pmsg->cp_magic == rpcrdma_cmp_magic &&
	    pmsg->cp_version == RPCRDMA_CMP_VERSION) {
		r_xprt->rx_ia.ri_reminv_expected = true;
		r_xprt->rx_ia.ri_implicit_roundup = true;
		rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
		wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
	}

	if (rsize < cdata->inline_rsize)
		cdata->inline_rsize = rsize;
	if (wsize < cdata->inline_wsize)
		cdata->inline_wsize = wsize;
	dprintk("RPC: %s: max send %u, max recv %u\n",
		__func__, cdata->inline_wsize, cdata->inline_rsize);
	rpcrdma_set_max_header_sizes(r_xprt);
}

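/* Connection Manager event handler.
 *
 * Address and route resolution events record their status in
 * ia->ri_async_rc and complete ia->ri_done for rpcrdma_create_id().
 * Connection events update ep->rep_connected and wake waiters in
 * rpcrdma_ep_connect() and rpcrdma_ep_disconnect(). Device removal is
 * handled synchronously here; returning 1 tells the RDMA core to
 * destroy the rdma_cm_id.
 */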
static int
rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct rpcrdma_xprt *xprt = id->context;
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	struct rpcrdma_ep *ep = &xprt->rx_ep;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
	struct sockaddr *sap = (struct sockaddr *)&ep->rep_remote_addr;
#endif
	int connstate = 0;

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ia->ri_async_rc = 0;
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ia->ri_async_rc = -EHOSTUNREACH;
		dprintk("RPC: %s: CM address resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ia->ri_async_rc = -ENETUNREACH;
		dprintk("RPC: %s: CM route resolution error, ep 0x%p\n",
			__func__, ep);
		complete(&ia->ri_done);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
		pr_info("rpcrdma: removing device %s for %pIS:%u\n",
			ia->ri_device->name,
			sap, rpc_get_port(sap));
#endif
		set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
		ep->rep_connected = -ENODEV;
		xprt_force_disconnect(&xprt->rx_xprt);
		wait_for_completion(&ia->ri_remove_done);

		ia->ri_id = NULL;
		ia->ri_pd = NULL;
		ia->ri_device = NULL;
		/* Return 1 to ensure the core destroys the id. */
		return 1;
	case RDMA_CM_EVENT_ESTABLISHED:
		connstate = 1;
		rpcrdma_update_connect_private(xprt, &event->param.conn);
		goto connected;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		connstate = -ENOTCONN;
		goto connected;
	case RDMA_CM_EVENT_UNREACHABLE:
		connstate = -ENETDOWN;
		goto connected;
	case RDMA_CM_EVENT_REJECTED:
		dprintk("rpcrdma: connection to %pIS:%u rejected: %s\n",
			sap, rpc_get_port(sap),
			rdma_reject_msg(id, event->status));
		connstate = -ECONNREFUSED;
		if (event->status == IB_CM_REJ_STALE_CONN)
			connstate = -EAGAIN;
		goto connected;
	case RDMA_CM_EVENT_DISCONNECTED:
		connstate = -ECONNABORTED;
connected:
		atomic_set(&xprt->rx_buf.rb_credits, 1);
		ep->rep_connected = connstate;
		rpcrdma_conn_func(ep);
		wake_up_all(&ep->rep_connect_wait);
		/*FALLTHROUGH*/
	default:
		dprintk("RPC: %s: %pIS:%u on %s/%s (ep 0x%p): %s\n",
			__func__, sap, rpc_get_port(sap),
			ia->ri_device->name, ia->ri_ops->ro_displayname,
			ep, rdma_event_msg(event->event));
		break;
	}

	return 0;
}

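/* Create an rdma_cm_id for this connection attempt and resolve the
 * peer's address and route. Both steps are asynchronous: the CM upcall
 * stores the outcome in ia->ri_async_rc and completes ia->ri_done, and
 * this function waits for each step with a timeout slightly longer
 * than RDMA_RESOLVE_TIMEOUT.
 */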
static struct rdma_cm_id *
rpcrdma_create_id(struct rpcrdma_xprt *xprt,
		  struct rpcrdma_ia *ia, struct sockaddr *addr)
{
	unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ia->ri_done);
	init_completion(&ia->ri_remove_done);

	id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
			    IB_QPT_RC);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		dprintk("RPC: %s: rdma_create_id() failed %i\n",
			__func__, rc);
		return id;
	}

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_addr() failed %i\n",
			__func__, rc);
		goto out;
	}
	rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
	if (rc < 0) {
		dprintk("RPC: %s: wait() exited: %i\n",
			__func__, rc);
		goto out;
	}

	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	ia->ri_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc) {
		dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
			__func__, rc);
		goto out;
	}
	rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
	if (rc < 0) {
		dprintk("RPC: %s: wait() exited: %i\n",
			__func__, rc);
		goto out;
	}
	rc = ia->ri_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}

/*
 * Exported functions.
 */

/**
 * rpcrdma_ia_open - Open and initialize an Interface Adapter.
 * @xprt: controlling transport
 * @addr: IP address of remote peer
 *
 * Returns 0 on success, negative errno if an appropriate
 * Interface Adapter could not be found and opened.
 */
int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
{
	struct rpcrdma_ia *ia = &xprt->rx_ia;
	int rc;

	ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
	if (IS_ERR(ia->ri_id)) {
		rc = PTR_ERR(ia->ri_id);
		goto out_err;
	}
	ia->ri_device = ia->ri_id->device;

	ia->ri_pd = ib_alloc_pd(ia->ri_device, 0);
	if (IS_ERR(ia->ri_pd)) {
		rc = PTR_ERR(ia->ri_pd);
		pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
		goto out_err;
	}

	switch (xprt_rdma_memreg_strategy) {
	case RPCRDMA_FRMR:
		if (frwr_is_supported(ia)) {
			ia->ri_ops = &rpcrdma_frwr_memreg_ops;
			break;
		}
		/*FALLTHROUGH*/
	case RPCRDMA_MTHCAFMR:
		if (fmr_is_supported(ia)) {
			ia->ri_ops = &rpcrdma_fmr_memreg_ops;
			break;
		}
		/*FALLTHROUGH*/
	default:
		pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
		       ia->ri_device->name, xprt_rdma_memreg_strategy);
		rc = -EINVAL;
		goto out_err;
	}

	return 0;

out_err:
	rpcrdma_ia_close(ia);
	return rc;
}

/**
 * rpcrdma_ia_remove - Handle device driver unload
 * @ia: interface adapter being removed
 *
 * Divest transport H/W resources associated with this adapter,
 * but allow it to be restored later.
 */
void
rpcrdma_ia_remove(struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
						   rx_ia);
	struct rpcrdma_ep *ep = &r_xprt->rx_ep;
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req;
	struct rpcrdma_rep *rep;

	cancel_delayed_work_sync(&buf->rb_refresh_worker);

	/* This is similar to rpcrdma_ep_destroy, but:
	 * - Don't cancel the connect worker.
	 * - Don't call rpcrdma_ep_disconnect, which waits
	 *   for another conn upcall, which will deadlock.
	 * - rdma_disconnect is unneeded, the underlying
	 *   connection is already gone.
	 */
	if (ia->ri_id->qp) {
		ib_drain_qp(ia->ri_id->qp);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}
	ib_free_cq(ep->rep_attr.recv_cq);
	ib_free_cq(ep->rep_attr.send_cq);

	/* The ULP is responsible for ensuring all DMA
	 * mappings and MRs are gone.
	 */
	list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
		rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
	list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
		rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
		rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
		rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
	}
	rpcrdma_destroy_mrs(buf);

	/* Allow waiters to continue */
	complete(&ia->ri_remove_done);
}

/**
 * rpcrdma_ia_close - Clean up/close an IA.
 * @ia: interface adapter to close
 *
 */
void
rpcrdma_ia_close(struct rpcrdma_ia *ia)
{
	dprintk("RPC: %s: entering\n", __func__);
	if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
		if (ia->ri_id->qp)
			rdma_destroy_qp(ia->ri_id);
		rdma_destroy_id(ia->ri_id);
	}
	ia->ri_id = NULL;
	ia->ri_device = NULL;

	/* If the pd is still busy, xprtrdma missed freeing a resource */
	if (ia->ri_pd && !IS_ERR(ia->ri_pd))
		ib_dealloc_pd(ia->ri_pd);
	ia->ri_pd = NULL;
}

/*
 * Create unconnected endpoint.
 */
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
		  struct rpcrdma_create_data_internal *cdata)
{
	struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
	unsigned int max_qp_wr, max_sge;
	struct ib_cq *sendcq, *recvcq;
	int rc;

	max_sge = min_t(unsigned int, ia->ri_device->attrs.max_sge,
			RPCRDMA_MAX_SEND_SGES);
	if (max_sge < RPCRDMA_MIN_SEND_SGES) {
		pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
		return -ENOMEM;
	}
	ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES;

	if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
		dprintk("RPC: %s: insufficient wqe's available\n",
			__func__);
		return -ENOMEM;
	}
	max_qp_wr = ia->ri_device->attrs.max_qp_wr - RPCRDMA_BACKWARD_WRS - 1;

	/* check provider's send/recv wr limits */
	if (cdata->max_requests > max_qp_wr)
		cdata->max_requests = max_qp_wr;

	ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
	ep->rep_attr.qp_context = ep;
	ep->rep_attr.srq = NULL;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_send_wr += 1;	/* drain cqe */
	rc = ia->ri_ops->ro_open(ia, ep, cdata);
	if (rc)
		return rc;
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_recv_wr += 1;	/* drain cqe */
	ep->rep_attr.cap.max_send_sge = max_sge;
	ep->rep_attr.cap.max_recv_sge = 1;
	ep->rep_attr.cap.max_inline_data = 0;
	ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->rep_attr.qp_type = IB_QPT_RC;
	ep->rep_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->rep_attr.cap.max_send_wr,
		ep->rep_attr.cap.max_recv_wr,
		ep->rep_attr.cap.max_send_sge,
		ep->rep_attr.cap.max_recv_sge);

	/* set trigger for requesting send completion */
	ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
	if (ep->rep_cqinit <= 2)
		ep->rep_cqinit = 0;	/* always signal? */
	rpcrdma_init_cqcount(ep, 0);
	init_waitqueue_head(&ep->rep_connect_wait);
	INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);

	sendcq = ib_alloc_cq(ia->ri_device, NULL,
			     ep->rep_attr.cap.max_send_wr + 1,
			     0, IB_POLL_SOFTIRQ);
	if (IS_ERR(sendcq)) {
		rc = PTR_ERR(sendcq);
		dprintk("RPC: %s: failed to create send CQ: %i\n",
			__func__, rc);
		goto out1;
	}

	recvcq = ib_alloc_cq(ia->ri_device, NULL,
			     ep->rep_attr.cap.max_recv_wr + 1,
			     0, IB_POLL_SOFTIRQ);
	if (IS_ERR(recvcq)) {
		rc = PTR_ERR(recvcq);
		dprintk("RPC: %s: failed to create recv CQ: %i\n",
			__func__, rc);
		goto out2;
	}

	ep->rep_attr.send_cq = sendcq;
	ep->rep_attr.recv_cq = recvcq;

	/* Initialize cma parameters */
	memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma));

	/* Prepare RDMA-CM private message */
	pmsg->cp_magic = rpcrdma_cmp_magic;
	pmsg->cp_version = RPCRDMA_CMP_VERSION;
	pmsg->cp_flags |= ia->ri_ops->ro_send_w_inv_ok;
	pmsg->cp_send_size = rpcrdma_encode_buffer_size(cdata->inline_wsize);
	pmsg->cp_recv_size = rpcrdma_encode_buffer_size(cdata->inline_rsize);
	ep->rep_remote_cma.private_data = pmsg;
	ep->rep_remote_cma.private_data_len = sizeof(*pmsg);

	/* Client offers RDMA Read but does not initiate */
	ep->rep_remote_cma.initiator_depth = 0;
	if (ia->ri_device->attrs.max_qp_rd_atom > 32)	/* arbitrary but <= 255 */
		ep->rep_remote_cma.responder_resources = 32;
	else
		ep->rep_remote_cma.responder_resources =
			ia->ri_device->attrs.max_qp_rd_atom;

	/* Limit transport retries so client can detect server
	 * GID changes quickly. RPC layer handles re-establishing
	 * transport connection and retransmission.
	 */
	ep->rep_remote_cma.retry_count = 6;

	/* RPC-over-RDMA handles its own flow control. In addition,
	 * make all RNR NAKs visible so we know that RPC-over-RDMA
	 * flow control is working correctly (no NAKs should be seen).
	 */
	ep->rep_remote_cma.flow_control = 0;
	ep->rep_remote_cma.rnr_retry_count = 0;

	return 0;

out2:
	ib_free_cq(sendcq);
out1:
	return rc;
}

/*
 * rpcrdma_ep_destroy
 *
 * Disconnect and destroy endpoint. After this, the only
 * valid operations on the ep are to free it (if dynamically
 * allocated) or re-create it.
 */
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	dprintk("RPC: %s: entering, connected is %d\n",
		__func__, ep->rep_connected);

	cancel_delayed_work_sync(&ep->rep_connect_worker);

	if (ia->ri_id->qp) {
		rpcrdma_ep_disconnect(ep, ia);
		rdma_destroy_qp(ia->ri_id);
		ia->ri_id->qp = NULL;
	}

	ib_free_cq(ep->rep_attr.recv_cq);
	ib_free_cq(ep->rep_attr.send_cq);
}

/* Re-establish a connection after a device removal event.
 * Unlike a normal reconnection, a fresh PD and a new set
 * of MRs and buffers are needed.
 */
static int
rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
			 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
	int rc, err;

	pr_info("%s: r_xprt = %p\n", __func__, r_xprt);

	rc = -EHOSTUNREACH;
	if (rpcrdma_ia_open(r_xprt, sap))
		goto out1;

	rc = -ENOMEM;
	err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data);
	if (err) {
		pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);
		goto out2;
	}

	rc = -ENETUNREACH;
	err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
	if (err) {
		pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
		goto out3;
	}

	rpcrdma_create_mrs(r_xprt);
	return 0;

out3:
	rpcrdma_ep_destroy(ep, ia);
out2:
	rpcrdma_ia_close(ia);
out1:
	return rc;
}

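/* Reconnect on the same device after a transport-level disconnect.
 * A fresh rdma_cm_id and QP are created and swapped into the
 * transport; whichever rdma_cm_id is no longer needed (the old one on
 * success, the new one on failure) is released at out_destroy.
 */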
static int
rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
		     struct rpcrdma_ia *ia)
{
	struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
	struct rdma_cm_id *id, *old;
	int err, rc;

	dprintk("RPC: %s: reconnecting...\n", __func__);

	rpcrdma_ep_disconnect(ep, ia);

	rc = -EHOSTUNREACH;
	id = rpcrdma_create_id(r_xprt, ia, sap);
	if (IS_ERR(id))
		goto out;

	/* As long as the new ID points to the same device as the
	 * old ID, we can reuse the transport's existing PD and all
	 * previously allocated MRs. Also, the same device means
	 * the transport's previous DMA mappings are still valid.
	 *
	 * This is a sanity check only. There should be no way these
	 * point to two different devices here.
	 */
	old = id;
	rc = -ENETUNREACH;
	if (ia->ri_device != id->device) {
		pr_err("rpcrdma: can't reconnect on different device!\n");
		goto out_destroy;
	}

	err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
	if (err) {
		dprintk("RPC: %s: rdma_create_qp returned %d\n",
			__func__, err);
		goto out_destroy;
	}

	/* Atomically replace the transport's ID and QP. */
	rc = 0;
	old = ia->ri_id;
	ia->ri_id = id;
	rdma_destroy_qp(old);

out_destroy:
	rdma_destroy_id(old);
out:
	return rc;
}

/*
 * Connect unconnected endpoint.
 */
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
						   rx_ia);
	unsigned int extras;
	int rc;

retry:
	switch (ep->rep_connected) {
	case 0:
		dprintk("RPC: %s: connecting...\n", __func__);
		rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
		if (rc) {
			dprintk("RPC: %s: rdma_create_qp failed %i\n",
				__func__, rc);
			rc = -ENETUNREACH;
			goto out_noupdate;
		}
		break;
	case -ENODEV:
		rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
		if (rc)
			goto out_noupdate;
		break;
	default:
		rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
		if (rc)
			goto out;
	}

	ep->rep_connected = 0;

	rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
	if (rc) {
		dprintk("RPC: %s: rdma_connect() failed with %i\n",
			__func__, rc);
		goto out;
	}

	wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
	if (ep->rep_connected <= 0) {
		if (ep->rep_connected == -EAGAIN)
			goto retry;
		rc = ep->rep_connected;
		goto out;
	}

	dprintk("RPC: %s: connected\n", __func__);
	extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
	if (extras)
		rpcrdma_ep_post_extra_recv(r_xprt, extras);

out:
	if (rc)
		ep->rep_connected = rc;

out_noupdate:
	return rc;
}

/*
 * rpcrdma_ep_disconnect
 *
 * This is separate from destroy to facilitate the ability
 * to reconnect without recreating the endpoint.
 *
 * This call is not reentrant, and must not be made in parallel
 * on the same endpoint.
 */
void
rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
	int rc;

	rc = rdma_disconnect(ia->ri_id);
	if (!rc) {
		/* returns without wait if not connected */
		wait_event_interruptible(ep->rep_connect_wait,
					 ep->rep_connected != 1);
		dprintk("RPC: %s: after wait, %sconnected\n", __func__,
			(ep->rep_connected == 1) ? "still " : "dis");
	} else {
		dprintk("RPC: %s: rdma_disconnect %i\n", __func__, rc);
		ep->rep_connected = rc;
	}

	ib_drain_qp(ia->ri_id->qp);
}

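/* Run down the list of stale MWs queued by rpcrdma_defer_mr_recovery()
 * and hand each one to the registration mode's ->ro_recover_mr method.
 * rb_recovery_lock is dropped around each call so the method is not
 * invoked under the spinlock.
 */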
static void
rpcrdma_mr_recovery_worker(struct work_struct *work)
{
	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
						  rb_recovery_worker.work);
	struct rpcrdma_mw *mw;

	spin_lock(&buf->rb_recovery_lock);
	while (!list_empty(&buf->rb_stale_mrs)) {
		mw = rpcrdma_pop_mw(&buf->rb_stale_mrs);
		spin_unlock(&buf->rb_recovery_lock);

		dprintk("RPC: %s: recovering MR %p\n", __func__, mw);
		mw->mw_xprt->rx_ia.ri_ops->ro_recover_mr(mw);

		spin_lock(&buf->rb_recovery_lock);
	}
	spin_unlock(&buf->rb_recovery_lock);
}

void
rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
{
	struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;

	spin_lock(&buf->rb_recovery_lock);
	rpcrdma_push_mw(mw, &buf->rb_stale_mrs);
	spin_unlock(&buf->rb_recovery_lock);

	schedule_delayed_work(&buf->rb_recovery_worker, 0);
}

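/* Allocate and initialize a batch of up to 32 MWs and splice them onto
 * the buffer's free list (rb_mws) and all-MWs list (rb_all). Called at
 * transport setup and again from the refresh worker when
 * rpcrdma_get_mw() finds the free list empty.
 */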
static void
rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	unsigned int count;
	LIST_HEAD(free);
	LIST_HEAD(all);

	for (count = 0; count < 32; count++) {
		struct rpcrdma_mw *mw;
		int rc;

		mw = kzalloc(sizeof(*mw), GFP_KERNEL);
		if (!mw)
			break;

		rc = ia->ri_ops->ro_init_mr(ia, mw);
		if (rc) {
			kfree(mw);
			break;
		}

		mw->mw_xprt = r_xprt;

		list_add(&mw->mw_list, &free);
		list_add(&mw->mw_all, &all);
	}

	spin_lock(&buf->rb_mwlock);
	list_splice(&free, &buf->rb_mws);
	list_splice(&all, &buf->rb_all);
	r_xprt->rx_stats.mrs_allocated += count;
	spin_unlock(&buf->rb_mwlock);

	dprintk("RPC: %s: created %u MRs\n", __func__, count);
}

static void
rpcrdma_mr_refresh_worker(struct work_struct *work)
{
	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
						  rb_refresh_worker.work);
	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
						   rx_buf);

	rpcrdma_create_mrs(r_xprt);
}

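/* Allocate an rpcrdma_req and add it to rb_allreqs so that teardown
 * paths (rpcrdma_ia_remove, rpcrdma_buffer_destroy) can always find
 * it. Only the Send WR skeleton is initialized here; the regbufs it
 * will use are attached later.
 */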
struct rpcrdma_req *
rpcrdma_create_req(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
	struct rpcrdma_req *req;

	req = kzalloc(sizeof(*req), GFP_KERNEL);
	if (req == NULL)
		return ERR_PTR(-ENOMEM);

	spin_lock(&buffer->rb_reqslock);
	list_add(&req->rl_all, &buffer->rb_allreqs);
	spin_unlock(&buffer->rb_reqslock);
	req->rl_cqe.done = rpcrdma_wc_send;
	req->rl_buffer = &r_xprt->rx_buf;
	INIT_LIST_HEAD(&req->rl_registered);
	req->rl_send_wr.next = NULL;
	req->rl_send_wr.wr_cqe = &req->rl_cqe;
	req->rl_send_wr.sg_list = req->rl_send_sge;
	req->rl_send_wr.opcode = IB_WR_SEND;
	return req;
}

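/* Allocate an rpcrdma_rep and its receive regbuf, sized to the inline
 * receive threshold (cdata->inline_rsize), and prepare both the
 * Receive WR and the work item that defers reply processing to
 * rpcrdma_receive_wq.
 */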
struct rpcrdma_rep *
rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
	struct rpcrdma_rep *rep;
	int rc;

	rc = -ENOMEM;
	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
	if (rep == NULL)
		goto out;

	rep->rr_rdmabuf = rpcrdma_alloc_regbuf(cdata->inline_rsize,
					       DMA_FROM_DEVICE, GFP_KERNEL);
	if (IS_ERR(rep->rr_rdmabuf)) {
		rc = PTR_ERR(rep->rr_rdmabuf);
		goto out_free;
	}

	rep->rr_cqe.done = rpcrdma_wc_receive;
	rep->rr_rxprt = r_xprt;
	INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
	rep->rr_recv_wr.next = NULL;
	rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
	rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
	rep->rr_recv_wr.num_sge = 1;
	return rep;

out_free:
	kfree(rep);
out:
	return ERR_PTR(rc);
}

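/* Set up the transport's buffer pools: the MW free list, one
 * rpcrdma_req per credit (rb_max_requests), and a matching set of
 * rpcrdma_reps plus RPCRDMA_MAX_BC_REQUESTS extras for backchannel
 * traffic. On any failure the partially built pools are torn down via
 * rpcrdma_buffer_destroy().
 */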
int
rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	int i, rc;

	buf->rb_max_requests = r_xprt->rx_data.max_requests;
	buf->rb_bc_srv_max_requests = 0;
	atomic_set(&buf->rb_credits, 1);
	spin_lock_init(&buf->rb_mwlock);
	spin_lock_init(&buf->rb_lock);
	spin_lock_init(&buf->rb_recovery_lock);
	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);
	INIT_LIST_HEAD(&buf->rb_pending);
	INIT_LIST_HEAD(&buf->rb_stale_mrs);
	INIT_DELAYED_WORK(&buf->rb_refresh_worker,
			  rpcrdma_mr_refresh_worker);
	INIT_DELAYED_WORK(&buf->rb_recovery_worker,
			  rpcrdma_mr_recovery_worker);

	rpcrdma_create_mrs(r_xprt);

	INIT_LIST_HEAD(&buf->rb_send_bufs);
	INIT_LIST_HEAD(&buf->rb_allreqs);
	spin_lock_init(&buf->rb_reqslock);
	for (i = 0; i < buf->rb_max_requests; i++) {
		struct rpcrdma_req *req;

		req = rpcrdma_create_req(r_xprt);
		if (IS_ERR(req)) {
			dprintk("RPC: %s: request buffer %d alloc"
				" failed\n", __func__, i);
			rc = PTR_ERR(req);
			goto out;
		}
		req->rl_backchannel = false;
		list_add(&req->rl_list, &buf->rb_send_bufs);
	}

	INIT_LIST_HEAD(&buf->rb_recv_bufs);
	for (i = 0; i < buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; i++) {
		struct rpcrdma_rep *rep;

		rep = rpcrdma_create_rep(r_xprt);
		if (IS_ERR(rep)) {
			dprintk("RPC: %s: reply buffer %d alloc failed\n",
				__func__, i);
			rc = PTR_ERR(rep);
			goto out;
		}
		list_add(&rep->rr_list, &buf->rb_recv_bufs);
	}

	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

static struct rpcrdma_req *
rpcrdma_buffer_get_req_locked(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_req *req;

	req = list_first_entry(&buf->rb_send_bufs,
			       struct rpcrdma_req, rl_list);
	list_del_init(&req->rl_list);
	return req;
}

static struct rpcrdma_rep *
rpcrdma_buffer_get_rep_locked(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_rep *rep;

	rep = list_first_entry(&buf->rb_recv_bufs,
			       struct rpcrdma_rep, rr_list);
	list_del(&rep->rr_list);
	return rep;
}

static void
rpcrdma_destroy_rep(struct rpcrdma_rep *rep)
{
	rpcrdma_free_regbuf(rep->rr_rdmabuf);
	kfree(rep);
}

void
rpcrdma_destroy_req(struct rpcrdma_req *req)
{
	rpcrdma_free_regbuf(req->rl_recvbuf);
	rpcrdma_free_regbuf(req->rl_sendbuf);
	rpcrdma_free_regbuf(req->rl_rdmabuf);
	kfree(req);
}

static void
rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
						   rx_buf);
	struct rpcrdma_ia *ia = rdmab_to_ia(buf);
	struct rpcrdma_mw *mw;
	unsigned int count;

	count = 0;
	spin_lock(&buf->rb_mwlock);
	while (!list_empty(&buf->rb_all)) {
		mw = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&mw->mw_all);

		spin_unlock(&buf->rb_mwlock);
		ia->ri_ops->ro_release_mr(mw);
		count++;
		spin_lock(&buf->rb_mwlock);
	}
	spin_unlock(&buf->rb_mwlock);
	r_xprt->rx_stats.mrs_allocated = 0;

	dprintk("RPC: %s: released %u MRs\n", __func__, count);
}

void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	cancel_delayed_work_sync(&buf->rb_recovery_worker);
	cancel_delayed_work_sync(&buf->rb_refresh_worker);

	while (!list_empty(&buf->rb_recv_bufs)) {
		struct rpcrdma_rep *rep;

		rep = rpcrdma_buffer_get_rep_locked(buf);
		rpcrdma_destroy_rep(rep);
	}
	buf->rb_send_count = 0;

	spin_lock(&buf->rb_reqslock);
	while (!list_empty(&buf->rb_allreqs)) {
		struct rpcrdma_req *req;

		req = list_first_entry(&buf->rb_allreqs,
				       struct rpcrdma_req, rl_all);
		list_del(&req->rl_all);

		spin_unlock(&buf->rb_reqslock);
		rpcrdma_destroy_req(req);
		spin_lock(&buf->rb_reqslock);
	}
	spin_unlock(&buf->rb_reqslock);
	buf->rb_recv_count = 0;

	rpcrdma_destroy_mrs(buf);
}

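/* Take an MW off the free list for registering a chunk. If the list
 * is empty the caller gets NULL and must back off; a refresh of the
 * MW pool is scheduled unless the device has been removed (-ENODEV),
 * in which case new MRs cannot be created.
 */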
struct rpcrdma_mw *
rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_mw *mw = NULL;

	spin_lock(&buf->rb_mwlock);
	if (!list_empty(&buf->rb_mws))
		mw = rpcrdma_pop_mw(&buf->rb_mws);
	spin_unlock(&buf->rb_mwlock);

	if (!mw)
		goto out_nomws;
	mw->mw_flags = 0;
	return mw;

out_nomws:
	dprintk("RPC: %s: no MWs available\n", __func__);
	if (r_xprt->rx_ep.rep_connected != -ENODEV)
		schedule_delayed_work(&buf->rb_refresh_worker, 0);

	/* Allow the reply handler and refresh worker to run */
	cond_resched();

	return NULL;
}

void
rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;

	spin_lock(&buf->rb_mwlock);
	rpcrdma_push_mw(mw, &buf->rb_mws);
	spin_unlock(&buf->rb_mwlock);
}

static struct rpcrdma_rep *
rpcrdma_buffer_get_rep(struct rpcrdma_buffer *buffers)
{
	/* If an RPC previously completed without a reply (say, a
	 * credential problem or a soft timeout occurs) then hold off
	 * on supplying more Receive buffers until the number of new
	 * pending RPCs catches up to the number of posted Receives.
	 */
	if (unlikely(buffers->rb_send_count < buffers->rb_recv_count))
		return NULL;
	if (unlikely(list_empty(&buffers->rb_recv_bufs)))
		return NULL;
	buffers->rb_recv_count++;
	return rpcrdma_buffer_get_rep_locked(buffers);
}

/*
 * Get a set of request/reply buffers.
 *
 * Reply buffer (if available) is attached to send buffer upon return.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;

	spin_lock(&buffers->rb_lock);
	if (list_empty(&buffers->rb_send_bufs))
		goto out_reqbuf;
	buffers->rb_send_count++;
	req = rpcrdma_buffer_get_req_locked(buffers);
	req->rl_reply = rpcrdma_buffer_get_rep(buffers);
	spin_unlock(&buffers->rb_lock);
	return req;

out_reqbuf:
	spin_unlock(&buffers->rb_lock);
	pr_warn("RPC: %s: out of request buffers\n", __func__);
	return NULL;
}

/*
 * Put request/reply buffers back into pool.
 * Pre-decrement counter/array index.
 */
void
rpcrdma_buffer_put(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;
	struct rpcrdma_rep *rep = req->rl_reply;

	req->rl_send_wr.num_sge = 0;
	req->rl_reply = NULL;

	spin_lock(&buffers->rb_lock);
	buffers->rb_send_count--;
	list_add_tail(&req->rl_list, &buffers->rb_send_bufs);
	if (rep) {
		buffers->rb_recv_count--;
		list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
	}
	spin_unlock(&buffers->rb_lock);
}

/*
 * Recover reply buffers from pool.
 * This happens when recovering from disconnect.
 */
void
rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
{
	struct rpcrdma_buffer *buffers = req->rl_buffer;

	spin_lock(&buffers->rb_lock);
	req->rl_reply = rpcrdma_buffer_get_rep(buffers);
	spin_unlock(&buffers->rb_lock);
}

/*
 * Put reply buffers back into pool when not attached to
 * request. This happens in error conditions.
 */
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	struct rpcrdma_buffer *buffers = &rep->rr_rxprt->rx_buf;

	spin_lock(&buffers->rb_lock);
	buffers->rb_recv_count--;
	list_add_tail(&rep->rr_list, &buffers->rb_recv_bufs);
	spin_unlock(&buffers->rb_lock);
}

/**
 * rpcrdma_alloc_regbuf - allocate and prepare memory for SEND/RECV buffers
 * @size: size of buffer to be allocated, in bytes
 * @direction: direction of data movement
 * @flags: GFP flags
 *
 * Returns an ERR_PTR, or a pointer to a regbuf, a buffer that
 * can be persistently DMA-mapped for I/O.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. During Long Calls
 * or Replies they may be registered externally via ro_map.
 */
struct rpcrdma_regbuf *
rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
		     gfp_t flags)
{
	struct rpcrdma_regbuf *rb;

	rb = kmalloc(sizeof(*rb) + size, flags);
	if (rb == NULL)
		return ERR_PTR(-ENOMEM);

	rb->rg_device = NULL;
	rb->rg_direction = direction;
	rb->rg_iov.length = size;

	return rb;
}

/**
 * __rpcrdma_dma_map_regbuf - DMA-map a regbuf
 * @ia: controlling rpcrdma_ia
 * @rb: regbuf to be mapped
 */
bool
__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
{
	struct ib_device *device = ia->ri_device;

	if (rb->rg_direction == DMA_NONE)
		return false;

	rb->rg_iov.addr = ib_dma_map_single(device,
					    (void *)rb->rg_base,
					    rdmab_length(rb),
					    rb->rg_direction);
	if (ib_dma_mapping_error(device, rdmab_addr(rb)))
		return false;

	rb->rg_device = device;
	rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
	return true;
}

static void
rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb)
{
	if (!rpcrdma_regbuf_is_mapped(rb))
		return;

	ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb),
			    rdmab_length(rb), rb->rg_direction);
	rb->rg_device = NULL;
}

/**
 * rpcrdma_free_regbuf - deregister and free registered buffer
 * @rb: regbuf to be deregistered and freed
 */
void
rpcrdma_free_regbuf(struct rpcrdma_regbuf *rb)
{
	if (!rb)
		return;

	rpcrdma_dma_unmap_regbuf(rb);
	kfree(rb);
}

/*
 * Prepost any receive buffer, then post send.
 *
 * Receive buffer is donated to hardware, reclaimed upon recv completion.
 */
int
rpcrdma_ep_post(struct rpcrdma_ia *ia,
		struct rpcrdma_ep *ep,
		struct rpcrdma_req *req)
{
	struct ib_send_wr *send_wr = &req->rl_send_wr;
	struct ib_send_wr *send_wr_fail;
	int rc;

	if (req->rl_reply) {
		rc = rpcrdma_ep_post_recv(ia, req->rl_reply);
		if (rc)
			return rc;
		req->rl_reply = NULL;
	}

	dprintk("RPC: %s: posting %d s/g entries\n",
		__func__, send_wr->num_sge);

	rpcrdma_set_signaled(ep, send_wr);
	rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail);
	if (rc)
		goto out_postsend_err;
	return 0;

out_postsend_err:
	pr_err("rpcrdma: RDMA Send ib_post_send returned %i\n", rc);
	return -ENOTCONN;
}

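/* Post a single Receive WR. The rep's receive buffer is DMA-mapped
 * before posting; a mapping failure is reported as -EIO, while a
 * failed ib_post_recv() is treated as a lost connection (-ENOTCONN).
 */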
int
rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
		     struct rpcrdma_rep *rep)
{
	struct ib_recv_wr *recv_wr_fail;
	int rc;

	if (!rpcrdma_dma_map_regbuf(ia, rep->rr_rdmabuf))
		goto out_map;
	rc = ib_post_recv(ia->ri_id->qp, &rep->rr_recv_wr, &recv_wr_fail);
	if (rc)
		goto out_postrecv;
	return 0;

out_map:
	pr_err("rpcrdma: failed to DMA map the Receive buffer\n");
	return -EIO;

out_postrecv:
	pr_err("rpcrdma: ib_post_recv returned %i\n", rc);
	return -ENOTCONN;
}

/**
 * rpcrdma_ep_post_extra_recv - Post buffers for incoming backchannel requests
 * @r_xprt: transport associated with these backchannel resources
 * @count: minimum number of incoming requests expected
 *
 * Returns zero if all requested buffers were posted, or a negative errno.
 */
int
rpcrdma_ep_post_extra_recv(struct rpcrdma_xprt *r_xprt, unsigned int count)
{
	struct rpcrdma_buffer *buffers = &r_xprt->rx_buf;
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_rep *rep;
	int rc;

	while (count--) {
		spin_lock(&buffers->rb_lock);
		if (list_empty(&buffers->rb_recv_bufs))
			goto out_reqbuf;
		rep = rpcrdma_buffer_get_rep_locked(buffers);
		spin_unlock(&buffers->rb_lock);

		rc = rpcrdma_ep_post_recv(ia, rep);
		if (rc)
			goto out_rc;
	}

	return 0;

out_reqbuf:
	spin_unlock(&buffers->rb_lock);
	pr_warn("%s: no extra receive buffers\n", __func__);
	return -ENOMEM;

out_rc:
	rpcrdma_recv_buffer_put(rep);
	return rc;
}