/*
 * Copyright (c) 2004, 2005, 2006 Voltaire, Inc. All rights reserved.
 * Copyright (c) 2005, 2006 Cisco Systems. All rights reserved.
 * Copyright (c) 2013-2014 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/delay.h>

#include "iscsi_iser.h"

#define ISCSI_ISER_MAX_CONN	8
#define ISER_MAX_RX_LEN		(ISER_QP_MAX_RECV_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_TX_LEN		(ISER_QP_MAX_REQ_DTOS * ISCSI_ISER_MAX_CONN)
#define ISER_MAX_CQ_LEN		(ISER_MAX_RX_LEN + ISER_MAX_TX_LEN + \
				 ISCSI_ISER_MAX_CONN)

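/*
 * Note: ISER_MAX_CQ_LEN (above) budgets one CQ entry per outstanding receive
 * and send across all connections that may share a CQ, plus one extra entry
 * per connection; the extra slot appears to leave room for the "beacon" send
 * posted during teardown (see iser_conn_terminate()).
 */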
static int iser_cq_poll_limit = 512;

static void iser_cq_tasklet_fn(unsigned long data);
static void iser_cq_callback(struct ib_cq *cq, void *cq_context);

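/*
 * The CQ, QP and async event callbacks below only log the event; actual
 * error recovery is driven by the RDMA CM events and the iSCSI error
 * handling paths.
 */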
static void iser_cq_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got cq event %d\n", cause->event);
}

static void iser_qp_event_callback(struct ib_event *cause, void *context)
{
	iser_err("got qp event %d\n", cause->event);
}

static void iser_event_handler(struct ib_event_handler *handler,
			       struct ib_event *event)
{
	iser_err("async event %d on device %s port %d\n", event->event,
		 event->device->name, event->element.port_num);
}

/**
 * iser_create_device_ib_res - creates Protection Domain (PD), Completion
 * Queue (CQ), DMA Memory Region (DMA MR) with the device associated with
 * the adapter.
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_device_ib_res(struct iser_device *device)
{
	struct ib_device_attr *dev_attr = &device->dev_attr;
	int ret, i, max_cqe;

	ret = ib_query_device(device->ib_device, dev_attr);
	if (ret) {
		pr_warn("Query device failed for %s\n", device->ib_device->name);
		return ret;
	}

	/* Assign function handles - based on FMR support */
	if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
	    device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
		iser_info("FMR supported, using FMR for registration\n");
		device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
		device->iser_free_rdma_reg_res = iser_free_fmr_pool;
		device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
		device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
	} else if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
		iser_info("FastReg supported, using FastReg for registration\n");
		device->iser_alloc_rdma_reg_res = iser_create_fastreg_pool;
		device->iser_free_rdma_reg_res = iser_free_fastreg_pool;
		device->iser_reg_rdma_mem = iser_reg_rdma_mem_fastreg;
		device->iser_unreg_rdma_mem = iser_unreg_mem_fastreg;
	} else {
		iser_err("IB device does not support FMRs nor FastRegs, can't register memory\n");
		return -1;
	}

	device->comps_used = min_t(int, num_online_cpus(),
				   device->ib_device->num_comp_vectors);

	device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
				GFP_KERNEL);
	if (!device->comps)
		goto comps_err;

	max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);

	iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
		  device->comps_used, device->ib_device->name,
		  device->ib_device->num_comp_vectors, max_cqe);

	device->pd = ib_alloc_pd(device->ib_device);
	if (IS_ERR(device->pd))
		goto pd_err;

	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		comp->device = device;
		comp->cq = ib_create_cq(device->ib_device,
					iser_cq_callback,
					iser_cq_event_callback,
					(void *)comp,
					max_cqe, i);
		if (IS_ERR(comp->cq)) {
			comp->cq = NULL;
			goto cq_err;
		}

		if (ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP))
			goto cq_err;

		tasklet_init(&comp->tasklet, iser_cq_tasklet_fn,
			     (unsigned long)comp);
	}

	device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
				   IB_ACCESS_REMOTE_WRITE |
				   IB_ACCESS_REMOTE_READ);
	if (IS_ERR(device->mr))
		goto dma_mr_err;

	INIT_IB_EVENT_HANDLER(&device->event_handler, device->ib_device,
			      iser_event_handler);
	if (ib_register_event_handler(&device->event_handler))
		goto handler_err;

	return 0;

handler_err:
	ib_dereg_mr(device->mr);
dma_mr_err:
	for (i = 0; i < device->comps_used; i++)
		tasklet_kill(&device->comps[i].tasklet);
cq_err:
	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		if (comp->cq)
			ib_destroy_cq(comp->cq);
	}
	ib_dealloc_pd(device->pd);
pd_err:
	kfree(device->comps);
comps_err:
	iser_err("failed to allocate an IB resource\n");
	return -1;
}

/**
 * iser_free_device_ib_res - destroy/dealloc/dereg the DMA MR,
 * CQ and PD created with the device associated with the adapter.
 */
static void iser_free_device_ib_res(struct iser_device *device)
{
	int i;

	BUG_ON(device->mr == NULL);

	for (i = 0; i < device->comps_used; i++) {
		struct iser_comp *comp = &device->comps[i];

		tasklet_kill(&comp->tasklet);
		ib_destroy_cq(comp->cq);
		comp->cq = NULL;
	}

	(void)ib_unregister_event_handler(&device->event_handler);
	(void)ib_dereg_mr(device->mr);
	(void)ib_dealloc_pd(device->pd);

	kfree(device->comps);
	device->comps = NULL;

	device->mr = NULL;
	device->pd = NULL;
}

/**
 * iser_create_fmr_pool - Creates FMR pool and page_vec
 *
 * returns 0 on success, or errno code on failure
 */
int iser_create_fmr_pool(struct ib_conn *ib_conn, unsigned cmds_max)
{
	struct iser_device *device = ib_conn->device;
	struct ib_fmr_pool_param params;
	int ret = -ENOMEM;

	ib_conn->fmr.page_vec = kmalloc(sizeof(*ib_conn->fmr.page_vec) +
					(sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE + 1)),
					GFP_KERNEL);
	if (!ib_conn->fmr.page_vec)
		return ret;

	ib_conn->fmr.page_vec->pages = (u64 *)(ib_conn->fmr.page_vec + 1);

	params.page_shift        = SHIFT_4K;
	/* when the first/last SG element are not start/end *
	 * page aligned, the map would be of N+1 pages */
	params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
	/* make the pool size twice the max number of SCSI commands *
	 * the ML is expected to queue, watermark for unmap at 50%  */
	params.pool_size	 = cmds_max * 2;
	params.dirty_watermark	 = cmds_max;
	params.cache		 = 0;
	params.flush_function	 = NULL;
	params.access		 = (IB_ACCESS_LOCAL_WRITE  |
				    IB_ACCESS_REMOTE_WRITE |
				    IB_ACCESS_REMOTE_READ);

	ib_conn->fmr.pool = ib_create_fmr_pool(device->pd, &params);
	if (!IS_ERR(ib_conn->fmr.pool))
		return 0;

	/* no FMR => no need for page_vec */
	kfree(ib_conn->fmr.page_vec);
	ib_conn->fmr.page_vec = NULL;

	ret = PTR_ERR(ib_conn->fmr.pool);
	ib_conn->fmr.pool = NULL;
	if (ret != -ENOSYS) {
		iser_err("FMR allocation failed, err %d\n", ret);
		return ret;
	} else {
		iser_warn("FMRs are not supported, using unaligned mode\n");
		return 0;
	}
}

/**
 * iser_free_fmr_pool - releases the FMR pool and page vec
 */
void iser_free_fmr_pool(struct ib_conn *ib_conn)
{
	iser_info("freeing conn %p fmr pool %p\n",
		  ib_conn, ib_conn->fmr.pool);

	if (ib_conn->fmr.pool != NULL)
		ib_destroy_fmr_pool(ib_conn->fmr.pool);

	ib_conn->fmr.pool = NULL;

	kfree(ib_conn->fmr.page_vec);
	ib_conn->fmr.page_vec = NULL;
}

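/*
 * iser_create_fastreg_desc() allocates the resources for a single fast
 * registration descriptor: a page list and fast-reg MR for the data and,
 * when protection information (T10-PI) is enabled, a second page list/MR
 * for the protection buffer plus a signature-enabled MR.
 */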
static int
iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
			 bool pi_enable, struct fast_reg_descriptor *desc)
{
	int ret;

	desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
						      ISCSI_ISER_SG_TABLESIZE + 1);
	if (IS_ERR(desc->data_frpl)) {
		ret = PTR_ERR(desc->data_frpl);
		iser_err("Failed to allocate ib_fast_reg_page_list err=%d\n",
			 ret);
		return PTR_ERR(desc->data_frpl);
	}

	desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1);
	if (IS_ERR(desc->data_mr)) {
		ret = PTR_ERR(desc->data_mr);
		iser_err("Failed to allocate ib_fast_reg_mr err=%d\n", ret);
		goto fast_reg_mr_failure;
	}
	desc->reg_indicators |= ISER_DATA_KEY_VALID;

	if (pi_enable) {
		struct ib_mr_init_attr mr_init_attr = {0};
		struct iser_pi_context *pi_ctx = NULL;

		desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
		if (!desc->pi_ctx) {
			iser_err("Failed to allocate pi context\n");
			ret = -ENOMEM;
			goto pi_ctx_alloc_failure;
		}
		pi_ctx = desc->pi_ctx;

		pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
						    ISCSI_ISER_SG_TABLESIZE);
		if (IS_ERR(pi_ctx->prot_frpl)) {
			ret = PTR_ERR(pi_ctx->prot_frpl);
			iser_err("Failed to allocate prot frpl ret=%d\n",
				 ret);
			goto prot_frpl_failure;
		}

		pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
						ISCSI_ISER_SG_TABLESIZE + 1);
		if (IS_ERR(pi_ctx->prot_mr)) {
			ret = PTR_ERR(pi_ctx->prot_mr);
			iser_err("Failed to allocate prot frmr ret=%d\n",
				 ret);
			goto prot_mr_failure;
		}
		desc->reg_indicators |= ISER_PROT_KEY_VALID;

		mr_init_attr.max_reg_descriptors = 2;
		mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
		pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
		if (IS_ERR(pi_ctx->sig_mr)) {
			ret = PTR_ERR(pi_ctx->sig_mr);
			iser_err("Failed to allocate signature enabled mr err=%d\n",
				 ret);
			goto sig_mr_failure;
		}
		desc->reg_indicators |= ISER_SIG_KEY_VALID;
	}
	desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;

	iser_dbg("Create fr_desc %p page_list %p\n",
		 desc, desc->data_frpl->page_list);

	return 0;

sig_mr_failure:
	ib_dereg_mr(desc->pi_ctx->prot_mr);
prot_mr_failure:
	ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
prot_frpl_failure:
	kfree(desc->pi_ctx);
pi_ctx_alloc_failure:
	ib_dereg_mr(desc->data_mr);
fast_reg_mr_failure:
	ib_free_fast_reg_page_list(desc->data_frpl);

	return ret;
}

/**
 * iser_create_fastreg_pool - Creates pool of fast_reg descriptors
 * for fast registration work requests.
 * returns 0 on success, or errno code on failure
 */
int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max)
{
	struct iser_device *device = ib_conn->device;
	struct fast_reg_descriptor *desc;
	int i, ret;

	INIT_LIST_HEAD(&ib_conn->fastreg.pool);
	ib_conn->fastreg.pool_size = 0;
	for (i = 0; i < cmds_max; i++) {
		desc = kzalloc(sizeof(*desc), GFP_KERNEL);
		if (!desc) {
			iser_err("Failed to allocate a new fast_reg descriptor\n");
			ret = -ENOMEM;
			goto err;
		}

		ret = iser_create_fastreg_desc(device->ib_device, device->pd,
					       ib_conn->pi_support, desc);
		if (ret) {
			iser_err("Failed to create fastreg descriptor err=%d\n",
				 ret);
			kfree(desc);
			goto err;
		}

		list_add_tail(&desc->list, &ib_conn->fastreg.pool);
		ib_conn->fastreg.pool_size++;
	}

	return 0;

err:
	iser_free_fastreg_pool(ib_conn);
	return ret;
}

/**
 * iser_free_fastreg_pool - releases the pool of fast_reg descriptors
 */
void iser_free_fastreg_pool(struct ib_conn *ib_conn)
{
	struct fast_reg_descriptor *desc, *tmp;
	int i = 0;

	if (list_empty(&ib_conn->fastreg.pool))
		return;

	iser_info("freeing conn %p fr pool\n", ib_conn);

	list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.pool, list) {
		list_del(&desc->list);
		ib_free_fast_reg_page_list(desc->data_frpl);
		ib_dereg_mr(desc->data_mr);
		if (desc->pi_ctx) {
			ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
			ib_dereg_mr(desc->pi_ctx->prot_mr);
			ib_destroy_mr(desc->pi_ctx->sig_mr);
			kfree(desc->pi_ctx);
		}
		kfree(desc);
		++i;
	}

	if (i < ib_conn->fastreg.pool_size)
		iser_warn("pool still has %d regions registered\n",
			  ib_conn->fastreg.pool_size - i);
}

/**
 * iser_create_ib_conn_res - creates a Queue-Pair (QP)
 *
 * returns 0 on success, -1 on failure
 */
static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
{
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
						   ib_conn);
	struct iser_device *device;
	struct ib_device_attr *dev_attr;
	struct ib_qp_init_attr init_attr;
	int ret = -ENOMEM;
	int index, min_index = 0;

	BUG_ON(ib_conn->device == NULL);

	device = ib_conn->device;
	dev_attr = &device->dev_attr;

	memset(&init_attr, 0, sizeof init_attr);

	mutex_lock(&ig.connlist_mutex);
	/* select the CQ with the minimal number of usages */
	for (index = 0; index < device->comps_used; index++) {
		if (device->comps[index].active_qps <
		    device->comps[min_index].active_qps)
			min_index = index;
	}
	ib_conn->comp = &device->comps[min_index];
	ib_conn->comp->active_qps++;
	mutex_unlock(&ig.connlist_mutex);
	iser_info("cq index %d used for ib_conn %p\n", min_index, ib_conn);

	init_attr.event_handler	= iser_qp_event_callback;
	init_attr.qp_context	= (void *)ib_conn;
	init_attr.send_cq	= ib_conn->comp->cq;
	init_attr.recv_cq	= ib_conn->comp->cq;
	init_attr.cap.max_recv_wr  = ISER_QP_MAX_RECV_DTOS;
	init_attr.cap.max_send_sge = 2;
	init_attr.cap.max_recv_sge = 1;
	init_attr.sq_sig_type	= IB_SIGNAL_REQ_WR;
	init_attr.qp_type	= IB_QPT_RC;
	if (ib_conn->pi_support) {
		init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
		init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
		iser_conn->max_cmds =
			ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
	} else {
		if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
			init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS + 1;
			iser_conn->max_cmds =
				ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
		} else {
			init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
			iser_conn->max_cmds =
				ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
			iser_dbg("device %s supports max_send_wr %d\n",
				 device->ib_device->name, dev_attr->max_qp_wr);
		}
	}

	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
	if (ret)
		goto out_err;

	ib_conn->qp = ib_conn->cma_id->qp;
	iser_info("setting conn %p cma_id %p qp %p\n",
		  ib_conn, ib_conn->cma_id,
		  ib_conn->cma_id->qp);
	return ret;

out_err:
	mutex_lock(&ig.connlist_mutex);
	ib_conn->comp->active_qps--;
	mutex_unlock(&ig.connlist_mutex);
	iser_err("unable to alloc mem or create resource, err %d\n", ret);

	return ret;
}

/**
 * Based on the resolved device node GUID, see if there is already an
 * allocated iser device for this IB device. If there is no such device,
 * create one.
 */
static
struct iser_device *iser_device_find_by_ib_device(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;

	mutex_lock(&ig.device_list_mutex);

	list_for_each_entry(device, &ig.device_list, ig_list)
		/* find if there's a match using the node GUID */
		if (device->ib_device->node_guid == cma_id->device->node_guid)
			goto inc_refcnt;

	device = kzalloc(sizeof *device, GFP_KERNEL);
	if (device == NULL)
		goto out;

	/* assign this ib_device to the iser device */
	device->ib_device = cma_id->device;
	/* init the device and link it into ig device list */
	if (iser_create_device_ib_res(device)) {
		kfree(device);
		device = NULL;
		goto out;
	}
	list_add(&device->ig_list, &ig.device_list);

inc_refcnt:
	device->refcount++;
out:
	mutex_unlock(&ig.device_list_mutex);
	return device;
}

/* if there's no demand for this device, release it */
static void iser_device_try_release(struct iser_device *device)
{
	mutex_lock(&ig.device_list_mutex);
	device->refcount--;
	iser_info("device %p refcount %d\n", device, device->refcount);
	if (!device->refcount) {
		iser_free_device_ib_res(device);
		list_del(&device->ig_list);
		kfree(device);
	}
	mutex_unlock(&ig.device_list_mutex);
}

/**
 * Called with state mutex held
 **/
static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
				     enum iser_conn_state comp,
				     enum iser_conn_state exch)
{
	int ret;

	ret = (iser_conn->state == comp);
	if (ret)
		iser_conn->state = exch;

	return ret;
}

void iser_release_work(struct work_struct *work)
{
	struct iser_conn *iser_conn;

	iser_conn = container_of(work, struct iser_conn, release_work);

	/* Wait for conn_stop to complete */
	wait_for_completion(&iser_conn->stop_completion);
	/* Wait for IB resources cleanup to complete */
	wait_for_completion(&iser_conn->ib_completion);

	mutex_lock(&iser_conn->state_mutex);
	iser_conn->state = ISER_CONN_DOWN;
	mutex_unlock(&iser_conn->state_mutex);

	iser_conn_release(iser_conn);
}

/**
 * iser_free_ib_conn_res - release IB related resources
 * @iser_conn: iser connection struct
 * @destroy_device: indicator if we need to try to release
 *     the iser device (only iscsi shutdown and DEVICE_REMOVAL
 *     will use this).
 *
 * This routine is called with the iser state mutex held
 * so the cm_id removal is out of here. It is safe to
 * be invoked multiple times.
 */
static void iser_free_ib_conn_res(struct iser_conn *iser_conn,
				  bool destroy_device)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	iser_info("freeing conn %p cma_id %p qp %p\n",
		  iser_conn, ib_conn->cma_id, ib_conn->qp);

	iser_free_rx_descriptors(iser_conn);

	if (ib_conn->qp != NULL) {
		ib_conn->comp->active_qps--;
		rdma_destroy_qp(ib_conn->cma_id);
		ib_conn->qp = NULL;
	}

	if (destroy_device && device != NULL) {
		iser_device_try_release(device);
		ib_conn->device = NULL;
	}
}

/**
 * Frees all conn objects and deallocs conn descriptor
 */
void iser_conn_release(struct iser_conn *iser_conn)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;

	mutex_lock(&ig.connlist_mutex);
	list_del(&iser_conn->conn_list);
	mutex_unlock(&ig.connlist_mutex);

	mutex_lock(&iser_conn->state_mutex);
	if (iser_conn->state != ISER_CONN_DOWN) {
		iser_warn("iser conn %p state %d, expected state down.\n",
			  iser_conn, iser_conn->state);
		iser_conn->state = ISER_CONN_DOWN;
	}
	/*
	 * In case we never got to bind stage, we still need to
	 * release IB resources (which is safe to call more than once).
	 */
	iser_free_ib_conn_res(iser_conn, true);
	mutex_unlock(&iser_conn->state_mutex);

	if (ib_conn->cma_id != NULL) {
		rdma_destroy_id(ib_conn->cma_id);
		ib_conn->cma_id = NULL;
	}

	kfree(iser_conn);
}

/**
 * triggers start of the disconnect procedures and waits for them to be done
 * Called with state mutex held
 */
int iser_conn_terminate(struct iser_conn *iser_conn)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct ib_send_wr *bad_wr;
	int err = 0;

	/* terminate the iser conn only if the conn state is UP */
	if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
				       ISER_CONN_TERMINATING))
		return 0;

	iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state);

	/* suspend queuing of new iscsi commands */
	if (iser_conn->iscsi_conn)
		iscsi_suspend_queue(iser_conn->iscsi_conn);

	/*
	 * In case we didn't already clean up the cma_id (peer initiated
	 * a disconnection), we need to cause the CMA to change the QP
	 * state to ERROR.
	 */
	if (ib_conn->cma_id) {
		err = rdma_disconnect(ib_conn->cma_id);
		if (err)
			iser_err("Failed to disconnect, conn: 0x%p err %d\n",
				 iser_conn, err);

		/* post an indication that all flush errors were consumed */
		err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
		if (err) {
			iser_err("conn %p failed to post beacon\n", ib_conn);
			return 1;
		}

		wait_for_completion(&ib_conn->flush_comp);
	}

	return 1;
}

/**
 * Called with state mutex held
 **/
static void iser_connect_error(struct rdma_cm_id *cma_id)
{
	struct iser_conn *iser_conn;

	iser_conn = (struct iser_conn *)cma_id->context;

	iser_conn->state = ISER_CONN_DOWN;
}

/**
 * Called with state mutex held
 **/
static void iser_addr_handler(struct rdma_cm_id *cma_id)
{
	struct iser_device *device;
	struct iser_conn *iser_conn;
	struct ib_conn *ib_conn;
	int ret;

	iser_conn = (struct iser_conn *)cma_id->context;
	if (iser_conn->state != ISER_CONN_PENDING)
		/* bailout */
		return;

	ib_conn = &iser_conn->ib_conn;
	device = iser_device_find_by_ib_device(cma_id);
	if (!device) {
		iser_err("device lookup/creation failed\n");
		iser_connect_error(cma_id);
		return;
	}

	ib_conn->device = device;

	/* connection T10-PI support */
	if (iser_pi_enable) {
		if (!(device->dev_attr.device_cap_flags &
		      IB_DEVICE_SIGNATURE_HANDOVER)) {
			iser_warn("T10-PI requested but not supported on %s, "
				  "continue without T10-PI\n",
				  ib_conn->device->ib_device->name);
			ib_conn->pi_support = false;
		} else {
			ib_conn->pi_support = true;
		}
	}

	ret = rdma_resolve_route(cma_id, 1000);
	if (ret) {
		iser_err("resolve route failed: %d\n", ret);
		iser_connect_error(cma_id);
		return;
	}
}

/**
 * Called with state mutex held
 **/
static void iser_route_handler(struct rdma_cm_id *cma_id)
{
	struct rdma_conn_param conn_param;
	int ret;
	struct iser_cm_hdr req_hdr;
	struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct iser_device *device = ib_conn->device;

	if (iser_conn->state != ISER_CONN_PENDING)
		/* bailout */
		return;

	ret = iser_create_ib_conn_res(ib_conn);
	if (ret)
		goto failure;

	memset(&conn_param, 0, sizeof conn_param);
	conn_param.responder_resources = device->dev_attr.max_qp_rd_atom;
	conn_param.initiator_depth     = 1;
	conn_param.retry_count	       = 7;
	conn_param.rnr_retry_count     = 6;

	memset(&req_hdr, 0, sizeof(req_hdr));
	req_hdr.flags = (ISER_ZBVA_NOT_SUPPORTED |
			 ISER_SEND_W_INV_NOT_SUPPORTED);
	conn_param.private_data	    = (void *)&req_hdr;
	conn_param.private_data_len = sizeof(struct iser_cm_hdr);

	ret = rdma_connect(cma_id, &conn_param);
	if (ret) {
		iser_err("failure connecting: %d\n", ret);
		goto failure;
	}

	return;
failure:
	iser_connect_error(cma_id);
}

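/*
 * RDMA_CM_EVENT_ESTABLISHED handler: log the remote and local QP numbers,
 * move the connection to ISER_CONN_UP and wake up iser_connect(), which
 * may be blocked on up_completion.
 */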
static void iser_connected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *iser_conn;
	struct ib_qp_attr attr;
	struct ib_qp_init_attr init_attr;

	iser_conn = (struct iser_conn *)cma_id->context;
	if (iser_conn->state != ISER_CONN_PENDING)
		/* bailout */
		return;

	(void)ib_query_qp(cma_id->qp, &attr, ~0, &init_attr);
	iser_info("remote qpn:%x my qpn:%x\n", attr.dest_qp_num, cma_id->qp->qp_num);

	iser_conn->state = ISER_CONN_UP;
	complete(&iser_conn->up_completion);
}

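/*
 * Peer-initiated (or local) disconnect: start connection termination and,
 * if the connection is already bound to an iscsi connection, let the iscsi
 * layer run its connection failure handling.
 */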
static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
{
	struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;

	if (iser_conn_terminate(iser_conn)) {
		if (iser_conn->iscsi_conn)
			iscsi_conn_failure(iser_conn->iscsi_conn,
					   ISCSI_ERR_CONN_FAILED);
		else
			iser_err("iscsi_iser connection isn't bound\n");
	}
}

static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
				 bool destroy_device)
{
	struct iser_conn *iser_conn = (struct iser_conn *)cma_id->context;

	/*
	 * We are not guaranteed that we visited disconnected_handler
	 * by now, call it here to be safe that we handle CM drep
	 * and flush errors.
	 */
	iser_disconnected_handler(cma_id);
	iser_free_ib_conn_res(iser_conn, destroy_device);
	complete(&iser_conn->ib_completion);
}

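/*
 * Central RDMA CM event dispatcher. Runs with the connection state mutex
 * held around each handler. Note that returning a non-zero value from an
 * rdma_cm event handler tells the CM to destroy the cma_id, which is what
 * the DEVICE_REMOVAL case below relies on.
 */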
static int iser_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
{
	struct iser_conn *iser_conn;
	int ret = 0;

	iser_conn = (struct iser_conn *)cma_id->context;
	iser_info("event %d status %d conn %p id %p\n",
		  event->event, event->status, cma_id->context, cma_id);

	mutex_lock(&iser_conn->state_mutex);
	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
		iser_addr_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		iser_route_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ESTABLISHED:
		iser_connected_handler(cma_id);
		break;
	case RDMA_CM_EVENT_ADDR_ERROR:
	case RDMA_CM_EVENT_ROUTE_ERROR:
	case RDMA_CM_EVENT_CONNECT_ERROR:
	case RDMA_CM_EVENT_UNREACHABLE:
	case RDMA_CM_EVENT_REJECTED:
		iser_connect_error(cma_id);
		break;
	case RDMA_CM_EVENT_DISCONNECTED:
	case RDMA_CM_EVENT_ADDR_CHANGE:
	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
		iser_cleanup_handler(cma_id, false);
		break;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		/*
		 * we *must* destroy the device as we cannot rely
		 * on iscsid to be around to initiate error handling.
		 * also if we are not in state DOWN implicitly destroy
		 * the cma_id.
		 */
		iser_cleanup_handler(cma_id, true);
		if (iser_conn->state != ISER_CONN_DOWN) {
			iser_conn->ib_conn.cma_id = NULL;
			ret = 1;
		}
		break;
	default:
		iser_err("Unexpected RDMA CM event (%d)\n", event->event);
		break;
	}
	mutex_unlock(&iser_conn->state_mutex);

	return ret;
}

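/*
 * Initialize a newly allocated iser connection: connection state, the
 * completions used to synchronize teardown and connection establishment,
 * the connection list head and the locks.
 */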
void iser_conn_init(struct iser_conn *iser_conn)
{
	iser_conn->state = ISER_CONN_INIT;
	iser_conn->ib_conn.post_recv_buf_count = 0;
	init_completion(&iser_conn->ib_conn.flush_comp);
	init_completion(&iser_conn->stop_completion);
	init_completion(&iser_conn->ib_completion);
	init_completion(&iser_conn->up_completion);
	INIT_LIST_HEAD(&iser_conn->conn_list);
	spin_lock_init(&iser_conn->ib_conn.lock);
	mutex_init(&iser_conn->state_mutex);
}

/**
 * starts the process of connecting to the target, sleeping until the
 * connection is established or rejected (unless non_blocking was requested)
 */
int iser_connect(struct iser_conn *iser_conn,
		 struct sockaddr *src_addr,
		 struct sockaddr *dst_addr,
		 int non_blocking)
{
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	int err = 0;

	mutex_lock(&iser_conn->state_mutex);

	sprintf(iser_conn->name, "%pISp", dst_addr);

	iser_info("connecting to: %s\n", iser_conn->name);

	/* the device is known only --after-- address resolution */
	ib_conn->device = NULL;

	iser_conn->state = ISER_CONN_PENDING;

	ib_conn->beacon.wr_id = ISER_BEACON_WRID;
	ib_conn->beacon.opcode = IB_WR_SEND;

	ib_conn->cma_id = rdma_create_id(iser_cma_handler,
					 (void *)iser_conn,
					 RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(ib_conn->cma_id)) {
		err = PTR_ERR(ib_conn->cma_id);
		iser_err("rdma_create_id failed: %d\n", err);
		goto id_failure;
	}

	err = rdma_resolve_addr(ib_conn->cma_id, src_addr, dst_addr, 1000);
	if (err) {
		iser_err("rdma_resolve_addr failed: %d\n", err);
		goto addr_failure;
	}

	if (!non_blocking) {
		wait_for_completion_interruptible(&iser_conn->up_completion);

		if (iser_conn->state != ISER_CONN_UP) {
			err = -EIO;
			goto connect_failure;
		}
	}
	mutex_unlock(&iser_conn->state_mutex);

	mutex_lock(&ig.connlist_mutex);
	list_add(&iser_conn->conn_list, &ig.connlist);
	mutex_unlock(&ig.connlist_mutex);
	return 0;

id_failure:
	ib_conn->cma_id = NULL;
addr_failure:
	iser_conn->state = ISER_CONN_DOWN;
connect_failure:
	mutex_unlock(&iser_conn->state_mutex);
	iser_conn_release(iser_conn);
	return err;
}

/**
 * iser_reg_page_vec - Register physical memory
 *
 * returns: 0 on success, errno code on failure
 */
int iser_reg_page_vec(struct ib_conn *ib_conn,
		      struct iser_page_vec *page_vec,
		      struct iser_mem_reg *mem_reg)
{
	struct ib_pool_fmr *mem;
	u64 io_addr;
	u64 *page_list;
	int status;

	page_list = page_vec->pages;
	io_addr	  = page_list[0];

	mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
				   page_list,
				   page_vec->length,
				   io_addr);

	if (IS_ERR(mem)) {
		status = (int)PTR_ERR(mem);
		iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
		return status;
	}

	mem_reg->lkey  = mem->fmr->lkey;
	mem_reg->rkey  = mem->fmr->rkey;
	mem_reg->len   = page_vec->length * SIZE_4K;
	mem_reg->va    = io_addr;
	mem_reg->mem_h = (void *)mem;

	mem_reg->va   += page_vec->offset;
	mem_reg->len   = page_vec->data_size;

	iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
		 "entry[0]: (0x%08lx,%ld)] -> "
		 "[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
		 page_vec, page_vec->length,
		 (unsigned long)page_vec->pages[0],
		 (unsigned long)page_vec->data_size,
		 (unsigned int)mem_reg->lkey, mem_reg->mem_h,
		 (unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
	return 0;
}

/**
 * Unregister (previously registered using FMR) memory.
 * If the memory is non-FMR, this does nothing.
 */
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
			enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
	int ret;

	if (!reg->mem_h)
		return;

	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n", reg->mem_h);

	ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
	if (ret)
		iser_err("ib_fmr_pool_unmap failed %d\n", ret);

	reg->mem_h = NULL;
}

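/*
 * Fastreg counterpart of the FMR unregister above: instead of unmapping,
 * the fast registration descriptor is simply returned to the connection's
 * free pool under the connection lock.
 */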
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
			    enum iser_data_dir cmd_dir)
{
	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
	struct iser_conn *iser_conn = iser_task->iser_conn;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct fast_reg_descriptor *desc = reg->mem_h;

	if (!desc)
		return;

	reg->mem_h = NULL;
	spin_lock_bh(&ib_conn->lock);
	list_add_tail(&desc->list, &ib_conn->fastreg.pool);
	spin_unlock_bh(&ib_conn->lock);
}

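/*
 * Post the single receive buffer used for the iSCSI login response. The
 * buffer address doubles as the work request id so the completion handler
 * can find the descriptor.
 */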
int iser_post_recvl(struct iser_conn *iser_conn)
{
	struct ib_recv_wr rx_wr, *rx_wr_failed;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	struct ib_sge sge;
	int ib_ret;

	sge.addr   = iser_conn->login_resp_dma;
	sge.length = ISER_RX_LOGIN_SIZE;
	sge.lkey   = ib_conn->device->mr->lkey;

	rx_wr.wr_id   = (uintptr_t)iser_conn->login_resp_buf;
	rx_wr.sg_list = &sge;
	rx_wr.num_sge = 1;
	rx_wr.next    = NULL;

	ib_conn->post_recv_buf_count++;
	ib_ret = ib_post_recv(ib_conn->qp, &rx_wr, &rx_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
		ib_conn->post_recv_buf_count--;
	}
	return ib_ret;
}

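/*
 * Post count receive work requests as one chained list taken from the
 * connection's rx_descs ring. rx_desc_head only advances if the post
 * succeeds; on failure the posted-buffer accounting is rolled back.
 */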
int iser_post_recvm(struct iser_conn *iser_conn, int count)
{
	struct ib_recv_wr *rx_wr, *rx_wr_failed;
	int i, ib_ret;
	struct ib_conn *ib_conn = &iser_conn->ib_conn;
	unsigned int my_rx_head = iser_conn->rx_desc_head;
	struct iser_rx_desc *rx_desc;

	for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
		rx_desc		= &iser_conn->rx_descs[my_rx_head];
		rx_wr->wr_id	= (uintptr_t)rx_desc;
		rx_wr->sg_list	= &rx_desc->rx_sg;
		rx_wr->num_sge	= 1;
		rx_wr->next	= rx_wr + 1;
		my_rx_head = (my_rx_head + 1) & iser_conn->qp_max_recv_dtos_mask;
	}

	rx_wr--;
	rx_wr->next = NULL; /* mark end of work requests list */

	ib_conn->post_recv_buf_count += count;
	ib_ret = ib_post_recv(ib_conn->qp, ib_conn->rx_wr, &rx_wr_failed);
	if (ib_ret) {
		iser_err("ib_post_recv failed ret=%d\n", ib_ret);
		ib_conn->post_recv_buf_count -= count;
	} else
		iser_conn->rx_desc_head = my_rx_head;
	return ib_ret;
}

/**
 * iser_post_send - Initiate a Send DTO operation
 *
 * returns 0 on success, -1 on failure
 */
int iser_post_send(struct ib_conn *ib_conn, struct iser_tx_desc *tx_desc,
		   bool signal)
{
	int ib_ret;
	struct ib_send_wr send_wr, *send_wr_failed;

	ib_dma_sync_single_for_device(ib_conn->device->ib_device,
				      tx_desc->dma_addr, ISER_HEADERS_LEN,
				      DMA_TO_DEVICE);

	send_wr.next	   = NULL;
	send_wr.wr_id	   = (uintptr_t)tx_desc;
	send_wr.sg_list	   = tx_desc->tx_sg;
	send_wr.num_sge	   = tx_desc->num_sge;
	send_wr.opcode	   = IB_WR_SEND;
	send_wr.send_flags = signal ? IB_SEND_SIGNALED : 0;

	ib_ret = ib_post_send(ib_conn->qp, &send_wr, &send_wr_failed);
	if (ib_ret)
		iser_err("ib_post_send failed, ret:%d\n", ib_ret);

	return ib_ret;
}

/**
 * is_iser_tx_desc - Indicate if the completion wr_id
 * is a TX descriptor or not.
 * @iser_conn: iser connection
 * @wr_id: completion WR identifier
 *
 * Since we cannot rely on wc opcode in FLUSH errors
 * we must work around it by checking if the wr_id address
 * falls in the iser connection rx_descs buffer. If so
 * it is an RX descriptor, otherwise it is a TX.
 */
static inline bool
is_iser_tx_desc(struct iser_conn *iser_conn, void *wr_id)
{
	void *start = iser_conn->rx_descs;
	int len = iser_conn->num_rx_descs * sizeof(*iser_conn->rx_descs);

	if (wr_id >= start && wr_id < start + len)
		return false;

	return true;
}

/**
 * iser_handle_comp_error() - Handle error completion
 * @ib_conn:   connection RDMA resources
 * @wc:        work completion
 *
 * Notes: We may handle a FLUSH error completion and in this case
 *        we only cleanup in case TX type was DATAOUT. For non-FLUSH
 *        error completion we should also notify the iscsi layer that
 *        the connection failed (in case we passed the bind stage).
 */
static void
iser_handle_comp_error(struct ib_conn *ib_conn,
		       struct ib_wc *wc)
{
	void *wr_id = (void *)(uintptr_t)wc->wr_id;
	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
						   ib_conn);

	if (wc->status != IB_WC_WR_FLUSH_ERR)
		if (iser_conn->iscsi_conn)
			iscsi_conn_failure(iser_conn->iscsi_conn,
					   ISCSI_ERR_CONN_FAILED);

	if (is_iser_tx_desc(iser_conn, wr_id)) {
		struct iser_tx_desc *desc = wr_id;

		if (desc->type == ISCSI_TX_DATAOUT)
			kmem_cache_free(ig.desc_cache, desc);
	} else {
		ib_conn->post_recv_buf_count--;
	}
}

/**
 * iser_handle_wc - handle a single work completion
 * @wc: work completion
 *
 * Soft-IRQ context, work completion can be either
 * SEND or RECV, and can turn out successful or
 * with error (or flush error).
 */
static void iser_handle_wc(struct ib_wc *wc)
{
	struct ib_conn *ib_conn;
	struct iser_tx_desc *tx_desc;
	struct iser_rx_desc *rx_desc;

	ib_conn = wc->qp->qp_context;
	if (likely(wc->status == IB_WC_SUCCESS)) {
		if (wc->opcode == IB_WC_RECV) {
			rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
			iser_rcv_completion(rx_desc, wc->byte_len,
					    ib_conn);
		} else if (wc->opcode == IB_WC_SEND) {
			tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
			iser_snd_completion(tx_desc, ib_conn);
		} else {
			iser_err("Unknown wc opcode %d\n", wc->opcode);
		}
	} else {
		if (wc->status != IB_WC_WR_FLUSH_ERR)
			iser_err("wr id %llx status %d vend_err %x\n",
				 wc->wr_id, wc->status, wc->vendor_err);
		else
			iser_dbg("flush error: wr id %llx\n", wc->wr_id);

		if (wc->wr_id != ISER_FASTREG_LI_WRID &&
		    wc->wr_id != ISER_BEACON_WRID)
			iser_handle_comp_error(ib_conn, wc);

		/* complete in case all flush errors were consumed */
		if (wc->wr_id == ISER_BEACON_WRID)
			complete(&ib_conn->flush_comp);
	}
}

/**
 * iser_cq_tasklet_fn - iSER completion polling loop
 * @data: iSER completion context
 *
 * Soft-IRQ context, polling connection CQ until
 * either the CQ is empty or we exhausted the polling budget
 */
static void iser_cq_tasklet_fn(unsigned long data)
{
	struct iser_comp *comp = (struct iser_comp *)data;
	struct ib_cq *cq = comp->cq;
	struct ib_wc *const wcs = comp->wcs;
	int i, n, completed = 0;

	while ((n = ib_poll_cq(cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
		for (i = 0; i < n; i++)
			iser_handle_wc(&wcs[i]);

		completed += n;
		if (completed >= iser_cq_poll_limit)
			break;
	}

	/*
	 * It is assumed here that arming CQ only once its empty
	 * would not cause interrupts to be missed.
	 */
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);

	iser_dbg("got %d completions\n", completed);
}

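/*
 * CQ completion interrupt handler: all real work is deferred to the
 * per-completion-vector tasklet (iser_cq_tasklet_fn) scheduled here.
 */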
static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{
	struct iser_comp *comp = cq_context;

	tasklet_schedule(&comp->tasklet);
}

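/*
 * Check the signature MR status of a protected task once it has completed.
 * The return value appears to correspond to the T10-DIF additional sense
 * code qualifiers used by the SCSI layer: 0x1 guard check, 0x2 application
 * tag check, 0x3 reference tag check. The failing sector is derived from
 * the error offset divided by the sector size plus the 8-byte PI tuple.
 */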
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
			     enum iser_data_dir cmd_dir, sector_t *sector)
{
	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
	struct fast_reg_descriptor *desc = reg->mem_h;
	unsigned long sector_size = iser_task->sc->device->sector_size;
	struct ib_mr_status mr_status;
	int ret;

	if (desc && desc->reg_indicators & ISER_FASTREG_PROTECTED) {
		desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
		ret = ib_check_mr_status(desc->pi_ctx->sig_mr,
					 IB_MR_CHECK_SIG_STATUS, &mr_status);
		if (ret) {
			pr_err("ib_check_mr_status failed, ret %d\n", ret);
			goto err;
		}

		if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
			sector_t sector_off = mr_status.sig_err.sig_err_offset;

			do_div(sector_off, sector_size + 8);
			*sector = scsi_get_lba(iser_task->sc) + sector_off;

			pr_err("PI error found type %d at sector %llx "
			       "expected %x vs actual %x\n",
			       mr_status.sig_err.err_type,
			       (unsigned long long)*sector,
			       mr_status.sig_err.expected,
			       mr_status.sig_err.actual);

			switch (mr_status.sig_err.err_type) {
			case IB_SIG_BAD_GUARD:
				return 0x1;
			case IB_SIG_BAD_REFTAG:
				return 0x3;
			case IB_SIG_BAD_APPTAG:
				return 0x2;
			}
		}
	}

	return 0;
err:
	/* Not a lot we can do here, return ambiguous guard error */
	return 0x1;
}