gsi.c

/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "mlx5_ib.h"

/* A GSI send work request and the completion that will be reported for it. */
struct mlx5_ib_gsi_wr {
	struct ib_cqe cqe;	/* links the WR to handle_single_completion() */
	struct ib_wc wc;	/* completion to report on the consumer's CQ */
	int send_flags;		/* IB_SEND_* flags of the original request */
	bool completed:1;	/* true once the hardware completion arrived */
};
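
/*
 * The hardware GSI QP cannot choose a P_Key per work request, while QP1
 * must send each MAD with the P_Key index its consumer requested. When
 * the device can stamp the DETH source QP number (set_deth_sqpn), the
 * driver therefore fans sends out over one UD "TX" QP per P_Key table
 * entry, selected by the WR's pkey_index; receives still arrive on the
 * hardware GSI QP (rx_qp). TX completions land on a private CQ and are
 * republished to the consumer's send CQ in post order via the
 * outstanding_wrs ring below.
 */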
struct mlx5_ib_gsi_qp {
	struct ib_qp ibqp;
	struct ib_qp *rx_qp;
	u8 port_num;
	struct ib_qp_cap cap;
	enum ib_sig_type sq_sig_type;
	/* Serialize qp state modifications */
	struct mutex mutex;
	struct ib_cq *cq;
	struct mlx5_ib_gsi_wr *outstanding_wrs;
	/* Free-running producer/consumer counters into outstanding_wrs;
	 * reduced modulo cap.max_send_wr only when indexing the array.
	 */
	u32 outstanding_pi, outstanding_ci;
	int num_qps;
	/* Protects access to the tx_qps. Post send operations synchronize
	 * with tx_qp creation in setup_qp(). Also protects the
	 * outstanding_wrs array and indices.
	 */
	spinlock_t lock;
	struct ib_qp **tx_qps;
};

static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
{
	return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
}

static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
{
	return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
}

/* The outstanding_pi and outstanding_ci counters are free-running, so the
 * iteration index here is a counter value, not an array slot.
 */
#define for_each_outstanding_wr(gsi, index) \
	for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \
	     index++)
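
/*
 * Example: with max_send_wr = 4, after five posts and one flushed
 * completion, outstanding_ci = 1 and outstanding_pi = 5. The live WRs
 * occupy slots 1, 2, 3 and 0 (the counters modulo 4), and the ring is
 * full again because outstanding_pi - outstanding_ci == max_send_wr.
 */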

/* Call with gsi->lock locked */
static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
{
	struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
	struct mlx5_ib_gsi_wr *wr;
	u32 index;

	for_each_outstanding_wr(gsi, index) {
		wr = &gsi->outstanding_wrs[index % gsi->cap.max_send_wr];

		if (!wr->completed)
			break;

		if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
		    wr->send_flags & IB_SEND_SIGNALED)
			WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));

		wr->completed = false;
	}

	gsi->outstanding_ci = index;
}
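
/*
 * Send completion handler for the private TX CQ, called from CQ polling
 * context (IB_POLL_SOFTIRQ here). It copies the hardware completion into
 * the WR's ring slot, restoring the consumer's wr_id and reporting the
 * software GSI QP instead of the internal TX QP, then flushes whatever
 * prefix of the ring has now completed.
 */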
static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
	struct mlx5_ib_gsi_wr *wr =
		container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
	u64 wr_id;
	unsigned long flags;

	spin_lock_irqsave(&gsi->lock, flags);
	wr->completed = true;
	wr_id = wr->wc.wr_id;
	wr->wc = *wc;
	wr->wc.wr_id = wr_id;
	wr->wc.qp = &gsi->ibqp;

	generate_completions(gsi);
	spin_unlock_irqrestore(&gsi->lock, flags);
}
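
/*
 * Create the software GSI QP exposed to consumers. A hardware GSI QP
 * (MLX5_IB_QPT_HW_GSI) is always created for receives; when the device
 * supports setting the DETH source QP number, its send queue is sized to
 * zero and sends instead go through per-P_Key TX QPs, with one
 * outstanding_wrs slot per requested send WR and a private send CQ.
 */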
struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
				    struct ib_qp_init_attr *init_attr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_gsi_qp *gsi;
	struct ib_qp_init_attr hw_init_attr = *init_attr;
	const u8 port_num = init_attr->port_num;
	const int num_pkeys = pd->device->attrs.max_pkeys;
	const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0;
	int ret;

	mlx5_ib_dbg(dev, "creating GSI QP\n");

	if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) {
		mlx5_ib_warn(dev,
			     "invalid port number %d during GSI QP creation\n",
			     port_num);
		return ERR_PTR(-EINVAL);
	}

	gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
	if (!gsi)
		return ERR_PTR(-ENOMEM);

	gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
	if (!gsi->tx_qps) {
		ret = -ENOMEM;
		goto err_free;
	}

	gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
				       sizeof(*gsi->outstanding_wrs),
				       GFP_KERNEL);
	if (!gsi->outstanding_wrs) {
		ret = -ENOMEM;
		goto err_free_tx;
	}

	mutex_init(&gsi->mutex);

	mutex_lock(&dev->devr.mutex);

	if (dev->devr.ports[port_num - 1].gsi) {
		mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
			     port_num);
		ret = -EBUSY;
		goto err_free_wrs;
	}
	gsi->num_qps = num_qps;
	spin_lock_init(&gsi->lock);

	gsi->cap = init_attr->cap;
	gsi->sq_sig_type = init_attr->sq_sig_type;
	gsi->ibqp.qp_num = 1;
	gsi->port_num = port_num;

	gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
			      IB_POLL_SOFTIRQ);
	if (IS_ERR(gsi->cq)) {
		mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
			     PTR_ERR(gsi->cq));
		ret = PTR_ERR(gsi->cq);
		goto err_free_wrs;
	}

	hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI;
	hw_init_attr.send_cq = gsi->cq;
	if (num_qps) {
		hw_init_attr.cap.max_send_wr = 0;
		hw_init_attr.cap.max_send_sge = 0;
		hw_init_attr.cap.max_inline_data = 0;
	}
	gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
	if (IS_ERR(gsi->rx_qp)) {
		mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
			     PTR_ERR(gsi->rx_qp));
		ret = PTR_ERR(gsi->rx_qp);
		goto err_destroy_cq;
	}

	dev->devr.ports[init_attr->port_num - 1].gsi = gsi;

	mutex_unlock(&dev->devr.mutex);

	return &gsi->ibqp;

err_destroy_cq:
	ib_free_cq(gsi->cq);
err_free_wrs:
	mutex_unlock(&dev->devr.mutex);
	kfree(gsi->outstanding_wrs);
err_free_tx:
	kfree(gsi->tx_qps);
err_free:
	kfree(gsi);
	return ERR_PTR(ret);
}

int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	const int port_num = gsi->port_num;
	int qp_index;
	int ret;

	mlx5_ib_dbg(dev, "destroying GSI QP\n");

	mutex_lock(&dev->devr.mutex);
	ret = ib_destroy_qp(gsi->rx_qp);
	if (ret) {
		mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
			     ret);
		mutex_unlock(&dev->devr.mutex);
		return ret;
	}
	dev->devr.ports[port_num - 1].gsi = NULL;
	mutex_unlock(&dev->devr.mutex);
	gsi->rx_qp = NULL;

	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
		if (!gsi->tx_qps[qp_index])
			continue;
		WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index]));
		gsi->tx_qps[qp_index] = NULL;
	}

	ib_free_cq(gsi->cq);

	kfree(gsi->outstanding_wrs);
	kfree(gsi->tx_qps);
	kfree(gsi);

	return 0;
}
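
/*
 * A TX QP is an ordinary UD QP that shares the consumer's receive CQ and
 * event context but sends on the private GSI CQ.
 * mlx5_ib_create_qp_sqpn_qp1() asks the device to stamp QP1 as the DETH
 * source QP number, so the peer sees the packets as sent by the GSI QP;
 * this is the behavior gated by the set_deth_sqpn capability.
 */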
static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
{
	struct ib_pd *pd = gsi->rx_qp->pd;
	struct ib_qp_init_attr init_attr = {
		.event_handler = gsi->rx_qp->event_handler,
		.qp_context = gsi->rx_qp->qp_context,
		.send_cq = gsi->cq,
		.recv_cq = gsi->rx_qp->recv_cq,
		.cap = {
			.max_send_wr = gsi->cap.max_send_wr,
			.max_send_sge = gsi->cap.max_send_sge,
			.max_inline_data = gsi->cap.max_inline_data,
		},
		.sq_sig_type = gsi->sq_sig_type,
		.qp_type = IB_QPT_UD,
		.create_flags = mlx5_ib_create_qp_sqpn_qp1(),
	};

	return ib_create_qp(pd, &init_attr);
}
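
/*
 * Move a TX QP through INIT -> RTR -> RTS. Its pkey_index is fixed at
 * qp_index, which is what lets get_tx_qp() select a TX QP directly by the
 * P_Key index carried in each posted work request.
 */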
static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
			 u16 qp_index)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct ib_qp_attr attr;
	int mask;
	int ret;

	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
	attr.qp_state = IB_QPS_INIT;
	attr.pkey_index = qp_index;
	attr.qkey = IB_QP1_QKEY;
	attr.port_num = gsi->port_num;
	ret = ib_modify_qp(qp, &attr, mask);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	attr.qp_state = IB_QPS_RTR;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	attr.qp_state = IB_QPS_RTS;
	attr.sq_psn = 0;
	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
	if (ret) {
		mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n",
			    qp->qp_num, ret);
		return ret;
	}

	return 0;
}
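
/*
 * (Re)create the TX QP backing one P_Key table slot. Invoked when the GSI
 * QP reaches RTS and again on P_Key table changes; empty P_Key slots are
 * skipped and an already-created QP is kept, so the function is
 * idempotent.
 */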
static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
{
	struct ib_device *device = gsi->rx_qp->device;
	struct mlx5_ib_dev *dev = to_mdev(device);
	struct ib_qp *qp;
	unsigned long flags;
	u16 pkey;
	int ret;

	ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey);
	if (ret) {
		mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
			     gsi->port_num, qp_index);
		return;
	}

	if (!pkey) {
		mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d. Skipping.\n",
			    gsi->port_num, qp_index);
		return;
	}

	spin_lock_irqsave(&gsi->lock, flags);
	qp = gsi->tx_qps[qp_index];
	spin_unlock_irqrestore(&gsi->lock, flags);
	if (qp) {
		mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n",
			    gsi->port_num, qp_index);
		return;
	}

	qp = create_gsi_ud_qp(gsi);
	if (IS_ERR(qp)) {
		mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
			     PTR_ERR(qp));
		return;
	}

	ret = modify_to_rts(gsi, qp, qp_index);
	if (ret)
		goto err_destroy_qp;

	spin_lock_irqsave(&gsi->lock, flags);
	WARN_ON_ONCE(gsi->tx_qps[qp_index]);
	gsi->tx_qps[qp_index] = qp;
	spin_unlock_irqrestore(&gsi->lock, flags);

	return;

err_destroy_qp:
	WARN_ON_ONCE(ib_destroy_qp(qp));
}

static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
{
	u16 qp_index;

	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
		setup_qp(gsi, qp_index);
}
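
/*
 * State modifications are applied to the hardware GSI QP; the TX QPs are
 * created lazily, only once the consumer drives the GSI QP to RTS.
 */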
int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
			  int attr_mask)
{
	struct mlx5_ib_dev *dev = to_mdev(qp->device);
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	int ret;

	mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);

	mutex_lock(&gsi->mutex);
	ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
	if (ret) {
		mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
		goto unlock;
	}

	if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
		setup_qps(gsi);

unlock:
	mutex_unlock(&gsi->mutex);

	return ret;
}

int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
			 int qp_attr_mask,
			 struct ib_qp_init_attr *qp_init_attr)
{
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	int ret;

	mutex_lock(&gsi->mutex);
	ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
	qp_init_attr->cap = gsi->cap;
	mutex_unlock(&gsi->mutex);

	return ret;
}

/* Call with gsi->lock locked */
static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
				      struct ib_ud_wr *wr, struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
	struct mlx5_ib_gsi_wr *gsi_wr;

	/* The counters are free-running, so the ring is full exactly when
	 * the producer is max_send_wr ahead of the consumer.
	 */
	if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) {
		mlx5_ib_warn(dev, "no available GSI work request.\n");
		return -ENOMEM;
	}

	gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi %
				       gsi->cap.max_send_wr];
	gsi->outstanding_pi++;

	/* Record the request's signaling flags; generate_completions()
	 * consults them for IB_SIGNAL_REQ_WR queue pairs.
	 */
	gsi_wr->send_flags = wr->wr.send_flags;
	if (!wc) {
		memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
		gsi_wr->wc.pkey_index = wr->pkey_index;
		gsi_wr->wc.wr_id = wr->wr.wr_id;
	} else {
		gsi_wr->wc = *wc;
		gsi_wr->completed = true;
	}

	gsi_wr->cqe.done = &handle_single_completion;
	wr->wr.wr_cqe = &gsi_wr->cqe;

	return 0;
}
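
/*
 * A WR whose P_Key index has no TX QP (e.g. the P_Key was invalid when
 * setup_qps() last ran) cannot be transmitted. Rather than failing the
 * post, report an artificial successful send completion so the consumer,
 * typically the MAD layer, keeps making progress.
 * Call with gsi->lock locked.
 */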
static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
				   struct ib_ud_wr *wr)
{
	struct ib_wc wc = {
		{ .wr_id = wr->wr.wr_id },
		.status = IB_WC_SUCCESS,
		.opcode = IB_WC_SEND,
		.qp = &gsi->ibqp,
	};
	int ret;

	ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
	if (ret)
		return ret;

	generate_completions(gsi);

	return 0;
}
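
/*
 * Pick the QP to transmit a WR on: without the DETH sQPN capability
 * everything goes through the hardware GSI QP itself; otherwise use the
 * TX QP matching the WR's P_Key index, or NULL if that QP was never
 * created (the caller then drops the WR silently).
 * Call with gsi->lock locked.
 */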
static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
{
	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
	int qp_index = wr->pkey_index;

	if (!mlx5_ib_deth_sqpn_cap(dev))
		return gsi->rx_qp;

	if (qp_index >= gsi->num_qps)
		return NULL;

	return gsi->tx_qps[qp_index];
}
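
/*
 * WRs are posted one by one under gsi->lock: each is first given a slot
 * in the outstanding ring, so its completion can later be republished in
 * post order, and is then handed to the selected TX QP with wr_cqe
 * pointing at that slot.
 */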
int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
			  struct ib_send_wr **bad_wr)
{
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
	struct ib_qp *tx_qp;
	unsigned long flags;
	int ret;

	for (; wr; wr = wr->next) {
		struct ib_ud_wr cur_wr = *ud_wr(wr);

		cur_wr.wr.next = NULL;

		spin_lock_irqsave(&gsi->lock, flags);
		tx_qp = get_tx_qp(gsi, &cur_wr);
		if (!tx_qp) {
			ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
			if (ret)
				goto err;
			spin_unlock_irqrestore(&gsi->lock, flags);
			continue;
		}

		ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
		if (ret)
			goto err;

		ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
		if (ret) {
			/* Undo the effect of adding the outstanding wr */
			gsi->outstanding_pi--;
			goto err;
		}
		spin_unlock_irqrestore(&gsi->lock, flags);
	}

	return 0;

err:
	spin_unlock_irqrestore(&gsi->lock, flags);
	*bad_wr = wr;
	return ret;
}

int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
			  struct ib_recv_wr **bad_wr)
{
	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);

	return ib_post_recv(gsi->rx_qp, wr, bad_wr);
}

void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
{
	if (!gsi)
		return;

	mutex_lock(&gsi->mutex);
	setup_qps(gsi);
	mutex_unlock(&gsi->mutex);
}
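
/*
 * A sketch of the intended caller of mlx5_ib_gsi_pkey_change(), e.g. from
 * the driver's P_Key change event handling (hypothetical wrapper; the
 * event plumbing and the exact type of dev->devr.ports[] live outside
 * this file):
 *
 *	mutex_lock(&dev->devr.mutex);
 *	mlx5_ib_gsi_pkey_change(dev->devr.ports[port - 1].gsi);
 *	mutex_unlock(&dev->devr.mutex);
 *
 * Holding devr.mutex keeps the ports[].gsi pointer stable against
 * concurrent mlx5_ib_gsi_create_qp()/mlx5_ib_gsi_destroy_qp() calls.
 */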