smc_clc.c 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. /*
  2. * Shared Memory Communications over RDMA (SMC-R) and RoCE
  3. *
  4. * CLC (connection layer control) handshake over initial TCP socket to
  5. * prepare for RDMA traffic
  6. *
  7. * Copyright IBM Corp. 2016
  8. *
  9. * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
  10. */
  11. #include <linux/in.h>
  12. #include <linux/if_ether.h>
  13. #include <linux/sched/signal.h>
  14. #include <net/sock.h>
  15. #include <net/tcp.h>
  16. #include "smc.h"
  17. #include "smc_core.h"
  18. #include "smc_clc.h"
  19. #include "smc_ib.h"
  20. /* Wait for data on the tcp-socket, analyze received data
  21. * Returns:
  22. * 0 if success and it was not a decline that we received.
  23. * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send.
  24. * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise.
  25. */
  26. int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
  27. u8 expected_type)
  28. {
  29. struct sock *clc_sk = smc->clcsock->sk;
  30. struct smc_clc_msg_hdr *clcm = buf;
  31. struct msghdr msg = {NULL, 0};
  32. int reason_code = 0;
  33. struct kvec vec;
  34. int len, datlen;
  35. int krflags;
  36. /* peek the first few bytes to determine length of data to receive
  37. * so we don't consume any subsequent CLC message or payload data
  38. * in the TCP byte stream
  39. */
  40. vec.iov_base = buf;
  41. vec.iov_len = buflen;
  42. krflags = MSG_PEEK | MSG_WAITALL;
  43. smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
  44. len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1,
  45. sizeof(struct smc_clc_msg_hdr), krflags);
  46. if (signal_pending(current)) {
  47. reason_code = -EINTR;
  48. clc_sk->sk_err = EINTR;
  49. smc->sk.sk_err = EINTR;
  50. goto out;
  51. }
  52. if (clc_sk->sk_err) {
  53. reason_code = -clc_sk->sk_err;
  54. smc->sk.sk_err = clc_sk->sk_err;
  55. goto out;
  56. }
  57. if (!len) { /* peer has performed orderly shutdown */
  58. smc->sk.sk_err = ECONNRESET;
  59. reason_code = -ECONNRESET;
  60. goto out;
  61. }
  62. if (len < 0) {
  63. smc->sk.sk_err = -len;
  64. reason_code = len;
  65. goto out;
  66. }
  67. datlen = ntohs(clcm->length);
  68. if ((len < sizeof(struct smc_clc_msg_hdr)) ||
  69. (datlen < sizeof(struct smc_clc_msg_decline)) ||
  70. (datlen > sizeof(struct smc_clc_msg_accept_confirm)) ||
  71. memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) ||
  72. ((clcm->type != SMC_CLC_DECLINE) &&
  73. (clcm->type != expected_type))) {
  74. smc->sk.sk_err = EPROTO;
  75. reason_code = -EPROTO;
  76. goto out;
  77. }
  78. /* receive the complete CLC message */
  79. vec.iov_base = buf;
  80. vec.iov_len = buflen;
  81. memset(&msg, 0, sizeof(struct msghdr));
  82. krflags = MSG_WAITALL;
  83. smc->clcsock->sk->sk_rcvtimeo = CLC_WAIT_TIME;
  84. len = kernel_recvmsg(smc->clcsock, &msg, &vec, 1, datlen, krflags);
  85. if (len < datlen) {
  86. smc->sk.sk_err = EPROTO;
  87. reason_code = -EPROTO;
  88. goto out;
  89. }
  90. if (clcm->type == SMC_CLC_DECLINE) {
  91. reason_code = SMC_CLC_DECL_REPLY;
  92. if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis)
  93. == SMC_CLC_DECL_SYNCERR)
  94. smc->conn.lgr->sync_err = true;
  95. }
  96. out:
  97. return reason_code;
  98. }
  99. /* send CLC DECLINE message across internal TCP socket */
  100. int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info,
  101. u8 out_of_sync)
  102. {
  103. struct smc_clc_msg_decline dclc;
  104. struct msghdr msg;
  105. struct kvec vec;
  106. int len;
  107. memset(&dclc, 0, sizeof(dclc));
  108. memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
  109. dclc.hdr.type = SMC_CLC_DECLINE;
  110. dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline));
  111. dclc.hdr.version = SMC_CLC_V1;
  112. dclc.hdr.flag = out_of_sync ? 1 : 0;
  113. memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid));
  114. dclc.peer_diagnosis = htonl(peer_diag_info);
  115. memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
  116. memset(&msg, 0, sizeof(msg));
  117. vec.iov_base = &dclc;
  118. vec.iov_len = sizeof(struct smc_clc_msg_decline);
  119. len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1,
  120. sizeof(struct smc_clc_msg_decline));
  121. if (len < sizeof(struct smc_clc_msg_decline))
  122. smc->sk.sk_err = EPROTO;
  123. if (len < 0)
  124. smc->sk.sk_err = -len;
  125. return len;
  126. }
  127. /* send CLC PROPOSAL message across internal TCP socket */
  128. int smc_clc_send_proposal(struct smc_sock *smc,
  129. struct smc_ib_device *smcibdev,
  130. u8 ibport)
  131. {
  132. struct smc_clc_msg_proposal pclc;
  133. int reason_code = 0;
  134. struct msghdr msg;
  135. struct kvec vec;
  136. int len, rc;
  137. /* send SMC Proposal CLC message */
  138. memset(&pclc, 0, sizeof(pclc));
  139. memcpy(pclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
  140. pclc.hdr.type = SMC_CLC_PROPOSAL;
  141. pclc.hdr.length = htons(sizeof(pclc));
  142. pclc.hdr.version = SMC_CLC_V1; /* SMC version */
  143. memcpy(pclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
  144. memcpy(&pclc.lcl.gid, &smcibdev->gid[ibport - 1], SMC_GID_SIZE);
  145. memcpy(&pclc.lcl.mac, &smcibdev->mac[ibport - 1], ETH_ALEN);
  146. /* determine subnet and mask from internal TCP socket */
  147. rc = smc_netinfo_by_tcpsk(smc->clcsock, &pclc.outgoing_subnet,
  148. &pclc.prefix_len);
  149. if (rc)
  150. return SMC_CLC_DECL_CNFERR; /* configuration error */
  151. memcpy(pclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
  152. memset(&msg, 0, sizeof(msg));
  153. vec.iov_base = &pclc;
  154. vec.iov_len = sizeof(pclc);
  155. /* due to the few bytes needed for clc-handshake this cannot block */
  156. len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(pclc));
  157. if (len < sizeof(pclc)) {
  158. if (len >= 0) {
  159. reason_code = -ENETUNREACH;
  160. smc->sk.sk_err = -reason_code;
  161. } else {
  162. smc->sk.sk_err = smc->clcsock->sk->sk_err;
  163. reason_code = -smc->sk.sk_err;
  164. }
  165. }
  166. return reason_code;
  167. }
  168. /* send CLC CONFIRM message across internal TCP socket */
  169. int smc_clc_send_confirm(struct smc_sock *smc)
  170. {
  171. struct smc_connection *conn = &smc->conn;
  172. struct smc_clc_msg_accept_confirm cclc;
  173. struct smc_link *link;
  174. int reason_code = 0;
  175. struct msghdr msg;
  176. struct kvec vec;
  177. int len;
  178. link = &conn->lgr->lnk[SMC_SINGLE_LINK];
  179. /* send SMC Confirm CLC msg */
  180. memset(&cclc, 0, sizeof(cclc));
  181. memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
  182. cclc.hdr.type = SMC_CLC_CONFIRM;
  183. cclc.hdr.length = htons(sizeof(cclc));
  184. cclc.hdr.version = SMC_CLC_V1; /* SMC version */
  185. memcpy(cclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
  186. memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
  187. SMC_GID_SIZE);
  188. memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
  189. hton24(cclc.qpn, link->roce_qp->qp_num);
  190. cclc.rmb_rkey =
  191. htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
  192. cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */
  193. cclc.rmbe_alert_token = htonl(conn->alert_token_local);
  194. cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
  195. cclc.rmbe_size = conn->rmbe_size_short;
  196. cclc.rmb_dma_addr = cpu_to_be64(
  197. (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
  198. hton24(cclc.psn, link->psn_initial);
  199. memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
  200. memset(&msg, 0, sizeof(msg));
  201. vec.iov_base = &cclc;
  202. vec.iov_len = sizeof(cclc);
  203. len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, sizeof(cclc));
  204. if (len < sizeof(cclc)) {
  205. if (len >= 0) {
  206. reason_code = -ENETUNREACH;
  207. smc->sk.sk_err = -reason_code;
  208. } else {
  209. smc->sk.sk_err = smc->clcsock->sk->sk_err;
  210. reason_code = -smc->sk.sk_err;
  211. }
  212. }
  213. return reason_code;
  214. }
  215. /* send CLC ACCEPT message across internal TCP socket */
  216. int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
  217. {
  218. struct smc_connection *conn = &new_smc->conn;
  219. struct smc_clc_msg_accept_confirm aclc;
  220. struct smc_link *link;
  221. struct msghdr msg;
  222. struct kvec vec;
  223. int rc = 0;
  224. int len;
  225. link = &conn->lgr->lnk[SMC_SINGLE_LINK];
  226. memset(&aclc, 0, sizeof(aclc));
  227. memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
  228. aclc.hdr.type = SMC_CLC_ACCEPT;
  229. aclc.hdr.length = htons(sizeof(aclc));
  230. aclc.hdr.version = SMC_CLC_V1; /* SMC version */
  231. if (srv_first_contact)
  232. aclc.hdr.flag = 1;
  233. memcpy(aclc.lcl.id_for_peer, local_systemid, sizeof(local_systemid));
  234. memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
  235. SMC_GID_SIZE);
  236. memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN);
  237. hton24(aclc.qpn, link->roce_qp->qp_num);
  238. aclc.rmb_rkey =
  239. htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
  240. aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */
  241. aclc.rmbe_alert_token = htonl(conn->alert_token_local);
  242. aclc.qp_mtu = link->path_mtu;
  243. aclc.rmbe_size = conn->rmbe_size_short,
  244. aclc.rmb_dma_addr = cpu_to_be64(
  245. (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
  246. hton24(aclc.psn, link->psn_initial);
  247. memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER));
  248. memset(&msg, 0, sizeof(msg));
  249. vec.iov_base = &aclc;
  250. vec.iov_len = sizeof(aclc);
  251. len = kernel_sendmsg(new_smc->clcsock, &msg, &vec, 1, sizeof(aclc));
  252. if (len < sizeof(aclc)) {
  253. if (len >= 0)
  254. new_smc->sk.sk_err = EPROTO;
  255. else
  256. new_smc->sk.sk_err = new_smc->clcsock->sk->sk_err;
  257. rc = sock_error(&new_smc->sk);
  258. }
  259. return rc;
  260. }