smc_close.c

/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>
#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_TX_PENDS_TIME	(5 * HZ)

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

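/* wait (with timeout) until pending CDC tx work requests are confirmed */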
static void smc_close_wait_tx_pends(struct smc_sock *smc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;
	signed long timeout;

	timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_cdc_tx_has_pending(&smc->conn),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

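/* signal "done writing", i.e. a write shutdown, to the peer */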
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

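/* signal close to the peer; signal abort instead if unread data remains */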
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

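/* signal abnormal termination to the peer */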
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	smc->sk.sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (smc->sk.sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
		smc->sk.sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		txflags->peer_conn_abort = 1;
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			smc->sk.sk_state = SMC_PEERABORTWAIT;
		else
			smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			smc->sk.sk_state = SMC_PEERABORTWAIT;
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		} else {
			smc->sk.sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed) {
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		}
		smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}
	sock_set_flag(&smc->sk, SOCK_DEAD);
	smc->sk.sk_state_change(&smc->sk);
}

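/* has a close or abort indication already been sent to the peer? */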
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}

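/* perform state change and peer signalling for the active (local) close */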
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	int rc = 0;

	if (sock_flag(sk, SOCK_LINGER) &&
	    !(current->flags & PF_EXITING))
		timeout = sk->sk_lingertime;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			cancel_work_sync(&smc->smc_listen_work);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		cancel_work_sync(&smc->smc_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_err != ECONNABORTED) {
			/* confirm close from peer */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		cancel_work_sync(&conn->tx_work);
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

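/* an abort indication has been received from the peer */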
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		smc_close_abort(&smc->conn);
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn)) {
			/* just shutdown, but not yet closed locally */
			smc_close_abort(&smc->conn);
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
 * or peer_done_writing.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(&smc->sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		smc_close_passive_abort_received(smc);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&smc->conn.bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn)))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through to check for closing */
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(&smc->conn);
			schedule_delayed_work(&smc->sock_put_work,
					      SMC_CLOSE_SOCK_PUT_DELAY);
		}
	}
	release_sock(&smc->sk);
}

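/* delayed worker: unhash the socket and give up the final socket reference */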
void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}

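/* state change and peer signalling for a shutdown of the write side */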
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
	struct sock *sk = &smc->sk;
	int old_state;
	int rc = 0;

	if (sock_flag(sk, SOCK_LINGER))
		timeout = sk->sk_lingertime;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (sk->sk_state == SMC_ACTIVE)
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		else
			goto again;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}