smc_close.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_TX_PENDS_TIME	(5 * HZ)

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

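/* wait at most SMC_CLOSE_WAIT_TX_PENDS_TIME for pending CDC transmissions
 * to complete; gives up early if a signal is pending
 */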
static void smc_close_wait_tx_pends(struct smc_sock *smc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;
	signed long timeout;

	timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_cdc_tx_has_pending(&smc->conn),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

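/* wake up a closing process blocked in smc_close_stream_wait() once the
 * prepared sends have been transmitted
 */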
void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

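/* announce "done writing" to the peer (half close / shutdown write) */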
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

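/* send the final closing indication: an abort if unread data remains in the
 * local receive buffer, a regular close otherwise
 */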
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

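/* unconditionally announce an abort to the peer */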
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	smc->sk.sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (smc->sk.sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
		smc->sk.sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		txflags->peer_conn_abort = 1;
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			smc->sk.sk_state = SMC_PEERABORTWAIT;
		else
			smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			smc->sk.sk_state = SMC_PEERABORTWAIT;
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		} else {
			smc->sk.sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed) {
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		}
		smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(&smc->sk, SOCK_DEAD);
	smc->sk.sk_state_change(&smc->sk);
}

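/* true once any closing indication (close or abort) has been sent */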
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}

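/* close the local socket: drive the close protocol state machine from the
 * current state, flush prepared sends, and send the closing indications
 * required towards the peer
 */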
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			cancel_work_sync(&smc->smc_listen_work);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		cancel_work_sync(&smc->smc_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_err != ECONNABORTED) {
			/* confirm close from peer */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

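/* the peer sent peer_conn_abort: move to SMC_PROCESSABORT or straight to
 * SMC_CLOSED, depending on how far the local close has progressed
 */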
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		smc_close_abort(&smc->conn);
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn)) {
			/* just shutdown, but not yet closed locally */
			smc_close_abort(&smc->conn);
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
 * or peer_done_writing.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(&smc->sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &smc->conn.local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		smc_close_passive_abort_received(smc);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&smc->conn.bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn)))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through to check for closing */
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket)) {
			smc_conn_free(&smc->conn);
			schedule_delayed_work(&smc->sock_put_work,
					      SMC_CLOSE_SOCK_PUT_DELAY);
		}
	}
	release_sock(&smc->sk);
}

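/* delayed work: unhash the socket and drop the final socket reference */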
void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}

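/* shutdown the write side of the connection: wait for prepared sends, send
 * peer_done_writing, and advance the socket state accordingly
 */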
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (sk->sk_state == SMC_ACTIVE)
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		else
			goto again;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}