// SPDX-License-Identifier: GPL-2.0
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com>
 */
#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

#define SMC_CLOSE_WAIT_LISTEN_CLCSOCK_TIME	(5 * HZ)
static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}
/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}
void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}
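
/* announce the end of the local write side to the peer via a CDC message */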
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}
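
/* announce the closing of the connection to the peer: send an abort if
 * unread data is still pending in the receive buffer, an orderly close
 * otherwise
 */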
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}
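
/* announce an abort of the connection to the peer */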
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}
/* terminate smc socket abnormally - active abort
 * link group is terminated, i.e. RDMA communication no longer possible
 */
static void smc_close_active_abort(struct smc_sock *smc)
{
	struct sock *sk = &smc->sk;
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	sk->sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
		sk->sk_state = SMC_PEERABORTWAIT;
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			sk->sk_state = SMC_PEERABORTWAIT;
		else
			sk->sk_state = SMC_CLOSED;
		release_sock(sk);
		cancel_delayed_work_sync(&smc->conn.tx_work);
		lock_sock(sk);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			/* just SHUTDOWN_SEND done */
			sk->sk_state = SMC_PEERABORTWAIT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}

	sock_set_flag(sk, SOCK_DEAD);
	sk->sk_state_change(sk);
}
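
/* return true if a closing indication - abort or orderly close - has
 * already been sent to the peer
 */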
static inline bool smc_close_sent_any_close(struct smc_connection *conn)
{
	return conn->local_tx_ctrl.conn_state_flags.peer_conn_abort ||
	       conn->local_tx_ctrl.conn_state_flags.peer_conn_closed;
}
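
/* perform the close state transitions for a locally initiated close */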
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		smc_close_cleanup_listen(sk);
		release_sock(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1 &&
		    sk->sk_state != SMC_APPCLOSEWAIT2)
			goto again;
		/* confirm close from peer */
		rc = smc_close_final(conn);
		if (rc)
			break;
		if (smc_cdc_rxed_any_close(conn)) {
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* postponed passive closing */
		} else {
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		}
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(conn)) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
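
/* an abort indication has been received from the peer; move the socket
 * towards its final state
 */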
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_INIT:
	case SMC_ACTIVE:
	case SMC_APPCLOSEWAIT1:
		sk->sk_state = SMC_PROCESSABORT;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPFINCLOSEWAIT:
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !smc_close_sent_any_close(&smc->conn))
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_PROCESSABORT;
		else
			sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		sk->sk_state = SMC_CLOSED;
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}
/* Either some kind of closing has been received: peer_conn_closed,
 * peer_conn_abort, or peer_done_writing;
 * or the link group of the connection terminates abnormally.
 */
static void smc_close_passive_work(struct work_struct *work)
{
	struct smc_connection *conn = container_of(work,
						   struct smc_connection,
						   close_work);
	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
	struct smc_cdc_conn_state_flags *rxflags;
	struct sock *sk = &smc->sk;
	int old_state;

	lock_sock(sk);
	old_state = sk->sk_state;

	if (!conn->alert_token_local) {
		/* abnormal termination */
		smc_close_active_abort(smc);
		goto wakeup;
	}

	rxflags = &conn->local_rx_ctrl.conn_state_flags;
	if (rxflags->peer_conn_abort) {
		/* peer has not received all data */
		smc_close_passive_abort_received(smc);
		release_sock(&smc->sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(&smc->sk);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&conn->bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !smc_cdc_rxed_any_close(conn))) {
			sk->sk_state = SMC_APPCLOSEWAIT1;
		} else {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through */
		/* to check for closing */
	case SMC_PEERCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    smc_close_sent_any_close(conn)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		sock_put(sk); /* passive closing */
		break;
	case SMC_PEERFINCLOSEWAIT:
		if (smc_cdc_rxed_any_close(conn)) {
			sk->sk_state = SMC_CLOSED;
			sock_put(sk); /* passive closing */
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		/* postpone sock_put() for passive closing to cover
		 * received SEND_SHUTDOWN as well
		 */
		break;
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if (old_state != sk->sk_state) {
		sk->sk_state_change(sk);
		if ((sk->sk_state == SMC_CLOSED) &&
		    (sock_flag(sk, SOCK_DEAD) || !sk->sk_socket))
			smc_conn_free(conn);
	}
	release_sock(sk);
	sock_put(sk); /* sock_hold done by schedulers of close_work */
}
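
/* close the write side of the connection, e.g. for shutdown(SHUT_WR) */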
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	long timeout;
	int rc = 0;

	timeout = current->flags & PF_EXITING ?
		  0 : sock_flag(sk, SOCK_LINGER) ?
		      sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT;

	old_state = sk->sk_state;
again:
	switch (sk->sk_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_ACTIVE)
			goto again;
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (rc)
			break;
		sk->sk_state = SMC_PEERCLOSEWAIT1;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_delayed_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state != SMC_APPCLOSEWAIT1)
			goto again;
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		if (rc)
			break;
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	return rc;
}
/* Initialize close properties on connection establishment. */
void smc_close_init(struct smc_sock *smc)
{
	INIT_WORK(&smc->conn.close_work, smc_close_passive_work);
}