smc_close.c
/*
 * Shared Memory Communications over RDMA (SMC-R) and RoCE
 *
 * Socket Closing - normal and abnormal
 *
 * Copyright IBM Corp. 2016
 *
 * Author(s):  Ursula Braun <ubraun@linux.vnet.ibm.com>
 */

#include <linux/workqueue.h>
#include <linux/sched/signal.h>

#include <net/sock.h>

#include "smc.h"
#include "smc_tx.h"
#include "smc_cdc.h"
#include "smc_close.h"

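/* time to wait for pending CDC tx work requests to drain on close */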
#define SMC_CLOSE_WAIT_TX_PENDS_TIME	(5 * HZ)

static void smc_close_cleanup_listen(struct sock *parent)
{
	struct sock *sk;

	/* Close non-accepted connections */
	while ((sk = smc_accept_dequeue(parent, NULL)))
		smc_close_non_accepted(sk);
}

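/* wait (at most SMC_CLOSE_WAIT_TX_PENDS_TIME) until all CDC messages of
 * this connection that are still pending have been transmitted
 */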
static void smc_close_wait_tx_pends(struct smc_sock *smc)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;
	signed long timeout;

	timeout = SMC_CLOSE_WAIT_TX_PENDS_TIME;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_cdc_tx_has_pending(&smc->conn),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
}

/* wait for sndbuf data being transmitted */
static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
{
	DEFINE_WAIT_FUNC(wait, woken_wake_function);
	struct sock *sk = &smc->sk;

	if (!timeout)
		return;

	if (!smc_tx_prepared_sends(&smc->conn))
		return;

	smc->wait_close_tx_prepared = 1;
	add_wait_queue(sk_sleep(sk), &wait);
	while (!signal_pending(current) && timeout) {
		int rc;

		rc = sk_wait_event(sk, &timeout,
				   !smc_tx_prepared_sends(&smc->conn) ||
				   (sk->sk_err == ECONNABORTED) ||
				   (sk->sk_err == ECONNRESET),
				   &wait);
		if (rc)
			break;
	}
	remove_wait_queue(sk_sleep(sk), &wait);
	smc->wait_close_tx_prepared = 0;
}

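/* wake up a closer blocked in smc_close_stream_wait() */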
void smc_close_wake_tx_prepared(struct smc_sock *smc)
{
	if (smc->wait_close_tx_prepared)
		/* wake up socket closing */
		smc->sk.sk_state_change(&smc->sk);
}

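/* tell the peer that this side is done writing; sent as a CDC message
 * with the peer_done_writing flag set
 */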
static int smc_close_wr(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_done_writing = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

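/* send a final close indication: an abort if the receive buffer still
 * holds unread data, a regular close otherwise
 */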
static int smc_close_final(struct smc_connection *conn)
{
	if (atomic_read(&conn->bytes_to_rcv))
		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
	else
		conn->local_tx_ctrl.conn_state_flags.peer_conn_closed = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

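/* tell the peer to abort the connection via a CDC message */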
static int smc_close_abort(struct smc_connection *conn)
{
	conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;

	return smc_cdc_get_slot_and_msg_send(conn);
}

/* terminate smc socket abnormally - active abort
 * RDMA communication no longer possible
 */
void smc_close_active_abort(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;

	bh_lock_sock(&smc->sk);
	smc->sk.sk_err = ECONNABORTED;
	if (smc->clcsock && smc->clcsock->sk) {
		smc->clcsock->sk->sk_err = ECONNABORTED;
		smc->clcsock->sk->sk_state_change(smc->clcsock->sk);
	}
	switch (smc->sk.sk_state) {
	case SMC_INIT:
		smc->sk.sk_state = SMC_PEERABORTWAIT;
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		txflags->peer_conn_abort = 1;
		sock_release(smc->clcsock);
		if (!smc_cdc_rxed_any_close(&smc->conn))
			smc->sk.sk_state = SMC_PEERABORTWAIT;
		else
			smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (!txflags->peer_conn_closed) {
			smc->sk.sk_state = SMC_PEERABORTWAIT;
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		} else {
			smc->sk.sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PROCESSABORT:
	case SMC_APPFINCLOSEWAIT:
		if (!txflags->peer_conn_closed) {
			txflags->peer_conn_abort = 1;
			sock_release(smc->clcsock);
		}
		smc->sk.sk_state = SMC_CLOSED;
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		break;
	}
	sock_set_flag(&smc->sk, SOCK_DEAD);
	bh_unlock_sock(&smc->sk);
	smc->sk.sk_state_change(&smc->sk);
}

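/* active close: inform the peer and advance the socket state machine,
 * depending on the current state
 */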
int smc_close_active(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
	struct smc_connection *conn = &smc->conn;
	struct sock *sk = &smc->sk;
	int old_state;
	int rc = 0;

	if (sock_flag(sk, SOCK_LINGER) &&
	    !(current->flags & PF_EXITING))
		timeout = sk->sk_lingertime;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_INIT:
		sk->sk_state = SMC_CLOSED;
		if (smc->smc_listen_work.func)
			flush_work(&smc->smc_listen_work);
		sock_put(sk);
		break;
	case SMC_LISTEN:
		sk->sk_state = SMC_CLOSED;
		sk->sk_state_change(sk); /* wake up accept */
		if (smc->clcsock && smc->clcsock->sk) {
			rc = kernel_sock_shutdown(smc->clcsock, SHUT_RDWR);
			/* wake up kernel_accept of smc_tcp_listen_worker */
			smc->clcsock->sk->sk_data_ready(smc->clcsock->sk);
		}
		release_sock(sk);
		smc_close_cleanup_listen(sk);
		flush_work(&smc->tcp_listen_work);
		lock_sock(sk);
		break;
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_state == SMC_ACTIVE) {
			/* send close request */
			rc = smc_close_final(conn);
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		} else {
			/* peer event has changed the state */
			goto again;
		}
		break;
	case SMC_APPFINCLOSEWAIT:
		/* socket already shutdown wr or both (active close) */
		if (txflags->peer_done_writing &&
		    !txflags->peer_conn_closed) {
			/* just shutdown wr done, send close request */
			rc = smc_close_final(conn);
		}
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		if (sk->sk_err != ECONNABORTED) {
			/* confirm close from peer */
			rc = smc_close_final(conn);
			if (rc)
				break;
		}
		if (smc_cdc_rxed_any_close(conn))
			/* peer has closed the socket already */
			sk->sk_state = SMC_CLOSED;
		else
			/* peer has just issued a shutdown write */
			sk->sk_state = SMC_PEERFINCLOSEWAIT;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		/* peer sending PeerConnectionClosed will cause transition */
		break;
	case SMC_PROCESSABORT:
		cancel_work_sync(&conn->tx_work);
		smc_close_abort(conn);
		sk->sk_state = SMC_CLOSED;
		smc_close_wait_tx_pends(smc);
		break;
	case SMC_PEERABORTWAIT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}

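/* an abort indication (peer_conn_abort) has been received from the peer;
 * acknowledge it where needed and update the socket state
 */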
static void smc_close_passive_abort_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *txflags =
		&smc->conn.local_tx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;

	switch (sk->sk_state) {
	case SMC_ACTIVE:
	case SMC_APPFINCLOSEWAIT:
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
		smc_close_abort(&smc->conn);
		sk->sk_state = SMC_PROCESSABORT;
		break;
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
		if (txflags->peer_done_writing &&
		    !txflags->peer_conn_closed) {
			/* just shutdown, but not yet closed locally */
			smc_close_abort(&smc->conn);
			sk->sk_state = SMC_PROCESSABORT;
		} else {
			sk->sk_state = SMC_CLOSED;
		}
		break;
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
		sk->sk_state = SMC_CLOSED;
		break;
	case SMC_INIT:
	case SMC_PROCESSABORT:
		/* nothing to do, add tracing in future patch */
		break;
	}
}

/* Some kind of closing has been received: peer_conn_closed, peer_conn_abort,
 * or peer_done_writing.
 * Called under tasklet context.
 */
void smc_close_passive_received(struct smc_sock *smc)
{
	struct smc_cdc_conn_state_flags *rxflags =
		&smc->conn.local_rx_ctrl.conn_state_flags;
	struct sock *sk = &smc->sk;
	int old_state;

	sk->sk_shutdown |= RCV_SHUTDOWN;
	if (smc->clcsock && smc->clcsock->sk)
		smc->clcsock->sk->sk_shutdown |= RCV_SHUTDOWN;
	sock_set_flag(&smc->sk, SOCK_DONE);

	old_state = sk->sk_state;

	if (rxflags->peer_conn_abort) {
		smc_close_passive_abort_received(smc);
		goto wakeup;
	}

	switch (sk->sk_state) {
	case SMC_INIT:
		if (atomic_read(&smc->conn.bytes_to_rcv) ||
		    (rxflags->peer_done_writing &&
		     !rxflags->peer_conn_closed))
			sk->sk_state = SMC_APPCLOSEWAIT1;
		else
			sk->sk_state = SMC_CLOSED;
		break;
	case SMC_ACTIVE:
		sk->sk_state = SMC_APPCLOSEWAIT1;
		break;
	case SMC_PEERCLOSEWAIT1:
		if (rxflags->peer_done_writing)
			sk->sk_state = SMC_PEERCLOSEWAIT2;
		/* fall through to check for closing */
	case SMC_PEERCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
		if (!smc_cdc_rxed_any_close(&smc->conn))
			break;
		if (sock_flag(sk, SOCK_DEAD) &&
		    (sk->sk_shutdown == SHUTDOWN_MASK)) {
			/* smc_release has already been called locally */
			sk->sk_state = SMC_CLOSED;
		} else {
			/* just shutdown, but not yet closed locally */
			sk->sk_state = SMC_APPFINCLOSEWAIT;
		}
		break;
	case SMC_APPCLOSEWAIT1:
	case SMC_APPCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PEERABORTWAIT:
	case SMC_PROCESSABORT:
	case SMC_CLOSED:
		/* nothing to do, add tracing in future patch */
		break;
	}

wakeup:
	if (old_state != sk->sk_state)
		sk->sk_state_change(sk);
	sk->sk_data_ready(sk); /* wakeup blocked rcvbuf consumers */
	sk->sk_write_space(sk); /* wakeup blocked sndbuf producers */

	if ((sk->sk_state == SMC_CLOSED) &&
	    (sock_flag(sk, SOCK_DEAD) || (old_state == SMC_INIT))) {
		smc_conn_free(&smc->conn);
		schedule_delayed_work(&smc->sock_put_work,
				      SMC_CLOSE_SOCK_PUT_DELAY);
	}
}

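/* delayed work to unhash the socket and drop a sock reference */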
void smc_close_sock_put_work(struct work_struct *work)
{
	struct smc_sock *smc = container_of(to_delayed_work(work),
					    struct smc_sock,
					    sock_put_work);

	smc->sk.sk_prot->unhash(&smc->sk);
	sock_put(&smc->sk);
}

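/* shutdown write: send a peer_done_writing indication and update the
 * socket state accordingly
 */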
int smc_close_shutdown_write(struct smc_sock *smc)
{
	struct smc_connection *conn = &smc->conn;
	long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT;
	struct sock *sk = &smc->sk;
	int old_state;
	int rc = 0;

	if (sock_flag(sk, SOCK_LINGER))
		timeout = sk->sk_lingertime;

again:
	old_state = sk->sk_state;
	switch (old_state) {
	case SMC_ACTIVE:
		smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* send close wr request */
		rc = smc_close_wr(conn);
		if (sk->sk_state == SMC_ACTIVE)
			sk->sk_state = SMC_PEERCLOSEWAIT1;
		else
			goto again;
		break;
	case SMC_APPCLOSEWAIT1:
		/* passive close */
		if (!smc_cdc_rxed_any_close(conn))
			smc_close_stream_wait(smc, timeout);
		release_sock(sk);
		cancel_work_sync(&conn->tx_work);
		lock_sock(sk);
		/* confirm close from peer */
		rc = smc_close_wr(conn);
		sk->sk_state = SMC_APPCLOSEWAIT2;
		break;
	case SMC_APPCLOSEWAIT2:
	case SMC_PEERFINCLOSEWAIT:
	case SMC_PEERCLOSEWAIT1:
	case SMC_PEERCLOSEWAIT2:
	case SMC_APPFINCLOSEWAIT:
	case SMC_PROCESSABORT:
	case SMC_PEERABORTWAIT:
		/* nothing to do, add tracing in future patch */
		break;
	}

	if (old_state != sk->sk_state)
		sk->sk_state_change(&smc->sk);
	return rc;
}