Эх сурвалжийг харах

DLM: fix race condition between dlm_send and dlm_recv

When kernel_sendpage(in send_to_sock) and kernel_recvmsg
(in receive_from_sock) return error, close_connection may works at the
same time. At that time, they may wait for each other by cancel_work_sync.

Signed-off-by: Tadashi Miyauchi <miayuchi@toshiba-tops.co.jp>
Signed-off-by: Tsutomu Owa <tsutomu.owa@toshiba.co.jp>
Signed-off-by: David Teigland <teigland@redhat.com>
tsutomu.owa@toshiba.co.jp 8 жил өмнө
parent
commit
b2a6662932
1 өөрчлөгдсөн 6 нэмэгдсэн , 2 устгасан
  1. 6 2
      fs/dlm/lowcomms.c

+ 6 - 2
fs/dlm/lowcomms.c

@@ -110,6 +110,7 @@ struct connection {
 #define CF_IS_OTHERCON 5
 #define CF_CLOSE 6
 #define CF_APP_LIMITED 7
+#define CF_CLOSING 8
 	struct list_head writequeue;  /* List of outgoing writequeue_entries */
 	spinlock_t writequeue_lock;
 	int (*rx_action) (struct connection *);	/* What to do when active */
@@ -581,9 +582,11 @@ static void make_sockaddr(struct sockaddr_storage *saddr, uint16_t port,
 static void close_connection(struct connection *con, bool and_other,
 			     bool tx, bool rx)
 {
-	if (tx && cancel_work_sync(&con->swork))
+	bool closing = test_and_set_bit(CF_CLOSING, &con->flags);
+
+	if (tx && !closing && cancel_work_sync(&con->swork))
 		log_print("canceled swork for node %d", con->nodeid);
-	if (rx && cancel_work_sync(&con->rwork))
+	if (rx && !closing && cancel_work_sync(&con->rwork))
 		log_print("canceled rwork for node %d", con->nodeid);
 
 	mutex_lock(&con->sock_mutex);
@@ -603,6 +606,7 @@ static void close_connection(struct connection *con, bool and_other,
 
 	con->retries = 0;
 	mutex_unlock(&con->sock_mutex);
+	clear_bit(CF_CLOSING, &con->flags);
 }
 
 /* Data received from remote end */