Browse Source

Merge branch 'smc-next'

Ursula Braun says:

====================
net/smc: patches 2018-07-25

here are 4 more patches for SMC: The first one is just a small
code cleanup in preparation for patch 2. Patch 2 switches to the
use of the vlan-gid for VLAN traffic. Patch 3 improves diagnosis
when creating SMC connections. Patch 4 improves synchronization
between local and remote link groups.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 7 years ago
parent
commit
b9a9ad782f

+ 6 - 0
include/uapi/linux/smc_diag.h

@@ -43,6 +43,7 @@ enum {
 	SMC_DIAG_LGRINFO,
 	SMC_DIAG_SHUTDOWN,
 	SMC_DIAG_DMBINFO,
+	SMC_DIAG_FALLBACK,
 	__SMC_DIAG_MAX,
 };
 
@@ -92,6 +93,11 @@ struct smc_diag_lgrinfo {
 	__u8				role;
 };
 
+struct smc_diag_fallback {
+	__u32 reason;
+	__u32 peer_diagnosis;
+};
+
 struct smcd_diag_dmbinfo {		/* SMC-D Socket internals */
 	__u32 linkid;			/* Link identifier */
 	__u64 peer_gid;			/* Peer GID */

+ 45 - 41
net/smc/af_smc.c

@@ -344,20 +344,17 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 
 	rc = smc_ib_modify_qp_rts(link);
 	if (rc)
-		return SMC_CLC_DECL_INTERR;
+		return SMC_CLC_DECL_ERR_RDYLNK;
 
 	smc_wr_remember_qp_attr(link);
 
 	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
-		return SMC_CLC_DECL_INTERR;
+		return SMC_CLC_DECL_ERR_REGRMB;
 
 	/* send CONFIRM LINK response over RoCE fabric */
-	rc = smc_llc_send_confirm_link(link,
-				       link->smcibdev->mac[link->ibport - 1],
-				       &link->smcibdev->gid[link->ibport - 1],
-				       SMC_LLC_RESP);
+	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
 	if (rc < 0)
-		return SMC_CLC_DECL_TCL;
+		return SMC_CLC_DECL_TIMEOUT_CL;
 
 	/* receive ADD LINK request from server over RoCE fabric */
 	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
@@ -373,10 +370,9 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 	/* send add link reject message, only one link supported for now */
 	rc = smc_llc_send_add_link(link,
 				   link->smcibdev->mac[link->ibport - 1],
-				   &link->smcibdev->gid[link->ibport - 1],
-				   SMC_LLC_RESP);
+				   link->gid, SMC_LLC_RESP);
 	if (rc < 0)
-		return SMC_CLC_DECL_TCL;
+		return SMC_CLC_DECL_TIMEOUT_AL;
 
 	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
 
@@ -428,9 +424,10 @@ static void smc_link_save_peer_info(struct smc_link *link,
 }
 
 /* fall back during connect */
-static int smc_connect_fallback(struct smc_sock *smc)
+static int smc_connect_fallback(struct smc_sock *smc, int reason_code)
 {
 	smc->use_fallback = true;
+	smc->fallback_rsn = reason_code;
 	smc_copy_sock_settings_to_clc(smc);
 	if (smc->sk.sk_state == SMC_INIT)
 		smc->sk.sk_state = SMC_ACTIVE;
@@ -447,7 +444,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
 			sock_put(&smc->sk); /* passive closing */
 		return reason_code;
 	}
-	if (reason_code != SMC_CLC_DECL_REPLY) {
+	if (reason_code != SMC_CLC_DECL_PEERDECL) {
 		rc = smc_clc_send_decline(smc, reason_code);
 		if (rc < 0) {
 			if (smc->sk.sk_state == SMC_INIT)
@@ -455,7 +452,7 @@ static int smc_connect_decline_fallback(struct smc_sock *smc, int reason_code)
 			return rc;
 		}
 	}
-	return smc_connect_fallback(smc);
+	return smc_connect_fallback(smc, reason_code);
 }
 
 /* abort connecting */
@@ -472,7 +469,7 @@ static int smc_connect_abort(struct smc_sock *smc, int reason_code,
 /* check if there is a rdma device available for this connection. */
 /* called for connect and listen */
 static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
-			  u8 *ibport)
+			  u8 *ibport, unsigned short vlan_id, u8 gid[])
 {
 	int reason_code = 0;
 
@@ -480,7 +477,8 @@ static int smc_check_rdma(struct smc_sock *smc, struct smc_ib_device **ibdev,
 	 * within same PNETID that also contains the ethernet device
 	 * used for the internal TCP socket
 	 */
-	smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport);
+	smc_pnet_find_roce_resource(smc->clcsock->sk, ibdev, ibport, vlan_id,
+				    gid);
 	if (!(*ibdev))
 		reason_code = SMC_CLC_DECL_CNFERR; /* configuration error */
 
@@ -526,12 +524,12 @@ static int smc_connect_ism_vlan_cleanup(struct smc_sock *smc, bool is_smcd,
 static int smc_connect_clc(struct smc_sock *smc, int smc_type,
 			   struct smc_clc_msg_accept_confirm *aclc,
 			   struct smc_ib_device *ibdev, u8 ibport,
-			   struct smcd_dev *ismdev)
+			   u8 gid[], struct smcd_dev *ismdev)
 {
 	int rc = 0;
 
 	/* do inband token exchange */
-	rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, ismdev);
+	rc = smc_clc_send_proposal(smc, smc_type, ibdev, ibport, gid, ismdev);
 	if (rc)
 		return rc;
 	/* receive SMC Accept CLC message */
@@ -571,7 +569,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
 		smc_link_save_peer_info(link, aclc);
 
 	if (smc_rmb_rtoken_handling(&smc->conn, aclc))
-		return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
 					 local_contact);
 
 	smc_close_init(smc);
@@ -579,12 +577,12 @@ static int smc_connect_rdma(struct smc_sock *smc,
 
 	if (local_contact == SMC_FIRST_CONTACT) {
 		if (smc_ib_ready_link(link))
-			return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
 						 local_contact);
 	} else {
 		if (!smc->conn.rmb_desc->reused &&
 		    smc_reg_rmb(link, smc->conn.rmb_desc, true))
-			return smc_connect_abort(smc, SMC_CLC_DECL_INTERR,
+			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
 						 local_contact);
 	}
 	smc_rmb_sync_sg_for_device(&smc->conn);
@@ -653,6 +651,7 @@ static int __smc_connect(struct smc_sock *smc)
 	struct smc_clc_msg_accept_confirm aclc;
 	struct smc_ib_device *ibdev;
 	struct smcd_dev *ismdev;
+	u8 gid[SMC_GID_SIZE];
 	unsigned short vlan;
 	int smc_type;
 	int rc = 0;
@@ -661,11 +660,11 @@ static int __smc_connect(struct smc_sock *smc)
 	sock_hold(&smc->sk); /* sock put in passive closing */
 
 	if (smc->use_fallback)
-		return smc_connect_fallback(smc);
+		return smc_connect_fallback(smc, smc->fallback_rsn);
 
 	/* if peer has not signalled SMC-capability, fall back */
 	if (!tcp_sk(smc->clcsock->sk)->syn_smc)
-		return smc_connect_fallback(smc);
+		return smc_connect_fallback(smc, SMC_CLC_DECL_PEERNOSMC);
 
 	/* IPSec connections opt out of SMC-R optimizations */
 	if (using_ipsec(smc))
@@ -684,7 +683,7 @@ static int __smc_connect(struct smc_sock *smc)
 	}
 
 	/* check if there is a rdma device available */
-	if (!smc_check_rdma(smc, &ibdev, &ibport)) {
+	if (!smc_check_rdma(smc, &ibdev, &ibport, vlan, gid)) {
 		/* RDMA is supported for this connection */
 		rdma_supported = true;
 		if (ism_supported)
@@ -695,10 +694,10 @@ static int __smc_connect(struct smc_sock *smc)
 
 	/* if neither ISM nor RDMA are supported, fallback */
 	if (!rdma_supported && !ism_supported)
-		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_CNFERR);
+		return smc_connect_decline_fallback(smc, SMC_CLC_DECL_NOSMCDEV);
 
 	/* perform CLC handshake */
-	rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, ismdev);
+	rc = smc_connect_clc(smc, smc_type, &aclc, ibdev, ibport, gid, ismdev);
 	if (rc) {
 		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
 		return smc_connect_decline_fallback(smc, rc);
@@ -710,7 +709,7 @@ static int __smc_connect(struct smc_sock *smc)
 	else if (ism_supported && aclc.hdr.path == SMC_TYPE_D)
 		rc = smc_connect_ism(smc, &aclc, ismdev);
 	else
-		rc = SMC_CLC_DECL_CNFERR;
+		rc = SMC_CLC_DECL_MODEUNSUPP;
 	if (rc) {
 		smc_connect_ism_vlan_cleanup(smc, ism_supported, ismdev, vlan);
 		return smc_connect_decline_fallback(smc, rc);
@@ -948,15 +947,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
 	link = &lgr->lnk[SMC_SINGLE_LINK];
 
 	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
-		return SMC_CLC_DECL_INTERR;
+		return SMC_CLC_DECL_ERR_REGRMB;
 
 	/* send CONFIRM LINK request to client over the RoCE fabric */
-	rc = smc_llc_send_confirm_link(link,
-				       link->smcibdev->mac[link->ibport - 1],
-				       &link->smcibdev->gid[link->ibport - 1],
-				       SMC_LLC_REQ);
+	rc = smc_llc_send_confirm_link(link, SMC_LLC_REQ);
 	if (rc < 0)
-		return SMC_CLC_DECL_TCL;
+		return SMC_CLC_DECL_TIMEOUT_CL;
 
 	/* receive CONFIRM LINK response from client over the RoCE fabric */
 	rest = wait_for_completion_interruptible_timeout(
@@ -976,10 +972,9 @@ static int smc_serv_conf_first_link(struct smc_sock *smc)
 	/* send ADD LINK request to client over the RoCE fabric */
 	rc = smc_llc_send_add_link(link,
 				   link->smcibdev->mac[link->ibport - 1],
-				   &link->smcibdev->gid[link->ibport - 1],
-				   SMC_LLC_REQ);
+				   link->gid, SMC_LLC_REQ);
 	if (rc < 0)
-		return SMC_CLC_DECL_TCL;
+		return SMC_CLC_DECL_TIMEOUT_AL;
 
 	/* receive ADD LINK response from client over the RoCE fabric */
 	rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
@@ -1054,7 +1049,8 @@ static void smc_listen_decline(struct smc_sock *new_smc, int reason_code,
 	}
 	smc_conn_free(&new_smc->conn);
 	new_smc->use_fallback = true;
-	if (reason_code && reason_code != SMC_CLC_DECL_REPLY) {
+	new_smc->fallback_rsn = reason_code;
+	if (reason_code && reason_code != SMC_CLC_DECL_PEERDECL) {
 		if (smc_clc_send_decline(new_smc, reason_code) < 0) {
 			smc_listen_out_err(new_smc);
 			return;
@@ -1145,7 +1141,7 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
 	if (local_contact != SMC_FIRST_CONTACT) {
 		if (!new_smc->conn.rmb_desc->reused) {
 			if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
-				return SMC_CLC_DECL_INTERR;
+				return SMC_CLC_DECL_ERR_REGRMB;
 		}
 	}
 	smc_rmb_sync_sg_for_device(&new_smc->conn);
@@ -1165,13 +1161,13 @@ static void smc_listen_rdma_finish(struct smc_sock *new_smc,
 		smc_link_save_peer_info(link, cclc);
 
 	if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
-		reason_code = SMC_CLC_DECL_INTERR;
+		reason_code = SMC_CLC_DECL_ERR_RTOK;
 		goto decline;
 	}
 
 	if (local_contact == SMC_FIRST_CONTACT) {
 		if (smc_ib_ready_link(link)) {
-			reason_code = SMC_CLC_DECL_INTERR;
+			reason_code = SMC_CLC_DECL_ERR_RDYLNK;
 			goto decline;
 		}
 		/* QP confirmation over RoCE fabric */
@@ -1199,6 +1195,7 @@ static void smc_listen_work(struct work_struct *work)
 	struct smcd_dev *ismdev;
 	u8 buf[SMC_CLC_MAX_LEN];
 	int local_contact = 0;
+	unsigned short vlan;
 	int reason_code = 0;
 	int rc = 0;
 	u8 ibport;
@@ -1211,6 +1208,7 @@ static void smc_listen_work(struct work_struct *work)
 	/* check if peer is smc capable */
 	if (!tcp_sk(newclcsock->sk)->syn_smc) {
 		new_smc->use_fallback = true;
+		new_smc->fallback_rsn = SMC_CLC_DECL_PEERNOSMC;
 		smc_listen_out_connected(new_smc);
 		return;
 	}
@@ -1247,14 +1245,16 @@ static void smc_listen_work(struct work_struct *work)
 	/* check if RDMA is available */
 	if (!ism_supported &&
 	    ((pclc->hdr.path != SMC_TYPE_R && pclc->hdr.path != SMC_TYPE_B) ||
-	     smc_check_rdma(new_smc, &ibdev, &ibport) ||
+	     smc_vlan_by_tcpsk(new_smc->clcsock, &vlan) ||
+	     smc_check_rdma(new_smc, &ibdev, &ibport, vlan, NULL) ||
 	     smc_listen_rdma_check(new_smc, pclc) ||
 	     smc_listen_rdma_init(new_smc, pclc, ibdev, ibport,
 				  &local_contact) ||
 	     smc_listen_rdma_reg(new_smc, local_contact))) {
 		/* SMC not supported, decline */
 		mutex_unlock(&smc_create_lgr_pending);
-		smc_listen_decline(new_smc, SMC_CLC_DECL_CNFERR, local_contact);
+		smc_listen_decline(new_smc, SMC_CLC_DECL_MODEUNSUPP,
+				   local_contact);
 		return;
 	}
 
@@ -1301,6 +1301,7 @@ static void smc_tcp_listen_work(struct work_struct *work)
 
 		new_smc->listen_smc = lsmc;
 		new_smc->use_fallback = lsmc->use_fallback;
+		new_smc->fallback_rsn = lsmc->fallback_rsn;
 		sock_hold(lsk); /* sock_put in smc_listen_work */
 		INIT_WORK(&new_smc->smc_listen_work, smc_listen_work);
 		smc_copy_sock_settings_to_smc(new_smc);
@@ -1455,6 +1456,7 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 	if (msg->msg_flags & MSG_FASTOPEN) {
 		if (sk->sk_state == SMC_INIT) {
 			smc->use_fallback = true;
+			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
 		} else {
 			rc = -EINVAL;
 			goto out;
@@ -1652,6 +1654,7 @@ static int smc_setsockopt(struct socket *sock, int level, int optname,
 		/* option not supported by SMC */
 		if (sk->sk_state == SMC_INIT) {
 			smc->use_fallback = true;
+			smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
 		} else {
 			if (!smc->use_fallback)
 				rc = -EINVAL;
@@ -1889,6 +1892,7 @@ static int smc_create(struct net *net, struct socket *sock, int protocol,
 	/* create internal TCP socket for CLC handshake and fallback */
 	smc = smc_sk(sk);
 	smc->use_fallback = false; /* assume rdma capability first */
+	smc->fallback_rsn = 0;
 	rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP,
 			      &smc->clcsock);
 	if (rc) {

+ 2 - 0
net/smc/smc.h

@@ -208,6 +208,8 @@ struct smc_sock {				/* smc sock container */
 	struct list_head	accept_q;	/* sockets to be accepted */
 	spinlock_t		accept_q_lock;	/* protects accept_q */
 	bool			use_fallback;	/* fallback to tcp */
+	int			fallback_rsn;	/* reason for fallback */
+	u32			peer_diagnosis; /* decline reason from peer */
 	int			sockopt_defer_accept;
 						/* sockopt TCP_DEFER_ACCEPT
 						 * value

+ 9 - 7
net/smc/smc_clc.c

@@ -334,7 +334,11 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 		goto out;
 	}
 	if (clcm->type == SMC_CLC_DECLINE) {
-		reason_code = SMC_CLC_DECL_REPLY;
+		struct smc_clc_msg_decline *dclc;
+
+		dclc = (struct smc_clc_msg_decline *)clcm;
+		reason_code = SMC_CLC_DECL_PEERDECL;
+		smc->peer_diagnosis = ntohl(dclc->peer_diagnosis);
 		if (((struct smc_clc_msg_decline *)buf)->hdr.flag) {
 			smc->conn.lgr->sync_err = 1;
 			smc_lgr_terminate(smc->conn.lgr);
@@ -378,7 +382,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info)
 
 /* send CLC PROPOSAL message across internal TCP socket */
 int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
-			  struct smc_ib_device *ibdev, u8 ibport,
+			  struct smc_ib_device *ibdev, u8 ibport, u8 gid[],
 			  struct smcd_dev *ismdev)
 {
 	struct smc_clc_ipv6_prefix ipv6_prfx[SMC_CLC_MAX_V6_PREFIX];
@@ -409,7 +413,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
 		/* add SMC-R specifics */
 		memcpy(pclc.lcl.id_for_peer, local_systemid,
 		       sizeof(local_systemid));
-		memcpy(&pclc.lcl.gid, &ibdev->gid[ibport - 1], SMC_GID_SIZE);
+		memcpy(&pclc.lcl.gid, gid, SMC_GID_SIZE);
 		memcpy(&pclc.lcl.mac, &ibdev->mac[ibport - 1], ETH_ALEN);
 		pclc.iparea_offset = htons(0);
 	}
@@ -492,8 +496,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
 		cclc.hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN);
 		memcpy(cclc.lcl.id_for_peer, local_systemid,
 		       sizeof(local_systemid));
-		memcpy(&cclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
-		       SMC_GID_SIZE);
+		memcpy(&cclc.lcl.gid, link->gid, SMC_GID_SIZE);
 		memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1],
 		       ETH_ALEN);
 		hton24(cclc.qpn, link->roce_qp->qp_num);
@@ -566,8 +569,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
 		link = &conn->lgr->lnk[SMC_SINGLE_LINK];
 		memcpy(aclc.lcl.id_for_peer, local_systemid,
 		       sizeof(local_systemid));
-		memcpy(&aclc.lcl.gid, &link->smcibdev->gid[link->ibport - 1],
-		       SMC_GID_SIZE);
+		memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE);
 		memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1],
 		       ETH_ALEN);
 		hton24(aclc.qpn, link->roce_qp->qp_num);

+ 13 - 7
net/smc/smc_clc.h

@@ -28,15 +28,21 @@
 #define SMC_TYPE_B		3		/* SMC-R and SMC-D	      */
 #define CLC_WAIT_TIME		(6 * HZ)	/* max. wait time on clcsock  */
 #define SMC_CLC_DECL_MEM	0x01010000  /* insufficient memory resources  */
-#define SMC_CLC_DECL_TIMEOUT	0x02000000  /* timeout                        */
+#define SMC_CLC_DECL_TIMEOUT_CL	0x02010000  /* timeout w4 QP confirm link     */
+#define SMC_CLC_DECL_TIMEOUT_AL	0x02020000  /* timeout w4 QP add link	      */
 #define SMC_CLC_DECL_CNFERR	0x03000000  /* configuration error            */
-#define SMC_CLC_DECL_IPSEC	0x03030000  /* IPsec usage                    */
+#define SMC_CLC_DECL_PEERNOSMC	0x03010000  /* peer did not indicate SMC      */
+#define SMC_CLC_DECL_IPSEC	0x03020000  /* IPsec usage		      */
+#define SMC_CLC_DECL_NOSMCDEV	0x03030000  /* no SMC device found	      */
+#define SMC_CLC_DECL_MODEUNSUPP	0x03040000  /* smc modes do not match (R or D)*/
+#define SMC_CLC_DECL_RMBE_EC	0x03050000  /* peer has eyecatcher in RMBE    */
+#define SMC_CLC_DECL_OPTUNSUPP	0x03060000  /* fastopen sockopt not supported */
 #define SMC_CLC_DECL_SYNCERR	0x04000000  /* synchronization error          */
-#define SMC_CLC_DECL_REPLY	0x06000000  /* reply to a received decline    */
+#define SMC_CLC_DECL_PEERDECL	0x05000000  /* peer declined during handshake */
 #define SMC_CLC_DECL_INTERR	0x99990000  /* internal error                 */
-#define SMC_CLC_DECL_TCL	0x02040000  /* timeout w4 QP confirm          */
-#define SMC_CLC_DECL_SEND	0x07000000  /* sending problem                */
-#define SMC_CLC_DECL_RMBE_EC	0x08000000  /* peer has eyecatcher in RMBE    */
+#define SMC_CLC_DECL_ERR_RTOK	0x99990001  /*	 rtoken handling failed       */
+#define SMC_CLC_DECL_ERR_RDYLNK	0x99990002  /*	 ib ready link failed	      */
+#define SMC_CLC_DECL_ERR_REGRMB	0x99990003  /*	 reg rmb failed		      */
 
 struct smc_clc_msg_hdr {	/* header1 of clc messages */
 	u8 eyecatcher[4];	/* eye catcher */
@@ -179,7 +185,7 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen,
 		     u8 expected_type);
 int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info);
 int smc_clc_send_proposal(struct smc_sock *smc, int smc_type,
-			  struct smc_ib_device *smcibdev, u8 ibport,
+			  struct smc_ib_device *smcibdev, u8 ibport, u8 gid[],
 			  struct smcd_dev *ismdev);
 int smc_clc_send_confirm(struct smc_sock *smc);
 int smc_clc_send_accept(struct smc_sock *smc, int srv_first_contact);

+ 46 - 38
net/smc/smc_core.c

@@ -30,6 +30,7 @@
 #define SMC_LGR_NUM_INCR		256
 #define SMC_LGR_FREE_DELAY_SERV		(600 * HZ)
 #define SMC_LGR_FREE_DELAY_CLNT		(SMC_LGR_FREE_DELAY_SERV + 10 * HZ)
+#define SMC_LGR_FREE_DELAY_FAST		(8 * HZ)
 
 static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
 	.lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock),
@@ -51,6 +52,11 @@ static void smc_lgr_schedule_free_work(struct smc_link_group *lgr)
 			 SMC_LGR_FREE_DELAY_CLNT : SMC_LGR_FREE_DELAY_SERV);
 }
 
+void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr)
+{
+	mod_delayed_work(system_wq, &lgr->free_work, SMC_LGR_FREE_DELAY_FAST);
+}
+
 /* Register connection's alert token in our lookup structure.
  * To use rbtrees we have to implement our own insert core.
  * Requires @conns_lock
@@ -133,6 +139,20 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
 	smc_lgr_schedule_free_work(lgr);
 }
 
+/* Send delete link, either as client to request the initiation
+ * of the DELETE LINK sequence from server; or as server to
+ * initiate the delete processing. See smc_llc_rx_delete_link().
+ */
+static int smc_link_send_delete(struct smc_link *lnk)
+{
+	if (lnk->state == SMC_LNK_ACTIVE &&
+	    !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, true)) {
+		smc_llc_link_deleting(lnk);
+		return 0;
+	}
+	return -ENOTCONN;
+}
+
 static void smc_lgr_free_work(struct work_struct *work)
 {
 	struct smc_link_group *lgr = container_of(to_delayed_work(work),
@@ -153,10 +173,21 @@ static void smc_lgr_free_work(struct work_struct *work)
 	list_del_init(&lgr->list); /* remove from smc_lgr_list */
 free:
 	spin_unlock_bh(&smc_lgr_list.lock);
+
+	if (!lgr->is_smcd && !lgr->terminating)	{
+		/* try to send del link msg, on error free lgr immediately */
+		if (!smc_link_send_delete(&lgr->lnk[SMC_SINGLE_LINK])) {
+			/* reschedule in case we never receive a response */
+			smc_lgr_schedule_free_work(lgr);
+			return;
+		}
+	}
+
 	if (!delayed_work_pending(&lgr->free_work)) {
-		if (!lgr->is_smcd &&
-		    lgr->lnk[SMC_SINGLE_LINK].state != SMC_LNK_INACTIVE)
-			smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
+		struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+
+		if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
+			smc_llc_link_inactive(lnk);
 		smc_lgr_free(lgr);
 	}
 }
@@ -219,6 +250,10 @@ static int smc_lgr_create(struct smc_sock *smc, bool is_smcd,
 		get_random_bytes(rndvec, sizeof(rndvec));
 		lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
 			(rndvec[2] << 16);
+		rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
+					  vlan_id, lnk->gid, &lnk->sgid_index);
+		if (rc)
+			goto free_lgr;
 		rc = smc_llc_link_init(lnk);
 		if (rc)
 			goto free_lgr;
@@ -522,37 +557,6 @@ out:
 	return rc;
 }
 
-/* determine the link gid matching the vlan id of the link group */
-static int smc_link_determine_gid(struct smc_link_group *lgr)
-{
-	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
-	struct ib_gid_attr gattr;
-	union ib_gid gid;
-	int i;
-
-	if (!lgr->vlan_id) {
-		lnk->gid = lnk->smcibdev->gid[lnk->ibport - 1];
-		return 0;
-	}
-
-	for (i = 0; i < lnk->smcibdev->pattr[lnk->ibport - 1].gid_tbl_len;
-	     i++) {
-		if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid,
-				 &gattr))
-			continue;
-		if (gattr.ndev) {
-			if (is_vlan_dev(gattr.ndev) &&
-			    vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) {
-				lnk->gid = gid;
-				dev_put(gattr.ndev);
-				return 0;
-			}
-			dev_put(gattr.ndev);
-		}
-	}
-	return -ENODEV;
-}
-
 static bool smcr_lgr_match(struct smc_link_group *lgr,
 			   struct smc_clc_msg_local *lcl,
 			   enum smc_lgr_role role)
@@ -631,8 +635,6 @@ create:
 		if (rc)
 			goto out;
 		smc_lgr_register_conn(conn); /* add smc conn to lgr */
-		if (!is_smcd)
-			rc = smc_link_determine_gid(conn->lgr);
 	}
 	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
 	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
@@ -1013,8 +1015,14 @@ void smc_core_exit(void)
 	spin_unlock_bh(&smc_lgr_list.lock);
 	list_for_each_entry_safe(lgr, lg, &lgr_freeing_list, list) {
 		list_del_init(&lgr->list);
-		if (!lgr->is_smcd)
-			smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
+		if (!lgr->is_smcd) {
+			struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+
+			if (lnk->state == SMC_LNK_ACTIVE)
+				smc_llc_send_delete_link(lnk, SMC_LLC_REQ,
+							 false);
+			smc_llc_link_inactive(lnk);
+		}
 		cancel_delayed_work_sync(&lgr->free_work);
 		smc_lgr_free(lgr); /* free link group */
 	}

+ 6 - 3
net/smc/smc_core.h

@@ -34,7 +34,8 @@ enum smc_lgr_role {		/* possible roles of a link group */
 enum smc_link_state {			/* possible states of a link */
 	SMC_LNK_INACTIVE,	/* link is inactive */
 	SMC_LNK_ACTIVATING,	/* link is being activated */
-	SMC_LNK_ACTIVE		/* link is active */
+	SMC_LNK_ACTIVE,		/* link is active */
+	SMC_LNK_DELETING,	/* link is being deleted */
 };
 
 #define SMC_WR_BUF_SIZE		48	/* size of work request buffer */
@@ -84,14 +85,15 @@ struct smc_link {
 	wait_queue_head_t	wr_reg_wait;	/* wait for wr_reg result */
 	enum smc_wr_reg_state	wr_reg_state;	/* state of wr_reg request */
 
-	union ib_gid		gid;		/* gid matching used vlan id */
+	u8			gid[SMC_GID_SIZE];/* gid matching used vlan id*/
+	u8			sgid_index;	/* gid index for vlan id      */
 	u32			peer_qpn;	/* QP number of peer */
 	enum ib_mtu		path_mtu;	/* used mtu */
 	enum ib_mtu		peer_mtu;	/* mtu size of peer */
 	u32			psn_initial;	/* QP tx initial packet seqno */
 	u32			peer_psn;	/* QP rx initial packet seqno */
 	u8			peer_mac[ETH_ALEN];	/* = gid[8:10||13:15] */
-	u8			peer_gid[sizeof(union ib_gid)];	/* gid of peer*/
+	u8			peer_gid[SMC_GID_SIZE];	/* gid of peer*/
 	u8			link_id;	/* unique # within link group */
 
 	enum smc_link_state	state;		/* state of link */
@@ -264,6 +266,7 @@ int smc_conn_create(struct smc_sock *smc, bool is_smcd, int srv_first_contact,
 		    struct smc_clc_msg_local *lcl, struct smcd_dev *smcd,
 		    u64 peer_gid);
 void smcd_conn_free(struct smc_connection *conn);
+void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
 void smc_core_exit(void);
 
 static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)

+ 7 - 1
net/smc/smc_diag.c

@@ -79,6 +79,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 			   struct nlattr *bc)
 {
 	struct smc_sock *smc = smc_sk(sk);
+	struct smc_diag_fallback fallback;
 	struct user_namespace *user_ns;
 	struct smc_diag_msg *r;
 	struct nlmsghdr *nlh;
@@ -101,6 +102,11 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 	if (smc_diag_msg_attrs_fill(sk, skb, r, user_ns))
 		goto errout;
 
+	fallback.reason = smc->fallback_rsn;
+	fallback.peer_diagnosis = smc->peer_diagnosis;
+	if (nla_put(skb, SMC_DIAG_FALLBACK, sizeof(fallback), &fallback) < 0)
+		goto errout;
+
 	if ((req->diag_ext & (1 << (SMC_DIAG_CONNINFO - 1))) &&
 	    smc->conn.alert_token_local) {
 		struct smc_connection *conn = &smc->conn;
@@ -154,7 +160,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
 		       smc->conn.lgr->lnk[0].smcibdev->ibdev->name,
 		       sizeof(smc->conn.lgr->lnk[0].smcibdev->ibdev->name));
 		smc_gid_be16_convert(linfo.lnk[0].gid,
-				     smc->conn.lgr->lnk[0].gid.raw);
+				     smc->conn.lgr->lnk[0].gid);
 		smc_gid_be16_convert(linfo.lnk[0].peer_gid,
 				     smc->conn.lgr->lnk[0].peer_gid);
 

+ 36 - 5
net/smc/smc_ib.c

@@ -68,7 +68,7 @@ static int smc_ib_modify_qp_rtr(struct smc_link *lnk)
 	qp_attr.path_mtu = min(lnk->path_mtu, lnk->peer_mtu);
 	qp_attr.ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
 	rdma_ah_set_port_num(&qp_attr.ah_attr, lnk->ibport);
-	rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, 0, 1, 0);
+	rdma_ah_set_grh(&qp_attr.ah_attr, NULL, 0, lnk->sgid_index, 1, 0);
 	rdma_ah_set_dgid_raw(&qp_attr.ah_attr, lnk->peer_gid);
 	memcpy(&qp_attr.ah_attr.roce.dmac, lnk->peer_mac,
 	       sizeof(lnk->peer_mac));
@@ -142,13 +142,13 @@ out:
 	return rc;
 }
 
-static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport)
+static int smc_ib_fill_mac(struct smc_ib_device *smcibdev, u8 ibport)
 {
 	struct ib_gid_attr gattr;
+	union ib_gid gid;
 	int rc;
 
-	rc = ib_query_gid(smcibdev->ibdev, ibport, 0,
-			  &smcibdev->gid[ibport - 1], &gattr);
+	rc = ib_query_gid(smcibdev->ibdev, ibport, 0, &gid, &gattr);
 	if (rc || !gattr.ndev)
 		return -ENODEV;
 
@@ -175,6 +175,37 @@ bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport)
 	return smcibdev->pattr[ibport - 1].state == IB_PORT_ACTIVE;
 }
 
+/* determine the gid for an ib-device port and vlan id */
+int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
+			 unsigned short vlan_id, u8 gid[], u8 *sgid_index)
+{
+	struct ib_gid_attr gattr;
+	union ib_gid _gid;
+	int i;
+
+	for (i = 0; i < smcibdev->pattr[ibport - 1].gid_tbl_len; i++) {
+		memset(&_gid, 0, SMC_GID_SIZE);
+		memset(&gattr, 0, sizeof(gattr));
+		if (ib_query_gid(smcibdev->ibdev, ibport, i, &_gid, &gattr))
+			continue;
+		if (!gattr.ndev)
+			continue;
+		if (((!vlan_id && !is_vlan_dev(gattr.ndev)) ||
+		     (vlan_id && is_vlan_dev(gattr.ndev) &&
+		      vlan_dev_vlan_id(gattr.ndev) == vlan_id)) &&
+		    gattr.gid_type == IB_GID_TYPE_IB) {
+			if (gid)
+				memcpy(gid, &_gid, SMC_GID_SIZE);
+			if (sgid_index)
+				*sgid_index = i;
+			dev_put(gattr.ndev);
+			return 0;
+		}
+		dev_put(gattr.ndev);
+	}
+	return -ENODEV;
+}
+
 static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
 {
 	int rc;
@@ -186,7 +217,7 @@ static int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport)
 	if (rc)
 		goto out;
 	/* the SMC protocol requires specification of the RoCE MAC address */
-	rc = smc_ib_fill_gid_and_mac(smcibdev, ibport);
+	rc = smc_ib_fill_mac(smcibdev, ibport);
 	if (rc)
 		goto out;
 	if (!strncmp(local_systemid, SMC_LOCAL_SYSTEMID_RESET,

+ 2 - 1
net/smc/smc_ib.h

@@ -40,7 +40,6 @@ struct smc_ib_device {				/* ib-device infos for smc */
 	struct tasklet_struct	recv_tasklet;	/* called by recv cq handler */
 	char			mac[SMC_MAX_PORTS][ETH_ALEN];
 						/* mac address per port*/
-	union ib_gid		gid[SMC_MAX_PORTS]; /* gid per port */
 	u8			pnetid[SMC_MAX_PORTS][SMC_MAX_PNETID_LEN];
 						/* pnetid per port */
 	u8			initialized : 1; /* ib dev CQ, evthdl done */
@@ -77,4 +76,6 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
 void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
 			       struct smc_buf_desc *buf_slot,
 			       enum dma_data_direction data_direction);
+int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
+			 unsigned short vlan_id, u8 gid[], u8 *sgid_index);
 #endif

+ 27 - 23
net/smc/smc_llc.c

@@ -182,8 +182,7 @@ static int smc_llc_add_pending_send(struct smc_link *link,
 }
 
 /* high-level API to send LLC confirm link */
-int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
-			      union ib_gid *gid,
+int smc_llc_send_confirm_link(struct smc_link *link,
 			      enum smc_llc_reqresp reqresp)
 {
 	struct smc_link_group *lgr = smc_get_lgr(link);
@@ -202,8 +201,9 @@ int smc_llc_send_confirm_link(struct smc_link *link, u8 mac[],
 	confllc->hd.flags |= SMC_LLC_FLAG_NO_RMBE_EYEC;
 	if (reqresp == SMC_LLC_RESP)
 		confllc->hd.flags |= SMC_LLC_FLAG_RESP;
-	memcpy(confllc->sender_mac, mac, ETH_ALEN);
-	memcpy(confllc->sender_gid, gid, SMC_GID_SIZE);
+	memcpy(confllc->sender_mac, link->smcibdev->mac[link->ibport - 1],
+	       ETH_ALEN);
+	memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE);
 	hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
 	confllc->link_num = link->link_id;
 	memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
@@ -240,8 +240,7 @@ static int smc_llc_send_confirm_rkey(struct smc_link *link,
 
 /* prepare an add link message */
 static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
-				  struct smc_link *link, u8 mac[],
-				  union ib_gid *gid,
+				  struct smc_link *link, u8 mac[], u8 gid[],
 				  enum smc_llc_reqresp reqresp)
 {
 	memset(addllc, 0, sizeof(*addllc));
@@ -258,8 +257,7 @@ static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
 }
 
 /* send ADD LINK request or response */
-int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
-			  union ib_gid *gid,
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
 			  enum smc_llc_reqresp reqresp)
 {
 	struct smc_llc_msg_add_link *addllc;
@@ -280,7 +278,7 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[],
 /* prepare a delete link message */
 static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
 				     struct smc_link *link,
-				     enum smc_llc_reqresp reqresp)
+				     enum smc_llc_reqresp reqresp, bool orderly)
 {
 	memset(delllc, 0, sizeof(*delllc));
 	delllc->hd.common.type = SMC_LLC_DELETE_LINK;
@@ -289,13 +287,14 @@ static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
 		delllc->hd.flags |= SMC_LLC_FLAG_RESP;
 	/* DEL_LINK_ALL because only 1 link supported */
 	delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
-	delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+	if (orderly)
+		delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
 	delllc->link_num = link->link_id;
 }
 
 /* send DELETE LINK request or response */
 int smc_llc_send_delete_link(struct smc_link *link,
-			     enum smc_llc_reqresp reqresp)
+			     enum smc_llc_reqresp reqresp, bool orderly)
 {
 	struct smc_llc_msg_del_link *delllc;
 	struct smc_wr_tx_pend_priv *pend;
@@ -306,7 +305,7 @@ int smc_llc_send_delete_link(struct smc_link *link,
 	if (rc)
 		return rc;
 	delllc = (struct smc_llc_msg_del_link *)wr_buf;
-	smc_llc_prep_delete_link(delllc, link, reqresp);
+	smc_llc_prep_delete_link(delllc, link, reqresp, orderly);
 	/* send llc message */
 	rc = smc_wr_tx_send(link, pend);
 	return rc;
@@ -422,14 +421,12 @@ static void smc_llc_rx_add_link(struct smc_link *link,
 		if (lgr->role == SMC_SERV) {
 			smc_llc_prep_add_link(llc, link,
 					link->smcibdev->mac[link->ibport - 1],
-					&link->smcibdev->gid[link->ibport - 1],
-					SMC_LLC_REQ);
+					link->gid, SMC_LLC_REQ);
 
 		} else {
 			smc_llc_prep_add_link(llc, link,
 					link->smcibdev->mac[link->ibport - 1],
-					&link->smcibdev->gid[link->ibport - 1],
-					SMC_LLC_RESP);
+					link->gid, SMC_LLC_RESP);
 		}
 		smc_llc_send_message(link, llc, sizeof(*llc));
 	}
@@ -442,17 +439,19 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
 
 	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
 		if (lgr->role == SMC_SERV)
-			smc_lgr_terminate(lgr);
+			smc_lgr_schedule_free_work_fast(lgr);
 	} else {
+		smc_lgr_forget(lgr);
+		smc_llc_link_deleting(link);
 		if (lgr->role == SMC_SERV) {
-			smc_lgr_forget(lgr);
-			smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ);
-			smc_llc_send_message(link, llc, sizeof(*llc));
+			/* client asks to delete this link, send request */
+			smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
 		} else {
-			smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP);
-			smc_llc_send_message(link, llc, sizeof(*llc));
-			smc_lgr_terminate(lgr);
+			/* server requests to delete this link, send response */
+			smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
 		}
+		smc_llc_send_message(link, llc, sizeof(*llc));
+		smc_lgr_schedule_free_work_fast(lgr);
 	}
 }
 
@@ -626,6 +625,11 @@ void smc_llc_link_active(struct smc_link *link, int testlink_time)
 	}
 }
 
+void smc_llc_link_deleting(struct smc_link *link)
+{
+	link->state = SMC_LNK_DELETING;
+}
+
 /* called in tasklet context */
 void smc_llc_link_inactive(struct smc_link *link)
 {

+ 4 - 3
net/smc/smc_llc.h

@@ -36,14 +36,15 @@ enum smc_llc_msg_type {
 };
 
 /* transmit */
-int smc_llc_send_confirm_link(struct smc_link *lnk, u8 mac[], union ib_gid *gid,
+int smc_llc_send_confirm_link(struct smc_link *lnk,
 			      enum smc_llc_reqresp reqresp);
-int smc_llc_send_add_link(struct smc_link *link, u8 mac[], union ib_gid *gid,
+int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
 			  enum smc_llc_reqresp reqresp);
 int smc_llc_send_delete_link(struct smc_link *link,
-			     enum smc_llc_reqresp reqresp);
+			     enum smc_llc_reqresp reqresp, bool orderly);
 int smc_llc_link_init(struct smc_link *link);
 void smc_llc_link_active(struct smc_link *link, int testlink_time);
+void smc_llc_link_deleting(struct smc_link *link);
 void smc_llc_link_inactive(struct smc_link *link);
 void smc_llc_link_clear(struct smc_link *link);
 int smc_llc_do_confirm_rkey(struct smc_link *link,

+ 21 - 9
net/smc/smc_pnet.c

@@ -535,11 +535,13 @@ static struct net_device *pnet_find_base_ndev(struct net_device *ndev)
 }
 
 /* Determine the corresponding IB device port based on the hardware PNETID.
- * Searching stops at the first matching active IB device port.
+ * Searching stops at the first matching active IB device port with vlan_id
+ * configured.
  */
 static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
 					 struct smc_ib_device **smcibdev,
-					 u8 *ibport)
+					 u8 *ibport, unsigned short vlan_id,
+					 u8 gid[])
 {
 	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
 	struct smc_ib_device *ibdev;
@@ -553,15 +555,20 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
 	spin_lock(&smc_ib_devices.lock);
 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
 		for (i = 1; i <= SMC_MAX_PORTS; i++) {
+			if (!rdma_is_port_valid(ibdev->ibdev, i))
+				continue;
 			if (!memcmp(ibdev->pnetid[i - 1], ndev_pnetid,
 				    SMC_MAX_PNETID_LEN) &&
-			    smc_ib_port_active(ibdev, i)) {
+			    smc_ib_port_active(ibdev, i) &&
+			    !smc_ib_determine_gid(ibdev, i, vlan_id, gid,
+						  NULL))  {
 				*smcibdev = ibdev;
 				*ibport = i;
-				break;
+				goto out;
 			}
 		}
 	}
+out:
 	spin_unlock(&smc_ib_devices.lock);
 }
 
@@ -589,7 +596,8 @@ static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
 /* Lookup of coupled ib_device via SMC pnet table */
 static void smc_pnet_find_roce_by_table(struct net_device *netdev,
 					struct smc_ib_device **smcibdev,
-					u8 *ibport)
+					u8 *ibport, unsigned short vlan_id,
+					u8 gid[])
 {
 	struct smc_pnetentry *pnetelem;
 
@@ -597,7 +605,10 @@ static void smc_pnet_find_roce_by_table(struct net_device *netdev,
 	list_for_each_entry(pnetelem, &smc_pnettable.pnetlist, list) {
 		if (netdev == pnetelem->ndev) {
 			if (smc_ib_port_active(pnetelem->smcibdev,
-					       pnetelem->ib_port)) {
+					       pnetelem->ib_port) &&
+			    !smc_ib_determine_gid(pnetelem->smcibdev,
+						  pnetelem->ib_port, vlan_id,
+						  gid, NULL)) {
 				*smcibdev = pnetelem->smcibdev;
 				*ibport = pnetelem->ib_port;
 			}
@@ -612,7 +623,8 @@ static void smc_pnet_find_roce_by_table(struct net_device *netdev,
  * ethernet interface.
  */
 void smc_pnet_find_roce_resource(struct sock *sk,
-				 struct smc_ib_device **smcibdev, u8 *ibport)
+				 struct smc_ib_device **smcibdev, u8 *ibport,
+				 unsigned short vlan_id, u8 gid[])
 {
 	struct dst_entry *dst = sk_dst_get(sk);
 
@@ -625,12 +637,12 @@ void smc_pnet_find_roce_resource(struct sock *sk,
 		goto out_rel;
 
 	/* if possible, lookup via hardware-defined pnetid */
-	smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport);
+	smc_pnet_find_roce_by_pnetid(dst->dev, smcibdev, ibport, vlan_id, gid);
 	if (*smcibdev)
 		goto out_rel;
 
 	/* lookup via SMC PNET table */
-	smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport);
+	smc_pnet_find_roce_by_table(dst->dev, smcibdev, ibport, vlan_id, gid);
 
 out_rel:
 	dst_release(dst);

+ 2 - 1
net/smc/smc_pnet.h

@@ -33,7 +33,8 @@ int smc_pnet_init(void) __init;
 void smc_pnet_exit(void);
 int smc_pnet_remove_by_ibdev(struct smc_ib_device *ibdev);
 void smc_pnet_find_roce_resource(struct sock *sk,
-				 struct smc_ib_device **smcibdev, u8 *ibport);
+				 struct smc_ib_device **smcibdev, u8 *ibport,
+				 unsigned short vlan_id, u8 gid[]);
 void smc_pnet_find_ism_resource(struct sock *sk, struct smcd_dev **smcismdev);
 
 #endif

+ 2 - 5
net/smc/smc_wr.c

@@ -182,17 +182,14 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
 		if (rc)
 			return rc;
 	} else {
-		struct smc_link_group *lgr;
-
-		lgr = smc_get_lgr(link);
 		rc = wait_event_timeout(
 			link->wr_tx_wait,
-			list_empty(&lgr->list) || /* lgr terminated */
+			link->state == SMC_LNK_INACTIVE ||
 			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
 			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
 		if (!rc) {
 			/* timeout - terminate connections */
-			smc_lgr_terminate(lgr);
+			smc_lgr_terminate(smc_get_lgr(link));
 			return -EPIPE;
 		}
 		if (idx == link->wr_tx_cnt)