10 years ago · d2fe937ce6
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -307,111 +307,6 @@ static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
 
				 	return 0;
			
 
				 }
			
 
				 
			
 
				-static int
			
 
				-ipoib_mcast_sendonly_join_complete(int status,
			
 
				-				   struct ib_sa_multicast *multicast)
			
 
				-{
			
 
				-	struct ipoib_mcast *mcast = multicast->context;
			
 
				-	struct net_device *dev = mcast->dev;
			
 
				-	struct ipoib_dev_priv *priv = netdev_priv(dev);
			
 
				-
			
 
				-	/*
			
 
				-	 * We have to take the mutex to force mcast_sendonly_join to
			
 
				-	 * return from ib_sa_multicast_join and set mcast->mc to a
			
 
				-	 * valid value.  Otherwise we were racing with ourselves in
			
 
				-	 * that we might fail here, but get a valid return from
			
 
				-	 * ib_sa_multicast_join after we had cleared mcast->mc here,
			
 
				-	 * resulting in mis-matched joins and leaves and a deadlock
			
 
				-	 */
			
 
				-	mutex_lock(&mcast_mutex);
			
 
				-
			
 
				-	/* We trap for port events ourselves. */
			
 
				-	if (status == -ENETRESET) {
			
 
				-		status = 0;
			
 
				-		goto out;
			
 
				-	}
			
 
				-
			
 
				-	if (!status)
			
 
				-		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
			
 
				-
			
 
				-	if (status) {
			
 
				-		if (mcast->logcount++ < 20)
			
 
				-			ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
			
 
				-					"join failed for %pI6, status %d\n",
			
 
				-					mcast->mcmember.mgid.raw, status);
			
 
				-
			
 
				-		/* Flush out any queued packets */
			
 
				-		netif_tx_lock_bh(dev);
			
 
				-		while (!skb_queue_empty(&mcast->pkt_queue)) {
			
 
				-			++dev->stats.tx_dropped;
			
 
				-			dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
			
 
				-		}
			
 
				-		netif_tx_unlock_bh(dev);
			
 
				-		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
			
 
				-	} else {
			
 
				-		mcast->backoff = 1;
			
 
				-		mcast->delay_until = jiffies;
			
 
				-		__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
			
 
				-	}
			
 
				-out:
			
 
				-	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
			
 
				-	if (status)
			
 
				-		mcast->mc = NULL;
			
 
				-	complete(&mcast->done);
			
 
				-	mutex_unlock(&mcast_mutex);
			
 
				-	return status;
			
 
				-}
			
 
				-
			
 
				-static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
			
 
				-{
			
 
				-	struct net_device *dev = mcast->dev;
			
 
				-	struct ipoib_dev_priv *priv = netdev_priv(dev);
			
 
				-	struct ib_sa_mcmember_rec rec = {
			
 
				-#if 0				/* Some SMs don't support send-only yet */
			
 
				-		.join_state = 4
			
 
				-#else
			
 
				-		.join_state = 1
			
 
				-#endif
			
 
				-	};
			
 
				-	int ret = 0;
			
 
				-
			
 
				-	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
			
 
				-		ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
			
 
				-				"multicast joins\n");
			
 
				-		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
			
 
				-		complete(&mcast->done);
			
 
				-		return -ENODEV;
			
 
				-	}
			
 
				-
			
 
				-	rec.mgid     = mcast->mcmember.mgid;
			
 
				-	rec.port_gid = priv->local_gid;
			
 
				-	rec.pkey     = cpu_to_be16(priv->pkey);
			
 
				-
			
 
				-	mutex_lock(&mcast_mutex);
			
 
				-	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
			
 
				-					 priv->port, &rec,
			
 
				-					 IB_SA_MCMEMBER_REC_MGID	|
			
 
				-					 IB_SA_MCMEMBER_REC_PORT_GID	|
			
 
				-					 IB_SA_MCMEMBER_REC_PKEY	|
			
 
				-					 IB_SA_MCMEMBER_REC_JOIN_STATE,
			
 
				-					 GFP_ATOMIC,
			
 
				-					 ipoib_mcast_sendonly_join_complete,
			
 
				-					 mcast);
			
 
				-	if (IS_ERR(mcast->mc)) {
			
 
				-		ret = PTR_ERR(mcast->mc);
			
 
				-		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
			
 
				-		ipoib_warn(priv, "ib_sa_join_multicast for sendonly join "
			
 
				-			   "failed (ret = %d)\n", ret);
			
 
				-		complete(&mcast->done);
			
 
				-	} else {
			
 
				-		ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting "
			
 
				-				"sendonly join\n", mcast->mcmember.mgid.raw);
			
 
				-	}
			
 
				-	mutex_unlock(&mcast_mutex);
			
 
				-
			
 
				-	return ret;
			
 
				-}
			
 
				-
			
 
				 void ipoib_mcast_carrier_on_task(struct work_struct *work)
			
 
				 {
			
 
				 	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
			
@@ -452,7 +347,9 @@ static int ipoib_mcast_join_complete(int status,
 
				 	struct net_device *dev = mcast->dev;
			
 
				 	struct ipoib_dev_priv *priv = netdev_priv(dev);
			
 
				 
			
 
				-	ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
			
 
				+	ipoib_dbg_mcast(priv, "%sjoin completion for %pI6 (status %d)\n",
			
 
				+			test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ?
			
 
				+			"sendonly " : "",
			
 
				 			mcast->mcmember.mgid.raw, status);
			
 
				 
			
 
				 	/*
			
@@ -477,27 +374,52 @@ static int ipoib_mcast_join_complete(int status,
 
				 	if (!status) {
			
 
				 		mcast->backoff = 1;
			
 
				 		mcast->delay_until = jiffies;
			
 
				-		__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
			
 
				 
			
 
				 		/*
			
 
				 		 * Defer carrier on work to priv->wq to avoid a
			
 
				-		 * deadlock on rtnl_lock here.
			
 
				+		 * deadlock on rtnl_lock here.  Requeue our multicast
			
 
				+		 * work too, which will end up happening right after
			
 
				+		 * our carrier on task work and will allow us to
			
 
				+		 * send out all of the non-broadcast joins
			
 
				 		 */
			
 
				-		if (mcast == priv->broadcast)
			
 
				+		if (mcast == priv->broadcast) {
			
 
				 			queue_work(priv->wq, &priv->carrier_on_task);
			
 
				+			__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
			
 
				+		}
			
 
				 	} else {
			
 
				 		if (mcast->logcount++ < 20) {
			
 
				 			if (status == -ETIMEDOUT || status == -EAGAIN) {
			
 
				-				ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
			
 
				+				ipoib_dbg_mcast(priv, "%smulticast join failed for %pI6, status %d\n",
			
 
				+						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
			
 
				 						mcast->mcmember.mgid.raw, status);
			
 
				 			} else {
			
 
				-				ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
			
 
				+				ipoib_warn(priv, "%smulticast join failed for %pI6, status %d\n",
			
 
				+						test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ? "sendonly " : "",
			
 
				 					   mcast->mcmember.mgid.raw, status);
			
 
				 			}
			
 
				 		}
			
 
				 
			
 
				-		/* Requeue this join task with a backoff delay */
			
 
				-		__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
			
 
				+		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) &&
			
 
				+		    mcast->backoff >= 2) {
			
 
				+			/*
			
 
				+			 * We only retry sendonly joins once before we drop
			
 
				+			 * the packet and quit trying to deal with the
			
 
				+			 * group.  However, we leave the group in the
			
 
				+			 * mcast list as an unjoined group.  If we want to
			
 
				+			 * try joining again, we simply queue up a packet
			
 
				+			 * and restart the join thread.  The empty queue
			
 
				+			 * is why the join thread ignores this group.
			
 
				+			 */
			
 
				+			mcast->backoff = 1;
			
 
				+			netif_tx_lock_bh(dev);
			
 
				+			while (!skb_queue_empty(&mcast->pkt_queue)) {
			
 
				+				++dev->stats.tx_dropped;
			
 
				+				dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
			
 
				+			}
			
 
				+			netif_tx_unlock_bh(dev);
			
 
				+		} else
			
 
				+			/* Requeue this join task with a backoff delay */
			
 
				+			__ipoib_mcast_schedule_join_thread(priv, mcast, 1);
			
 
				 	}
			
 
				 out:
			
 
				 	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
			
@@ -650,45 +572,45 @@ void ipoib_mcast_join_task(struct work_struct *work)
 
				 	list_for_each_entry(mcast, &priv->multicast_list, list) {
			
 
				 		if (IS_ERR_OR_NULL(mcast->mc) &&
			
 
				 		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
			
 
				-		    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
			
 
				+		    (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags) ||
			
 
				+		     !skb_queue_empty(&mcast->pkt_queue))) {
			
 
				 			if (mcast->backoff == 1 ||
			
 
				-			    time_after_eq(jiffies, mcast->delay_until))
			
 
				+			    time_after_eq(jiffies, mcast->delay_until)) {
			
 
				 				/* Found the next unjoined group */
			
 
				-				break;
			
 
				-			else if (!delay_until ||
			
 
				+				init_completion(&mcast->done);
			
 
				+				set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
			
 
				+				if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
			
 
				+					create = 0;
			
 
				+				else
			
 
				+					create = 1;
			
 
				+				spin_unlock_irq(&priv->lock);
			
 
				+				mutex_unlock(&mcast_mutex);
			
 
				+				ipoib_mcast_join(dev, mcast, create);
			
 
				+				mutex_lock(&mcast_mutex);
			
 
				+				spin_lock_irq(&priv->lock);
			
 
				+			} else if (!delay_until ||
			
 
				 				 time_before(mcast->delay_until, delay_until))
			
 
				 				delay_until = mcast->delay_until;
			
 
				 		}
			
 
				 	}
			
 
				 
			
 
				-	if (&mcast->list == &priv->multicast_list) {
			
 
				-		/*
			
 
				-		 * All done, unless we have delayed work from
			
 
				-		 * backoff retransmissions, but we will get
			
 
				-		 * restarted when the time is right, so we are
			
 
				-		 * done for now
			
 
				-		 */
			
 
				-		mcast = NULL;
			
 
				-		ipoib_dbg_mcast(priv, "successfully joined all "
			
 
				-				"multicast groups\n");
			
 
				-	}
			
 
				+	mcast = NULL;
			
 
				+	ipoib_dbg_mcast(priv, "successfully started all multicast joins\n");
			
 
				 
			
 
				 out:
			
 
				+	if (delay_until) {
			
 
				+		cancel_delayed_work(&priv->mcast_task);
			
 
				+		queue_delayed_work(priv->wq, &priv->mcast_task,
			
 
				+				   delay_until - jiffies);
			
 
				+	}
			
 
				 	if (mcast) {
			
 
				 		init_completion(&mcast->done);
			
 
				 		set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
			
 
				 	}
			
 
				 	spin_unlock_irq(&priv->lock);
			
 
				 	mutex_unlock(&mcast_mutex);
			
 
				-	if (mcast) {
			
 
				-		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
			
 
				-			ipoib_mcast_sendonly_join(mcast);
			
 
				-		else
			
 
				-			ipoib_mcast_join(dev, mcast, create);
			
 
				-	}
			
 
				-	if (delay_until)
			
 
				-		queue_delayed_work(priv->wq, &priv->mcast_task,
			
 
				-				   delay_until - jiffies);
			
 
				+	if (mcast)
			
 
				+		ipoib_mcast_join(dev, mcast, create);
			
 
				 }
			
 
				 
			
 
				 int ipoib_mcast_start_thread(struct net_device *dev)
			
@@ -731,8 +653,6 @@ static int ipoib_mcast_leave(struct net_device *dev, struct ipoib_mcast *mcast)
 
				 
			
 
				 	if (!IS_ERR_OR_NULL(mcast->mc))
			
 
				 		ib_sa_free_multicast(mcast->mc);
			
 
				-	else
			
 
				-		ipoib_dbg(priv, "ipoib_mcast_leave with mcast->mc invalid\n");
			
 
				 
			
 
				 	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
			
 
				 		ipoib_dbg_mcast(priv, "leaving MGID %pI6\n",
			
@@ -768,43 +688,37 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb)
 
				 	}
			
 
				 
			
 
				 	mcast = __ipoib_mcast_find(dev, mgid);
			
 
				-	if (!mcast) {
			
 
				-		/* Let's create a new send only group now */
			
 
				-		ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
			
 
				-				mgid);
			
 
				-
			
 
				-		mcast = ipoib_mcast_alloc(dev, 0);
			
 
				+	if (!mcast || !mcast->ah) {
			
 
				 		if (!mcast) {
			
 
				-			ipoib_warn(priv, "unable to allocate memory for "
			
 
				-				   "multicast structure\n");
			
 
				-			++dev->stats.tx_dropped;
			
 
				-			dev_kfree_skb_any(skb);
			
 
				-			goto out;
			
 
				-		}
			
 
				-
			
 
				-		set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
			
 
				-		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
			
 
				-		__ipoib_mcast_add(dev, mcast);
			
 
				-		list_add_tail(&mcast->list, &priv->multicast_list);
			
 
				-		__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
			
 
				-	}
			
 
				+			/* Let's create a new send only group now */
			
 
				+			ipoib_dbg_mcast(priv, "setting up send only multicast group for %pI6\n",
			
 
				+					mgid);
			
 
				+
			
 
				+			mcast = ipoib_mcast_alloc(dev, 0);
			
 
				+			if (!mcast) {
			
 
				+				ipoib_warn(priv, "unable to allocate memory "
			
 
				+					   "for multicast structure\n");
			
 
				+				++dev->stats.tx_dropped;
			
 
				+				dev_kfree_skb_any(skb);
			
 
				+				goto unlock;
			
 
				+			}
			
 
				 
			
 
				-	if (!mcast->ah) {
			
 
				+			set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
			
 
				+			memcpy(mcast->mcmember.mgid.raw, mgid,
			
 
				+			       sizeof (union ib_gid));
			
 
				+			__ipoib_mcast_add(dev, mcast);
			
 
				+			list_add_tail(&mcast->list, &priv->multicast_list);
			
 
				+		}
			
 
				 		if (skb_queue_len(&mcast->pkt_queue) < IPOIB_MAX_MCAST_QUEUE)
			
 
				 			skb_queue_tail(&mcast->pkt_queue, skb);
			
 
				 		else {
			
 
				 			++dev->stats.tx_dropped;
			
 
				 			dev_kfree_skb_any(skb);
			
 
				 		}
			
 
				-		/*
			
 
				-		 * If lookup completes between here and out:, don't
			
 
				-		 * want to send packet twice.
			
 
				-		 */
			
 
				-		mcast = NULL;
			
 
				-	}
			
 
				-
			
 
				-out:
			
 
				-	if (mcast && mcast->ah) {
			
 
				+		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
			
 
				+			__ipoib_mcast_schedule_join_thread(priv, NULL, 0);
			
 
				+		}
			
 
				+	} else {
			
 
				 		struct ipoib_neigh *neigh;
			
 
				 
			
 
				 		spin_unlock_irqrestore(&priv->lock, flags);