@@ -100,16 +100,6 @@
  */
 #define TX_QCHECK_PERIOD (HZ / 2)

-/* SGE Hung Ingress DMA Threshold Warning time (in Hz) and Warning Repeat Rate
- * (in RX_QCHECK_PERIOD multiples). If we find one of the SGE Ingress DMA
- * State Machines in the same state for this amount of time (in HZ) then we'll
- * issue a warning about a potential hang. We'll repeat the warning as the
- * SGE Ingress DMA Channel appears to be hung every N RX_QCHECK_PERIODs till
- * the situation clears. If the situation clears, we'll note that as well.
- */
-#define SGE_IDMA_WARN_THRESH (1 * HZ)
-#define SGE_IDMA_WARN_REPEAT (20 * RX_QCHECK_PERIOD)
-
 /*
  * Max number of Tx descriptors to be reclaimed by the Tx timer.
  */
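For reference, the retired warning parameters work out to roughly one second before the first warning and ten seconds between repeats. A standalone sketch of that arithmetic follows; the HZ value is an assumption for illustration, since the kernel's HZ is configuration dependent.

/* Standalone arithmetic sketch of the retired thresholds; HZ = 1000 is
 * an illustrative assumption, not the kernel's configured value.
 */
#include <stdio.h>

#define HZ                   1000
#define RX_QCHECK_PERIOD     (HZ / 2)                /* RX timer fires every 500 ms */
#define SGE_IDMA_WARN_THRESH (1 * HZ)                /* first warning after ~1 s */
#define SGE_IDMA_WARN_REPEAT (20 * RX_QCHECK_PERIOD) /* repeat every ~10 s */

int main(void)
{
	printf("warn after %d ms, repeat every %d ms\n",
	       SGE_IDMA_WARN_THRESH * 1000 / HZ,
	       SGE_IDMA_WARN_REPEAT * 1000 / HZ);
	return 0;
}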
@@ -1130,7 +1120,6 @@ cxgb_fcoe_offload(struct sk_buff *skb, struct adapter *adap,
  */
 netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
-	int len;
 	u32 wr_mid;
 	u64 cntrl, *end;
 	int qidx, credits;
@@ -1143,6 +1132,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 	const struct skb_shared_info *ssi;
 	dma_addr_t addr[MAX_SKB_FRAGS + 1];
 	bool immediate = false;
+	int len, max_pkt_len;
 #ifdef CONFIG_CHELSIO_T4_FCOE
 	int err;
 #endif /* CONFIG_CHELSIO_T4_FCOE */
@@ -1156,6 +1146,13 @@ out_free:	dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
 	}

+	/* Discard the packet if the length is greater than mtu */
+	max_pkt_len = ETH_HLEN + dev->mtu;
+	if (skb_vlan_tag_present(skb))
+		max_pkt_len += VLAN_HLEN;
+	if (!skb_shinfo(skb)->gso_size && (unlikely(skb->len > max_pkt_len)))
+		goto out_free;
+
 	pi = netdev_priv(dev);
 	adap = pi->adapter;
 	qidx = skb_get_queue_mapping(skb);
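The added check drops any non-GSO packet whose total length exceeds the interface MTU plus the Ethernet header, and plus the VLAN tag when one is present. Below is a minimal standalone sketch of the same logic, with the skb reduced to the three fields the check reads; the struct and constant definitions here are illustrative stand-ins, not the kernel's.

/* Standalone sketch of the oversize-packet check added above.
 * ETH_HLEN/VLAN_HLEN values mirror the kernel headers; "fake_skb" is a
 * stand-in for the three sk_buff fields the check actually looks at.
 */
#include <stdbool.h>
#include <stdio.h>

#define ETH_HLEN  14	/* Ethernet header */
#define VLAN_HLEN 4	/* 802.1Q tag */

struct fake_skb {
	unsigned int len;		/* total frame length */
	bool vlan_tag_present;		/* accelerated VLAN tag present? */
	unsigned int gso_size;		/* non-zero for GSO/TSO packets */
};

/* Return true if the packet should be dropped (mirrors the goto out_free). */
static bool too_long(const struct fake_skb *skb, unsigned int mtu)
{
	unsigned int max_pkt_len = ETH_HLEN + mtu;

	if (skb->vlan_tag_present)
		max_pkt_len += VLAN_HLEN;

	/* GSO packets may exceed the MTU; the hardware segments them. */
	return !skb->gso_size && skb->len > max_pkt_len;
}

int main(void)
{
	struct fake_skb skb = { .len = 1600, .vlan_tag_present = false, .gso_size = 0 };

	printf("drop: %d\n", too_long(&skb, 1500));	/* 1600 > 1514 -> drop */
	return 0;
}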
@@ -2279,7 +2276,7 @@ irq_handler_t t4_intr_handler(struct adapter *adap)
 static void sge_rx_timer_cb(unsigned long data)
 {
 	unsigned long m;
-	unsigned int i, idma_same_state_cnt[2];
+	unsigned int i;
 	struct adapter *adap = (struct adapter *)data;
 	struct sge *s = &adap->sge;

@@ -2300,67 +2297,16 @@ static void sge_rx_timer_cb(unsigned long data)
 			set_bit(id, s->starving_fl);
 		}
 	}
+	/* The remainder of the SGE RX Timer Callback routine is dedicated to
+	 * global Master PF activities like checking for chip ingress stalls,
+	 * etc.
+	 */
+	if (!(adap->flags & MASTER_PF))
+		goto done;

-	t4_write_reg(adap, SGE_DEBUG_INDEX_A, 13);
-	idma_same_state_cnt[0] = t4_read_reg(adap, SGE_DEBUG_DATA_HIGH_A);
-	idma_same_state_cnt[1] = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A);
-
-	for (i = 0; i < 2; i++) {
-		u32 debug0, debug11;
-
-		/* If the Ingress DMA Same State Counter ("timer") is less
-		 * than 1s, then we can reset our synthesized Stall Timer and
-		 * continue. If we have previously emitted warnings about a
-		 * potential stalled Ingress Queue, issue a note indicating
-		 * that the Ingress Queue has resumed forward progress.
-		 */
-		if (idma_same_state_cnt[i] < s->idma_1s_thresh) {
-			if (s->idma_stalled[i] >= SGE_IDMA_WARN_THRESH)
-				CH_WARN(adap, "SGE idma%d, queue%u,resumed after %d sec\n",
-					i, s->idma_qid[i],
-					s->idma_stalled[i]/HZ);
-			s->idma_stalled[i] = 0;
-			continue;
-		}
-
-		/* Synthesize an SGE Ingress DMA Same State Timer in the Hz
-		 * domain. The first time we get here it'll be because we
-		 * passed the 1s Threshold; each additional time it'll be
-		 * because the RX Timer Callback is being fired on its regular
-		 * schedule.
-		 *
-		 * If the stall is below our Potential Hung Ingress Queue
-		 * Warning Threshold, continue.
-		 */
-		if (s->idma_stalled[i] == 0)
-			s->idma_stalled[i] = HZ;
-		else
-			s->idma_stalled[i] += RX_QCHECK_PERIOD;
-
-		if (s->idma_stalled[i] < SGE_IDMA_WARN_THRESH)
-			continue;
-
-		/* We'll issue a warning every SGE_IDMA_WARN_REPEAT Hz */
-		if (((s->idma_stalled[i] - HZ) % SGE_IDMA_WARN_REPEAT) != 0)
-			continue;
-
-		/* Read and save the SGE IDMA State and Queue ID information.
-		 * We do this every time in case it changes across time ...
-		 */
-		t4_write_reg(adap, SGE_DEBUG_INDEX_A, 0);
-		debug0 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A);
-		s->idma_state[i] = (debug0 >> (i * 9)) & 0x3f;
-
-		t4_write_reg(adap, SGE_DEBUG_INDEX_A, 11);
-		debug11 = t4_read_reg(adap, SGE_DEBUG_DATA_LOW_A);
-		s->idma_qid[i] = (debug11 >> (i * 16)) & 0xffff;
-
-		CH_WARN(adap, "SGE idma%u, queue%u, maybe stuck state%u %dsecs (debug0=%#x, debug11=%#x)\n",
-			i, s->idma_qid[i], s->idma_state[i],
-			s->idma_stalled[i]/HZ, debug0, debug11);
-		t4_sge_decode_idma_state(adap, s->idma_state[i]);
-	}
+	t4_idma_monitor(adap, &s->idma_monitor, HZ, RX_QCHECK_PERIOD);

+done:
 	mod_timer(&s->rx_timer, jiffies + RX_QCHECK_PERIOD);
 }

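The open-coded stall detector removed above is replaced by a call to the shared t4_idma_monitor() helper, which keeps its own state in s->idma_monitor. The sketch below is only a guess at the shape of that state, inferred from the fields the removed code used (idma_1s_thresh, idma_stalled[], idma_state[], idma_qid[]); the real definition lives in the common code and may differ.

/* Hypothetical sketch only: per-adapter state a shared IDMA monitor would
 * need, inferred from the fields referenced by the code removed above.
 * Not the driver's actual definition.
 */
struct sge_idma_monitor_state {
	unsigned int idma_1s_thresh;	/* core-clock ticks in ~1 second */
	unsigned int idma_stalled[2];	/* synthesized stall time, in HZ */
	unsigned int idma_state[2];	/* last observed IDMA state */
	unsigned int idma_qid[2];	/* last observed ingress queue ID */
};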
@@ -2437,9 +2383,12 @@ static void __iomem *bar2_address(struct adapter *adapter,
 	return adapter->bar2 + bar2_qoffset;
 }

+/* @intr_idx: MSI/MSI-X vector if >=0, -(absolute qid + 1) if < 0
+ * @cong: < 0 -> no congestion feedback, >= 0 -> congestion channel map
+ */
 int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 		     struct net_device *dev, int intr_idx,
-		     struct sge_fl *fl, rspq_handler_t hnd)
+		     struct sge_fl *fl, rspq_handler_t hnd, int cong)
 {
 	int ret, flsz = 0;
 	struct fw_iq_cmd c;
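The new comment documents two encodings: @intr_idx is either an MSI/MSI-X vector (when >= 0) or a forwarding ingress queue encoded as -(absolute qid + 1), and @cong is either negative (no congestion feedback) or a channel bitmap. A standalone sketch of the @intr_idx decoding, with hypothetical values for illustration:

/* Standalone sketch of the @intr_idx encoding described above:
 * a non-negative value names an MSI/MSI-X vector, a negative value
 * encodes a forwarding ingress queue as -(absolute qid + 1).
 */
#include <stdio.h>

static void decode_intr_idx(int intr_idx)
{
	if (intr_idx >= 0)
		printf("interrupt vector %d\n", intr_idx);
	else
		printf("forward to absolute ingress qid %d\n", -intr_idx - 1);
}

int main(void)
{
	decode_intr_idx(3);	/* MSI-X vector 3 */
	decode_intr_idx(-8);	/* absolute qid 7 */
	return 0;
}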
@@ -2471,8 +2420,19 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 			FW_IQ_CMD_IQESIZE_V(ilog2(iq->iqe_len) - 4));
 	c.iqsize = htons(iq->size);
 	c.iqaddr = cpu_to_be64(iq->phys_addr);
+	if (cong >= 0)
+		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_IQFLINTCONGEN_F);

 	if (fl) {
+		/* Allocate the ring for the hardware free list (with space
+		 * for its status page) along with the associated software
+		 * descriptor ring. The free list size needs to be a multiple
+		 * of the Egress Queue Unit and at least 2 Egress Units larger
+		 * than the SGE's Egress Congestion Threshold
+		 * (fl_starve_thres - 1).
+		 */
+		if (fl->size < s->fl_starve_thres - 1 + 2 * 8)
+			fl->size = s->fl_starve_thres - 1 + 2 * 8;
 		fl->size = roundup(fl->size, 8);
 		fl->desc = alloc_ring(adap->pdev_dev, fl->size, sizeof(__be64),
 				      sizeof(struct rx_sw_desc), &fl->addr,
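One SGE Egress Queue Unit is 8 free-list descriptors, so the new lower bound keeps the free list at least two Egress Units above the starvation threshold before the existing roundup to a whole unit. A standalone sketch of that sizing rule; the fl_starve_thres value used below is illustrative only.

/* Standalone sketch of the free-list minimum-size rule added above.
 * One SGE Egress Queue Unit is 8 descriptors, so "2 * 8" adds two
 * Egress Units of headroom beyond the starvation threshold.
 */
#include <stdio.h>

static unsigned int fl_min_size(unsigned int requested, unsigned int fl_starve_thres)
{
	unsigned int size = requested;

	if (size < fl_starve_thres - 1 + 2 * 8)
		size = fl_starve_thres - 1 + 2 * 8;

	/* the existing code then rounds up to a whole Egress Unit */
	return (size + 7) & ~7U;
}

int main(void)
{
	/* e.g. fl_starve_thres = 2 * egress_threshold + 1 = 65 (illustrative) */
	printf("%u\n", fl_min_size(32, 65));	/* too small: 32 -> 80 */
	printf("%u\n", fl_min_size(128, 65));	/* already big enough -> 128 */
	return 0;
}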
@@ -2481,10 +2441,15 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 			goto fl_nomem;

 		flsz = fl->size / 8 + s->stat_len / sizeof(struct tx_desc);
-		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN_F |
-					    FW_IQ_CMD_FL0FETCHRO_F |
-					    FW_IQ_CMD_FL0DATARO_F |
-					    FW_IQ_CMD_FL0PADEN_F);
+		c.iqns_to_fl0congen |= htonl(FW_IQ_CMD_FL0PACKEN_F |
+					     FW_IQ_CMD_FL0FETCHRO_F |
+					     FW_IQ_CMD_FL0DATARO_F |
+					     FW_IQ_CMD_FL0PADEN_F);
+		if (cong >= 0)
+			c.iqns_to_fl0congen |=
+				htonl(FW_IQ_CMD_FL0CNGCHMAP_V(cong) |
+				      FW_IQ_CMD_FL0CONGCIF_F |
+				      FW_IQ_CMD_FL0CONGEN_F);
 		c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN_V(2) |
 						     FW_IQ_CMD_FL0FBMAX_V(3));
 		c.fl0size = htons(flsz);
@@ -2532,6 +2497,41 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 				     &fl->bar2_qid);
 		refill_fl(adap, fl, fl_cap(fl), GFP_KERNEL);
 	}
+
+	/* For T5 and later we attempt to set up the Congestion Manager values
+	 * of the new RX Ethernet Queue. This should really be handled by
+	 * firmware because it's more complex than any host driver wants to
+	 * get involved with and it's different per chip and this is almost
+	 * certainly wrong. Firmware would be wrong as well, but it would be
+	 * a lot easier to fix in one place ... For now we do something very
+	 * simple (and hopefully less wrong).
+	 */
+	if (!is_t4(adap->params.chip) && cong >= 0) {
+		u32 param, val;
+		int i;
+
+		param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) |
+			 FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
+			 FW_PARAMS_PARAM_YZ_V(iq->cntxt_id));
+		if (cong == 0) {
+			val = CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_QUEUE_X);
+		} else {
+			val =
+			    CONMCTXT_CNGTPMODE_V(CONMCTXT_CNGTPMODE_CHANNEL_X);
+			for (i = 0; i < 4; i++) {
+				if (cong & (1 << i))
+					val |=
+					    CONMCTXT_CNGCHMAP_V(1 << (i << 2));
+			}
+		}
+		ret = t4_set_params(adap, adap->mbox, adap->fn, 0, 1,
+				    &param, &val);
+		if (ret)
+			dev_warn(adap->pdev_dev, "Failed to set Congestion"
+				 " Manager Context for Ingress Queue %d: %d\n",
+				 iq->cntxt_id, -ret);
+	}
+
 	return 0;

 fl_nomem:
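In the channel-map branch above, bit i of cong selects channel i, and the Congestion Manager context keeps a 4-bit nibble per channel, so the code raises bit (i * 4) via 1 << (i << 2). A standalone sketch of that mapping; CONMCTXT_CNGCHMAP_V() is modelled as a plain identity here, whereas the real macro also positions the field within the register word.

/* Standalone sketch of the channel-map arithmetic in the loop above:
 * bit i of "cong" selects channel i, and each channel owns a 4-bit
 * nibble, so the code sets bit (i * 4), i.e. 1 << (i << 2).
 */
#include <stdio.h>

static unsigned int cngchmap_bits(unsigned int cong)
{
	unsigned int val = 0;
	int i;

	for (i = 0; i < 4; i++)
		if (cong & (1 << i))
			val |= 1 << (i << 2);	/* low bit of nibble i */

	return val;
}

int main(void)
{
	printf("%#x\n", cngchmap_bits(0x3));	/* channels 0,1 -> 0x11 */
	printf("%#x\n", cngchmap_bits(0xf));	/* all channels  -> 0x1111 */
	return 0;
}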
@@ -2637,7 +2637,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,

 	txq->q.desc = alloc_ring(adap->pdev_dev, nentries,
 				 sizeof(struct tx_desc), 0, &txq->q.phys_addr,
-				 NULL, 0, NUMA_NO_NODE);
+				 NULL, 0, dev_to_node(adap->pdev_dev));
 	if (!txq->q.desc)
 		return -ENOMEM;

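The control-queue ring is now allocated on the NUMA node of the underlying PCI device instead of NUMA_NO_NODE, so descriptor memory lands close to the adapter. A minimal kernel-style sketch of the idea, using only the standard dev_to_node()/kzalloc_node() APIs; this is not the driver's actual alloc_ring() implementation.

/* Minimal sketch of node-local ring allocation, assuming only standard
 * kernel APIs; not the driver's actual alloc_ring() helper.
 */
#include <linux/device.h>
#include <linux/slab.h>

static void *alloc_on_device_node(struct device *dev, size_t bytes)
{
	int node = dev_to_node(dev);	/* NUMA node the PCI device sits on */

	return kzalloc_node(bytes, GFP_KERNEL, node);
}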
@@ -3067,11 +3067,11 @@ int t4_sge_init(struct adapter *adap)
 		egress_threshold = EGRTHRESHOLDPACKING_G(sge_conm_ctrl);
 	s->fl_starve_thres = 2*egress_threshold + 1;

+	t4_idma_monitor_init(adap, &s->idma_monitor);
+
 	setup_timer(&s->rx_timer, sge_rx_timer_cb, (unsigned long)adap);
 	setup_timer(&s->tx_timer, sge_tx_timer_cb, (unsigned long)adap);
-	s->idma_1s_thresh = core_ticks_per_usec(adap) * 1000000; /* 1 s */
-	s->idma_stalled[0] = 0;
-	s->idma_stalled[1] = 0;
+
 	spin_lock_init(&s->intrq_lock);

 	return 0;