@@ -658,6 +658,8 @@ static inline u32 i40e_get_head(struct i40e_ring *tx_ring)
 	return le32_to_cpu(*(volatile __le32 *)head);
 }
 
+#define WB_STRIDE 0x3
+
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
  * @tx_ring: tx ring to clean
@@ -759,6 +761,18 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 	tx_ring->q_vector->tx.total_bytes += total_bytes;
 	tx_ring->q_vector->tx.total_packets += total_packets;
 
+	/* check to see if there are any non-cache aligned descriptors
+	 * waiting to be written back, and kick the hardware to force
+	 * them to be written back in case of napi polling
+	 */
+	if (budget &&
+	    !((i & WB_STRIDE) == WB_STRIDE) &&
+	    !test_bit(__I40E_DOWN, &tx_ring->vsi->state) &&
+	    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
+		tx_ring->arm_wb = true;
+	else
+		tx_ring->arm_wb = false;
+
 	if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
 		/* schedule immediate reset if we believe we hung */
 		dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
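
The arm_wb heuristic above hinges on the (i & WB_STRIDE) mask: with WB_STRIDE equal to 0x3, only every fourth ring index has both low bits set, so a cleanup pass that stops anywhere else may have left descriptors that carry no RS bit and therefore have no write-back pending. A minimal standalone C sketch of that mask arithmetic (illustration only, not driver code):

#include <stdio.h>

#define WB_STRIDE 0x3	/* same mask as the patch: low two index bits */

int main(void)
{
	unsigned int i;

	/* Only every 4th index has both low bits set; stopping anywhere
	 * else can strand descriptors with no RS bit set, which is the
	 * condition arm_wb is meant to detect.
	 */
	for (i = 0; i < 8; i++)
		printf("i=%u on 4-descriptor boundary: %d\n",
		       i, (i & WB_STRIDE) == WB_STRIDE);
	return 0;
}
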
@@ -777,13 +791,16 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
 
 		dev_info(tx_ring->dev,
-			 "tx hang detected on queue %d, resetting adapter\n",
+			 "tx hang detected on queue %d, reset requested\n",
 			 tx_ring->queue_index);
 
-		tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev);
+		/* do not fire the reset immediately, wait for the stack to
+		 * decide we are truly stuck, also prevents every queue from
+		 * simultaneously requesting a reset
+		 */
 
-		/* the adapter is about to reset, no point in enabling stuff */
-		return true;
+		/* the adapter is about to reset, no point in enabling polling */
+		budget = 1;
 	}
 
 	netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev,
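
Dropping the direct ndo_tx_timeout() call leans on the netdev watchdog in the core stack, which fires the timeout handler only after a queue has stayed stopped longer than watchdog_timeo; that gives the hardware a chance to catch up and collapses per-queue hang sightings into a single reset request. A hedged sketch of that debounce pattern; queue_state and watchdog_should_reset are illustrative names, not kernel API:

#include <stdbool.h>
#include <stdio.h>

struct queue_state {
	bool looks_hung;		/* set cheaply by per-queue cleanup */
	unsigned long hung_since;	/* first time the hang was seen */
};

static bool watchdog_should_reset(struct queue_state *q,
				  unsigned long now, unsigned long timeout)
{
	if (!q->looks_hung) {
		q->hung_since = 0;	/* condition cleared itself */
		return false;
	}
	if (!q->hung_since)
		q->hung_since = now;	/* start the clock */
	return (now - q->hung_since) >= timeout;
}

int main(void)
{
	struct queue_state q = { .looks_hung = true };

	/* Seen at t=100; with a 50-tick timeout, only the t=160 check
	 * escalates to an actual reset request.
	 */
	printf("%d %d\n", watchdog_should_reset(&q, 100, 50),
	       watchdog_should_reset(&q, 160, 50));
	return 0;
}
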
@@ -806,7 +823,25 @@ static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
 		}
 	}
 
-	return budget > 0;
+	return !!budget;
+}
+
+/**
+ * i40e_force_wb - Arm hardware to do a wb on noncache aligned descriptors
+ * @vsi: the VSI we care about
+ * @q_vector: the vector on which to force writeback
+ *
+ **/
+static void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
+{
+	u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+		  I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
+		  I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK
+		  /* allow 00 to be written to the index */;
+
+	wr32(&vsi->back->hw,
+	     I40E_PFINT_DYN_CTLN(q_vector->v_idx + vsi->base_vector - 1),
+	     val);
 }
 
 /**
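
i40e_force_wb() arms a software interrupt through the vector's PFINT_DYN_CTLN register: INTENA keeps the interrupt enabled, SWINT_TRIG fires the software interrupt, and SW_ITR_INDX_ENA lets the zeroed SW ITR index bits take effect, which is what the "allow 00" comment refers to. A standalone sketch of how the value is composed; the bit positions below are assumptions recalled from i40e_register.h, so verify against the header before relying on them:

#include <stdint.h>
#include <stdio.h>

/* Assumed bit positions, not verified here. */
#define DYN_CTLN_INTENA		(1u << 0)	/* keep interrupt enabled */
#define DYN_CTLN_SWINT_TRIG	(1u << 2)	/* fire a software interrupt */
#define DYN_CTLN_SW_ITR_INDX_ENA	(1u << 24)	/* honor the SW ITR index */

int main(void)
{
	/* The SW ITR index field itself stays 0, so the triggered
	 * interrupt uses ITR index 0.
	 */
	uint32_t val = DYN_CTLN_INTENA | DYN_CTLN_SWINT_TRIG |
		       DYN_CTLN_SW_ITR_INDX_ENA;

	printf("DYN_CTLN value: 0x%08x\n", val);
	return 0;
}
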
@@ -1581,6 +1616,7 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 	struct i40e_vsi *vsi = q_vector->vsi;
 	struct i40e_ring *ring;
 	bool clean_complete = true;
+	bool arm_wb = false;
 	int budget_per_ring;
 
 	if (test_bit(__I40E_DOWN, &vsi->state)) {
@@ -1591,8 +1627,10 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 	/* Since the actual Tx work is minimal, we can give the Tx a larger
 	 * budget and be more aggressive about cleaning up the Tx descriptors.
 	 */
-	i40e_for_each_ring(ring, q_vector->tx)
+	i40e_for_each_ring(ring, q_vector->tx) {
 		clean_complete &= i40e_clean_tx_irq(ring, vsi->work_limit);
+		arm_wb |= ring->arm_wb;
+	}
 
 	/* We attempt to distribute budget to each Rx queue fairly, but don't
 	 * allow the budget to go below 1 because that would exit polling early.
@@ -1603,8 +1641,11 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 		clean_complete &= i40e_clean_rx_irq(ring, budget_per_ring);
 
 	/* If work not completed, return budget and polling will return */
-	if (!clean_complete)
+	if (!clean_complete) {
+		if (arm_wb)
+			i40e_force_wb(vsi, q_vector);
 		return budget;
+	}
 
 	/* Work is done so exit the polling mode and re-enable the interrupt */
 	napi_complete(napi);
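
On the poll side, the per-ring arm_wb flags are OR-ed into a single per-vector decision, and the forced write-back fires only when polling will continue; if the pass completed, nothing is stranded and the interrupt path handles the rest. A condensed sketch of that control flow with hypothetical types and names (ring, force_wb, poll_sketch), not the driver itself:

#include <stdbool.h>
#include <stddef.h>

struct ring { bool arm_wb; struct ring *next; };

static void force_wb(void) { /* register write elided */ }

static int poll_sketch(struct ring *tx_rings, int budget, bool clean_complete)
{
	bool arm_wb = false;
	struct ring *r;

	for (r = tx_rings; r; r = r->next)
		arm_wb |= r->arm_wb;	/* one decision per vector */

	if (!clean_complete) {
		if (arm_wb)
			force_wb();	/* flush stalled descriptors */
		return budget;		/* NAPI will poll again */
	}
	return 0;			/* done; caller re-enables IRQ */
}

int main(void)
{
	struct ring r = { .arm_wb = true, .next = NULL };
	return poll_sketch(&r, 64, false) == 64 ? 0 : 1;
}
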
@@ -2198,7 +2239,6 @@ static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	/* Place RS bit on last descriptor of any packet that spans across the
 	 * 4th descriptor (WB_STRIDE aka 0x3) in a 64B cacheline.
 	 */
-#define WB_STRIDE 0x3
 	if (((i & WB_STRIDE) != WB_STRIDE) &&
 	    (first <= &tx_ring->tx_bi[i]) &&
 	    (first >= &tx_ring->tx_bi[i & ~WB_STRIDE])) {
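
The window test reads as: i & ~WB_STRIDE rounds the last index down to the start of its four-descriptor stride, so the condition holds exactly when the packet sits wholly inside the current stride and does not end on its boundary, i.e. when the RS bit can be skipped. Those are the descriptors the new arm_wb path later flushes. A small standalone demonstration of the arithmetic (the indices are illustrative):

#include <stdio.h>

#define WB_STRIDE 0x3

int main(void)
{
	/* Illustrative indices: first = packet's first descriptor,
	 * i = packet's last descriptor. RS (report status) can be
	 * skipped when the packet sits wholly inside the current
	 * 4-descriptor stride and does not end on the stride boundary.
	 */
	unsigned int first = 5, i = 6;
	unsigned int stride_start = i & ~WB_STRIDE;	/* 4 */

	int skip_rs = ((i & WB_STRIDE) != WB_STRIDE) &&
		      first >= stride_start && first <= i;

	printf("stride [%u..%u], skip RS bit: %d\n",
	       stride_start, stride_start + WB_STRIDE, skip_rs);
	return 0;
}
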