Browse Source

Merge branch 'mlx4-XDP-TX-improvements'

Tariq Toukan says:

====================
mlx4_en XDP TX improvements

This patchset contains performance improvements
to the XDP_TX use case in the mlx4 Eth driver.

Patch 1 is a simple change in a function parameter type.
Patch 2 replaces a call to a generic function with the
  relevant parts inlined.
Patch 3 moves the write of descriptors' constant values
  from data path to control path.

Series generated against net-next commit:
833e0e2f24fd net: dst: move cpu inside ifdef to avoid compilation warning
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller 7 years ago
parent
commit
df24cd4fda

+ 1 - 0
drivers/net/ethernet/mellanox/mlx4/en_netdev.c

@@ -1752,6 +1752,7 @@ int mlx4_en_start_port(struct net_device *dev)
 				mlx4_en_arm_cq(priv, cq);
 				mlx4_en_arm_cq(priv, cq);
 
 
 			} else {
 			} else {
+				mlx4_en_init_tx_xdp_ring_descs(priv, tx_ring);
 				mlx4_en_init_recycle_ring(priv, i);
 				mlx4_en_init_recycle_ring(priv, i);
 				/* XDP TX CQ should never be armed */
 				/* XDP TX CQ should never be armed */
 			}
 			}

+ 1 - 1
drivers/net/ethernet/mellanox/mlx4/en_rx.c

@@ -778,7 +778,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 			case XDP_PASS:
 			case XDP_PASS:
 				break;
 				break;
 			case XDP_TX:
 			case XDP_TX:
-				if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
+				if (likely(!mlx4_en_xmit_frame(ring, frags, priv,
 							length, cq_ring,
 							length, cq_ring,
 							&doorbell_pending))) {
 							&doorbell_pending))) {
 					frags[0].page = NULL;
 					frags[0].page = NULL;

+ 31 - 15
drivers/net/ethernet/mellanox/mlx4/en_tx.c

@@ -1085,13 +1085,35 @@ tx_drop:
 #define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
 #define MLX4_EN_XDP_TX_REAL_SZ (((CTRL_SIZE + MLX4_EN_XDP_TX_NRTXBB * DS_SIZE) \
 				 / 16) & 0x3f)
 				 / 16) & 0x3f)
 
 
+void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
+				    struct mlx4_en_tx_ring *ring)
+{
+	int i;
+
+	for (i = 0; i < ring->size; i++) {
+		struct mlx4_en_tx_info *tx_info = &ring->tx_info[i];
+		struct mlx4_en_tx_desc *tx_desc = ring->buf +
+			(i << LOG_TXBB_SIZE);
+
+		tx_info->map0_byte_count = PAGE_SIZE;
+		tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
+		tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
+		tx_info->ts_requested = 0;
+		tx_info->nr_maps = 1;
+		tx_info->linear = 1;
+		tx_info->inl = 0;
+
+		tx_desc->data.lkey = ring->mr_key;
+		tx_desc->ctrl.qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+		tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
+	}
+}
+
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 			       struct mlx4_en_rx_alloc *frame,
 			       struct mlx4_en_rx_alloc *frame,
-			       struct net_device *dev, unsigned int length,
+			       struct mlx4_en_priv *priv, unsigned int length,
 			       int tx_ind, bool *doorbell_pending)
 			       int tx_ind, bool *doorbell_pending)
 {
 {
-	struct mlx4_en_priv *priv = netdev_priv(dev);
-	union mlx4_wqe_qpn_vlan	qpn_vlan = {};
 	struct mlx4_en_tx_desc *tx_desc;
 	struct mlx4_en_tx_desc *tx_desc;
 	struct mlx4_en_tx_info *tx_info;
 	struct mlx4_en_tx_info *tx_info;
 	struct mlx4_wqe_data_seg *data;
 	struct mlx4_wqe_data_seg *data;
@@ -1123,25 +1145,16 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 	tx_info->page = frame->page;
 	tx_info->page = frame->page;
 	frame->page = NULL;
 	frame->page = NULL;
 	tx_info->map0_dma = dma;
 	tx_info->map0_dma = dma;
-	tx_info->map0_byte_count = PAGE_SIZE;
-	tx_info->nr_txbb = MLX4_EN_XDP_TX_NRTXBB;
 	tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
 	tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
-	tx_info->data_offset = offsetof(struct mlx4_en_tx_desc, data);
-	tx_info->ts_requested = 0;
-	tx_info->nr_maps = 1;
-	tx_info->linear = 1;
-	tx_info->inl = 0;
 
 
 	dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
 	dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
 					 length, PCI_DMA_TODEVICE);
 					 length, PCI_DMA_TODEVICE);
 
 
 	data->addr = cpu_to_be64(dma + frame->page_offset);
 	data->addr = cpu_to_be64(dma + frame->page_offset);
-	data->lkey = ring->mr_key;
 	dma_wmb();
 	dma_wmb();
 	data->byte_count = cpu_to_be32(length);
 	data->byte_count = cpu_to_be32(length);
 
 
 	/* tx completion can avoid cache line miss for common cases */
 	/* tx completion can avoid cache line miss for common cases */
-	tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
 
 
 	op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
 	op_own = cpu_to_be32(MLX4_OPCODE_SEND) |
 		((ring->prod & ring->size) ?
 		((ring->prod & ring->size) ?
@@ -1152,10 +1165,13 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 
 
 	ring->prod += MLX4_EN_XDP_TX_NRTXBB;
 	ring->prod += MLX4_EN_XDP_TX_NRTXBB;
 
 
-	qpn_vlan.fence_size = MLX4_EN_XDP_TX_REAL_SZ;
+	/* Ensure new descriptor hits memory
+	 * before setting ownership of this descriptor to HW
+	 */
+	dma_wmb();
+	tx_desc->ctrl.owner_opcode = op_own;
+	ring->xmit_more++;
 
 
-	mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, 0,
-			      op_own, false, false);
 	*doorbell_pending = true;
 	*doorbell_pending = true;
 
 
 	return NETDEV_TX_OK;
 	return NETDEV_TX_OK;

+ 3 - 1
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h

@@ -693,7 +693,7 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 			       struct mlx4_en_rx_alloc *frame,
 			       struct mlx4_en_rx_alloc *frame,
-			       struct net_device *dev, unsigned int length,
+			       struct mlx4_en_priv *priv, unsigned int length,
 			       int tx_ind, bool *doorbell_pending);
 			       int tx_ind, bool *doorbell_pending);
 void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
 void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
 bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
 bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
@@ -705,6 +705,8 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 			   int node, int queue_index);
 			   int node, int queue_index);
 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
 			     struct mlx4_en_tx_ring **pring);
 			     struct mlx4_en_tx_ring **pring);
+void mlx4_en_init_tx_xdp_ring_descs(struct mlx4_en_priv *priv,
+				    struct mlx4_en_tx_ring *ring);
 int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 			     struct mlx4_en_tx_ring *ring,
 			     struct mlx4_en_tx_ring *ring,
 			     int cq, int user_prio);
 			     int cq, int user_prio);