Explorar o código

Merge branch 'mlx5e-tstamp'

Saeed Mahameed says:

====================
Introduce mlx5 ethernet timestamping

This patch series introduces the support for ConnectX-4 timestamping
and the PTP kernel interface.

Changes from V2:
net/mlx5_core: Introduce access function to read internal_timer
	- Remove one line function
	- Change function name

net/mlx5e: Add HW timestamping (TS) support:
	- Data path performance optimization (caching tstamp struct in rq,sq)
	- Change read/write_lock_irqsave to read/write_lock
	- Move ioctl functions to en_clock file
	- Changed overflow start algorithm according to comments from Richard
	- Move timestamp init/cleanup to open/close ndos.

In details:

1st patch prevents the driver from modifying skb->data and SKB CB in
device xmit function.

2nd patch adds the needed low level helpers for:
	- Fetching the hardware clock (hardware internal timer)
	- Parsing CQEs timestamps
	- Device frequency capability

3rd patch adds new en_clock.c file that handles all needed timestamping
operations:
	- Internal clock structure initialization and other helper functions
	- Added the needed ioctl for setting/getting the current timestamping
	  configuration.
	- used this configuration in RX/TX data path to fill the SKB with
	  the timestamp.

4th patch Introduces PTP (PHC) support.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
David S. Miller %!s(int64=9) %!d(string=hai) anos
pai
achega
1633bf118b

+ 1 - 0
drivers/net/ethernet/mellanox/mlx5/core/Kconfig

@@ -13,6 +13,7 @@ config MLX5_CORE
 config MLX5_CORE_EN
 	bool "Mellanox Technologies ConnectX-4 Ethernet support"
 	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
+	select PTP_1588_CLOCK
 	default n
 	---help---
 	  Ethernet support in Mellanox Technologies ConnectX-4 NIC.

+ 1 - 1
drivers/net/ethernet/mellanox/mlx5/core/Makefile

@@ -5,4 +5,4 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o \
 		en_main.o en_fs.o en_ethtool.o en_tx.o en_rx.o \
-		en_txrx.o
+		en_txrx.o en_clock.o

+ 28 - 3
drivers/net/ethernet/mellanox/mlx5/core/en.h

@@ -32,6 +32,9 @@
 
 #include <linux/if_vlan.h>
 #include <linux/etherdevice.h>
+#include <linux/timecounter.h>
+#include <linux/net_tstamp.h>
+#include <linux/ptp_clock_kernel.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/cq.h>
@@ -284,6 +287,19 @@ struct mlx5e_params {
 	u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
 };
 
+struct mlx5e_tstamp {
+	rwlock_t                   lock;
+	struct cyclecounter        cycles;
+	struct timecounter         clock;
+	struct hwtstamp_config     hwtstamp_config;
+	u32                        nominal_c_mult;
+	unsigned long              overflow_period;
+	struct delayed_work        overflow_work;
+	struct mlx5_core_dev      *mdev;
+	struct ptp_clock          *ptp;
+	struct ptp_clock_info      ptp_info;
+};
+
 enum {
 	MLX5E_RQ_STATE_POST_WQES_ENABLE,
 };
@@ -315,6 +331,7 @@ struct mlx5e_rq {
 
 	struct device         *pdev;
 	struct net_device     *netdev;
+	struct mlx5e_tstamp   *tstamp;
 	struct mlx5e_rq_stats  stats;
 	struct mlx5e_cq        cq;
 
@@ -328,14 +345,12 @@ struct mlx5e_rq {
 	struct mlx5e_priv     *priv;
 } ____cacheline_aligned_in_smp;
 
-struct mlx5e_tx_skb_cb {
+struct mlx5e_tx_wqe_info {
 	u32 num_bytes;
 	u8  num_wqebbs;
 	u8  num_dma;
 };
 
-#define MLX5E_TX_SKB_CB(__skb) ((struct mlx5e_tx_skb_cb *)__skb->cb)
-
 enum mlx5e_dma_map_type {
 	MLX5E_DMA_MAP_SINGLE,
 	MLX5E_DMA_MAP_PAGE
@@ -371,6 +386,7 @@ struct mlx5e_sq {
 	/* pointers to per packet info: write@xmit, read@completion */
 	struct sk_buff           **skb;
 	struct mlx5e_sq_dma       *dma_fifo;
+	struct mlx5e_tx_wqe_info  *wqe_info;
 
 	/* read only */
 	struct mlx5_wq_cyc         wq;
@@ -383,6 +399,7 @@ struct mlx5e_sq {
 	u16                        max_inline;
 	u16                        edge;
 	struct device             *pdev;
+	struct mlx5e_tstamp       *tstamp;
 	__be32                     mkey_be;
 	unsigned long              state;
 
@@ -519,6 +536,7 @@ struct mlx5e_priv {
 	struct mlx5_core_dev      *mdev;
 	struct net_device         *netdev;
 	struct mlx5e_stats         stats;
+	struct mlx5e_tstamp        tstamp;
 };
 
 #define MLX5E_NET_IP_ALIGN 2
@@ -585,6 +603,13 @@ void mlx5e_destroy_flow_tables(struct mlx5e_priv *priv);
 void mlx5e_init_eth_addr(struct mlx5e_priv *priv);
 void mlx5e_set_rx_mode_work(struct work_struct *work);
 
+void mlx5e_fill_hwstamp(struct mlx5e_tstamp *clock, u64 timestamp,
+			struct skb_shared_hwtstamps *hwts);
+void mlx5e_timestamp_init(struct mlx5e_priv *priv);
+void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv);
+int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr);
+int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr);
+
 int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
 			  u16 vid);
 int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto,

+ 287 - 0
drivers/net/ethernet/mellanox/mlx5/core/en_clock.c

@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2015, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/clocksource.h>
+#include "en.h"
+
+enum {
+	MLX5E_CYCLES_SHIFT	= 23
+};
+
+void mlx5e_fill_hwstamp(struct mlx5e_tstamp *tstamp, u64 timestamp,
+			struct skb_shared_hwtstamps *hwts)
+{
+	u64 nsec;
+
+	read_lock(&tstamp->lock);
+	nsec = timecounter_cyc2time(&tstamp->clock, timestamp);
+	read_unlock(&tstamp->lock);
+
+	hwts->hwtstamp = ns_to_ktime(nsec);
+}
+
+static cycle_t mlx5e_read_internal_timer(const struct cyclecounter *cc)
+{
+	struct mlx5e_tstamp *tstamp = container_of(cc, struct mlx5e_tstamp,
+						   cycles);
+
+	return mlx5_read_internal_timer(tstamp->mdev) & cc->mask;
+}
+
+static void mlx5e_timestamp_overflow(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct mlx5e_tstamp *tstamp = container_of(dwork, struct mlx5e_tstamp,
+						   overflow_work);
+
+	write_lock(&tstamp->lock);
+	timecounter_read(&tstamp->clock);
+	write_unlock(&tstamp->lock);
+	schedule_delayed_work(&tstamp->overflow_work, tstamp->overflow_period);
+}
+
+int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct hwtstamp_config config;
+
+	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
+		return -EFAULT;
+
+	/* TX HW timestamp */
+	switch (config.tx_type) {
+	case HWTSTAMP_TX_OFF:
+	case HWTSTAMP_TX_ON:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	/* RX HW timestamp */
+	switch (config.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		break;
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		config.rx_filter = HWTSTAMP_FILTER_ALL;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	memcpy(&priv->tstamp.hwtstamp_config, &config, sizeof(config));
+
+	return copy_to_user(ifr->ifr_data, &config,
+			    sizeof(config)) ? -EFAULT : 0;
+}
+
+int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct hwtstamp_config *cfg = &priv->tstamp.hwtstamp_config;
+
+	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
+		return -EOPNOTSUPP;
+
+	return copy_to_user(ifr->ifr_data, cfg, sizeof(*cfg)) ? -EFAULT : 0;
+}
+
+static int mlx5e_ptp_settime(struct ptp_clock_info *ptp,
+			     const struct timespec64 *ts)
+{
+	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
+						   ptp_info);
+	u64 ns = timespec64_to_ns(ts);
+
+	write_lock(&tstamp->lock);
+	timecounter_init(&tstamp->clock, &tstamp->cycles, ns);
+	write_unlock(&tstamp->lock);
+
+	return 0;
+}
+
+static int mlx5e_ptp_gettime(struct ptp_clock_info *ptp,
+			     struct timespec64 *ts)
+{
+	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
+						   ptp_info);
+	u64 ns;
+
+	write_lock(&tstamp->lock);
+	ns = timecounter_read(&tstamp->clock);
+	write_unlock(&tstamp->lock);
+
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+static int mlx5e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
+						   ptp_info);
+
+	write_lock(&tstamp->lock);
+	timecounter_adjtime(&tstamp->clock, delta);
+	write_unlock(&tstamp->lock);
+
+	return 0;
+}
+
+static int mlx5e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
+{
+	u64 adj;
+	u32 diff;
+	int neg_adj = 0;
+	struct mlx5e_tstamp *tstamp = container_of(ptp, struct mlx5e_tstamp,
+						  ptp_info);
+
+	if (delta < 0) {
+		neg_adj = 1;
+		delta = -delta;
+	}
+
+	adj = tstamp->nominal_c_mult;
+	adj *= delta;
+	diff = div_u64(adj, 1000000000ULL);
+
+	write_lock(&tstamp->lock);
+	timecounter_read(&tstamp->clock);
+	tstamp->cycles.mult = neg_adj ? tstamp->nominal_c_mult - diff :
+					tstamp->nominal_c_mult + diff;
+	write_unlock(&tstamp->lock);
+
+	return 0;
+}
+
+static const struct ptp_clock_info mlx5e_ptp_clock_info = {
+	.owner		= THIS_MODULE,
+	.max_adj	= 100000000,
+	.n_alarm	= 0,
+	.n_ext_ts	= 0,
+	.n_per_out	= 0,
+	.n_pins		= 0,
+	.pps		= 0,
+	.adjfreq	= mlx5e_ptp_adjfreq,
+	.adjtime	= mlx5e_ptp_adjtime,
+	.gettime64	= mlx5e_ptp_gettime,
+	.settime64	= mlx5e_ptp_settime,
+	.enable		= NULL,
+};
+
+static void mlx5e_timestamp_init_config(struct mlx5e_tstamp *tstamp)
+{
+	tstamp->hwtstamp_config.tx_type = HWTSTAMP_TX_OFF;
+	tstamp->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_NONE;
+}
+
+void mlx5e_timestamp_init(struct mlx5e_priv *priv)
+{
+	struct mlx5e_tstamp *tstamp = &priv->tstamp;
+	u64 ns;
+	u64 frac = 0;
+	u32 dev_freq;
+
+	mlx5e_timestamp_init_config(tstamp);
+	dev_freq = MLX5_CAP_GEN(priv->mdev, device_frequency_khz);
+	if (!dev_freq) {
+		mlx5_core_warn(priv->mdev, "invalid device_frequency_khz, aborting HW clock init\n");
+		return;
+	}
+	rwlock_init(&tstamp->lock);
+	tstamp->cycles.read = mlx5e_read_internal_timer;
+	tstamp->cycles.shift = MLX5E_CYCLES_SHIFT;
+	tstamp->cycles.mult = clocksource_khz2mult(dev_freq,
+						   tstamp->cycles.shift);
+	tstamp->nominal_c_mult = tstamp->cycles.mult;
+	tstamp->cycles.mask = CLOCKSOURCE_MASK(41);
+	tstamp->mdev = priv->mdev;
+
+	timecounter_init(&tstamp->clock, &tstamp->cycles,
+			 ktime_to_ns(ktime_get_real()));
+
+	/* Calculate period in seconds to call the overflow watchdog - to make
+	 * sure counter is checked at least once every wrap around.
+	 */
+	ns = cyclecounter_cyc2ns(&tstamp->cycles, tstamp->cycles.mask,
+				 frac, &frac);
+	do_div(ns, NSEC_PER_SEC / 2 / HZ);
+	tstamp->overflow_period = ns;
+
+	INIT_DELAYED_WORK(&tstamp->overflow_work, mlx5e_timestamp_overflow);
+	if (tstamp->overflow_period)
+		schedule_delayed_work(&tstamp->overflow_work, 0);
+	else
+		mlx5_core_warn(priv->mdev, "invalid overflow period, overflow_work is not scheduled\n");
+
+	/* Configure the PHC */
+	tstamp->ptp_info = mlx5e_ptp_clock_info;
+	snprintf(tstamp->ptp_info.name, 16, "mlx5 ptp");
+
+	tstamp->ptp = ptp_clock_register(&tstamp->ptp_info,
+					 &priv->mdev->pdev->dev);
+	if (IS_ERR_OR_NULL(tstamp->ptp)) {
+		mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n",
+			       PTR_ERR(tstamp->ptp));
+		tstamp->ptp = NULL;
+	}
+}
+
+void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv)
+{
+	struct mlx5e_tstamp *tstamp = &priv->tstamp;
+
+	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
+		return;
+
+	if (priv->tstamp.ptp) {
+		ptp_clock_unregister(priv->tstamp.ptp);
+		priv->tstamp.ptp = NULL;
+	}
+
+	cancel_delayed_work_sync(&tstamp->overflow_work);
+}

+ 30 - 0
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c

@@ -855,6 +855,35 @@ static int mlx5e_set_pauseparam(struct net_device *netdev,
 	return err;
 }
 
+static int mlx5e_get_ts_info(struct net_device *dev,
+			     struct ethtool_ts_info *info)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	int ret;
+
+	ret = ethtool_op_get_ts_info(dev, info);
+	if (ret)
+		return ret;
+
+	info->phc_index = priv->tstamp.ptp ?
+			  ptp_clock_index(priv->tstamp.ptp) : -1;
+
+	if (!MLX5_CAP_GEN(priv->mdev, device_frequency_khz))
+		return 0;
+
+	info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE |
+				 SOF_TIMESTAMPING_RX_HARDWARE |
+				 SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	info->tx_types = (BIT(1) << HWTSTAMP_TX_OFF) |
+			 (BIT(1) << HWTSTAMP_TX_ON);
+
+	info->rx_filters = (BIT(1) << HWTSTAMP_FILTER_NONE) |
+			   (BIT(1) << HWTSTAMP_FILTER_ALL);
+
+	return 0;
+}
+
 const struct ethtool_ops mlx5e_ethtool_ops = {
 	.get_drvinfo       = mlx5e_get_drvinfo,
 	.get_link          = ethtool_op_get_link,
@@ -878,4 +907,5 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
 	.set_tunable       = mlx5e_set_tunable,
 	.get_pauseparam    = mlx5e_get_pauseparam,
 	.set_pauseparam    = mlx5e_set_pauseparam,
+	.get_ts_info       = mlx5e_get_ts_info,
 };

+ 22 - 2
drivers/net/ethernet/mellanox/mlx5/core/en_main.c

@@ -351,6 +351,7 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 
 	rq->pdev    = c->pdev;
 	rq->netdev  = c->netdev;
+	rq->tstamp  = &priv->tstamp;
 	rq->channel = c;
 	rq->ix      = c->ix;
 	rq->priv    = c->priv;
@@ -507,6 +508,7 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq)
 
 static void mlx5e_free_sq_db(struct mlx5e_sq *sq)
 {
+	kfree(sq->wqe_info);
 	kfree(sq->dma_fifo);
 	kfree(sq->skb);
 }
@@ -519,8 +521,10 @@ static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa)
 	sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa);
 	sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL,
 				    numa);
+	sq->wqe_info = kzalloc_node(wq_sz * sizeof(*sq->wqe_info), GFP_KERNEL,
+				    numa);
 
-	if (!sq->skb || !sq->dma_fifo) {
+	if (!sq->skb || !sq->dma_fifo || !sq->wqe_info) {
 		mlx5e_free_sq_db(sq);
 		return -ENOMEM;
 	}
@@ -568,6 +572,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
 	sq->txq = netdev_get_tx_queue(priv->netdev, txq_ix);
 
 	sq->pdev      = c->pdev;
+	sq->tstamp    = &priv->tstamp;
 	sq->mkey_be   = c->mkey_be;
 	sq->channel   = c;
 	sq->tc        = tc;
@@ -1427,6 +1432,7 @@ int mlx5e_open_locked(struct net_device *netdev)
 
 	mlx5e_update_carrier(priv);
 	mlx5e_redirect_rqts(priv);
+	mlx5e_timestamp_init(priv);
 
 	schedule_delayed_work(&priv->update_stats_work, 0);
 
@@ -1463,6 +1469,7 @@ int mlx5e_close_locked(struct net_device *netdev)
 
 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
 
+	mlx5e_timestamp_cleanup(priv);
 	mlx5e_redirect_rqts(priv);
 	netif_carrier_off(priv->netdev);
 	mlx5e_close_channels(priv);
@@ -1932,6 +1939,18 @@ static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 	return err;
 }
 
+static int mlx5e_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	switch (cmd) {
+	case SIOCSHWTSTAMP:
+		return mlx5e_hwstamp_set(dev, ifr);
+	case SIOCGHWTSTAMP:
+		return mlx5e_hwstamp_get(dev, ifr);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
@@ -2015,7 +2034,8 @@ static struct net_device_ops mlx5e_netdev_ops = {
 	.ndo_vlan_rx_add_vid	 = mlx5e_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	 = mlx5e_vlan_rx_kill_vid,
 	.ndo_set_features        = mlx5e_set_features,
-	.ndo_change_mtu		 = mlx5e_change_mtu
+	.ndo_change_mtu		 = mlx5e_change_mtu,
+	.ndo_do_ioctl		 = mlx5e_ioctl,
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)

+ 9 - 0
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c

@@ -36,6 +36,11 @@
 #include <net/busy_poll.h>
 #include "en.h"
 
+static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
+{
+	return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL;
+}
+
 static inline int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq,
 				     struct mlx5e_rx_wqe *wqe, u16 ix)
 {
@@ -190,6 +195,7 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
 {
 	struct net_device *netdev = rq->netdev;
 	u32 cqe_bcnt = be32_to_cpu(cqe->byte_cnt);
+	struct mlx5e_tstamp *tstamp = rq->tstamp;
 	int lro_num_seg;
 
 	skb_put(skb, cqe_bcnt);
@@ -202,6 +208,9 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe,
 		rq->stats.lro_bytes += cqe_bcnt;
 	}
 
+	if (unlikely(mlx5e_rx_hw_stamp(tstamp)))
+		mlx5e_fill_hwstamp(tstamp, get_cqe_ts(cqe), skb_hwtstamps(skb));
+
 	mlx5e_handle_csum(netdev, cqe, rq, skb);
 
 	skb->protocol = eth_type_trans(skb, netdev);

+ 55 - 30
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

@@ -92,11 +92,11 @@ static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i)
 	return &sq->dma_fifo[i & sq->dma_fifo_mask];
 }
 
-static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, struct sk_buff *skb)
+static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma)
 {
 	int i;
 
-	for (i = 0; i < MLX5E_TX_SKB_CB(skb)->num_dma; i++) {
+	for (i = 0; i < num_dma; i++) {
 		struct mlx5e_sq_dma *last_pushed_dma =
 			mlx5e_dma_get(sq, --sq->dma_fifo_pc);
 
@@ -139,19 +139,28 @@ static inline u16 mlx5e_get_inline_hdr_size(struct mlx5e_sq *sq,
 	return MLX5E_MIN_INLINE;
 }
 
-static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs)
+static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data,
+					    unsigned int *skb_len,
+					    unsigned int len)
+{
+	*skb_len -= len;
+	*skb_data += len;
+}
+
+static inline void mlx5e_insert_vlan(void *start, struct sk_buff *skb, u16 ihs,
+				     unsigned char **skb_data,
+				     unsigned int *skb_len)
 {
 	struct vlan_ethhdr *vhdr = (struct vlan_ethhdr *)start;
 	int cpy1_sz = 2 * ETH_ALEN;
 	int cpy2_sz = ihs - cpy1_sz;
 
-	skb_copy_from_linear_data(skb, vhdr, cpy1_sz);
-	skb_pull_inline(skb, cpy1_sz);
+	memcpy(vhdr, *skb_data, cpy1_sz);
+	mlx5e_tx_skb_pull_inline(skb_data, skb_len, cpy1_sz);
 	vhdr->h_vlan_proto = skb->vlan_proto;
 	vhdr->h_vlan_TCI = cpu_to_be16(skb_vlan_tag_get(skb));
-	skb_copy_from_linear_data(skb, &vhdr->h_vlan_encapsulated_proto,
-				  cpy2_sz);
-	skb_pull_inline(skb, cpy2_sz);
+	memcpy(&vhdr->h_vlan_encapsulated_proto, *skb_data, cpy2_sz);
+	mlx5e_tx_skb_pull_inline(skb_data, skb_len, cpy2_sz);
 }
 
 static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
@@ -160,11 +169,14 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 
 	u16 pi = sq->pc & wq->sz_m1;
 	struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(wq, pi);
+	struct mlx5e_tx_wqe_info *wi   = &sq->wqe_info[pi];
 
 	struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
 	struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
 	struct mlx5_wqe_data_seg *dseg;
 
+	unsigned char *skb_data = skb->data;
+	unsigned int skb_len = skb->len;
 	u8  opcode = MLX5_OPCODE_SEND;
 	dma_addr_t dma_addr = 0;
 	bool bf = false;
@@ -192,8 +204,8 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 		opcode       = MLX5_OPCODE_LSO;
 		ihs          = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		payload_len  = skb->len - ihs;
-		MLX5E_TX_SKB_CB(skb)->num_bytes = skb->len +
-					(skb_shinfo(skb)->gso_segs - 1) * ihs;
+		wi->num_bytes = skb->len +
+				(skb_shinfo(skb)->gso_segs - 1) * ihs;
 		sq->stats.tso_packets++;
 		sq->stats.tso_bytes += payload_len;
 	} else {
@@ -201,16 +213,16 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 		     !skb->xmit_more &&
 		     !skb_shinfo(skb)->nr_frags;
 		ihs = mlx5e_get_inline_hdr_size(sq, skb, bf);
-		MLX5E_TX_SKB_CB(skb)->num_bytes = max_t(unsigned int, skb->len,
-							ETH_ZLEN);
+		wi->num_bytes = max_t(unsigned int, skb->len, ETH_ZLEN);
 	}
 
 	if (skb_vlan_tag_present(skb)) {
-		mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs);
+		mlx5e_insert_vlan(eseg->inline_hdr_start, skb, ihs, &skb_data,
+				  &skb_len);
 		ihs += VLAN_HLEN;
 	} else {
-		skb_copy_from_linear_data(skb, eseg->inline_hdr_start, ihs);
-		skb_pull_inline(skb, ihs);
+		memcpy(eseg->inline_hdr_start, skb_data, ihs);
+		mlx5e_tx_skb_pull_inline(&skb_data, &skb_len, ihs);
 	}
 
 	eseg->inline_hdr_sz = cpu_to_be16(ihs);
@@ -220,11 +232,11 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 			       MLX5_SEND_WQE_DS);
 	dseg    = (struct mlx5_wqe_data_seg *)cseg + ds_cnt;
 
-	MLX5E_TX_SKB_CB(skb)->num_dma = 0;
+	wi->num_dma = 0;
 
-	headlen = skb_headlen(skb);
+	headlen = skb_len - skb->data_len;
 	if (headlen) {
-		dma_addr = dma_map_single(sq->pdev, skb->data, headlen,
+		dma_addr = dma_map_single(sq->pdev, skb_data, headlen,
 					  DMA_TO_DEVICE);
 		if (unlikely(dma_mapping_error(sq->pdev, dma_addr)))
 			goto dma_unmap_wqe_err;
@@ -234,7 +246,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 		dseg->byte_count = cpu_to_be32(headlen);
 
 		mlx5e_dma_push(sq, dma_addr, headlen, MLX5E_DMA_MAP_SINGLE);
-		MLX5E_TX_SKB_CB(skb)->num_dma++;
+		wi->num_dma++;
 
 		dseg++;
 	}
@@ -253,23 +265,25 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 		dseg->byte_count = cpu_to_be32(fsz);
 
 		mlx5e_dma_push(sq, dma_addr, fsz, MLX5E_DMA_MAP_PAGE);
-		MLX5E_TX_SKB_CB(skb)->num_dma++;
+		wi->num_dma++;
 
 		dseg++;
 	}
 
-	ds_cnt += MLX5E_TX_SKB_CB(skb)->num_dma;
+	ds_cnt += wi->num_dma;
 
 	cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode);
 	cseg->qpn_ds           = cpu_to_be32((sq->sqn << 8) | ds_cnt);
 
 	sq->skb[pi] = skb;
 
-	MLX5E_TX_SKB_CB(skb)->num_wqebbs = DIV_ROUND_UP(ds_cnt,
-							MLX5_SEND_WQEBB_NUM_DS);
-	sq->pc += MLX5E_TX_SKB_CB(skb)->num_wqebbs;
+	wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
+	sq->pc += wi->num_wqebbs;
+
+	netdev_tx_sent_queue(sq->txq, wi->num_bytes);
 
-	netdev_tx_sent_queue(sq->txq, MLX5E_TX_SKB_CB(skb)->num_bytes);
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
 	if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_SQ_STOP_ROOM))) {
 		netif_tx_stop_queue(sq->txq);
@@ -280,7 +294,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 		int bf_sz = 0;
 
 		if (bf && sq->uar_bf_map)
-			bf_sz = MLX5E_TX_SKB_CB(skb)->num_wqebbs << 3;
+			bf_sz = wi->num_wqebbs << 3;
 
 		cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
 		mlx5e_tx_notify_hw(sq, wqe, bf_sz);
@@ -297,7 +311,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb)
 
 dma_unmap_wqe_err:
 	sq->stats.dropped++;
-	mlx5e_dma_unmap_wqe_err(sq, skb);
+	mlx5e_dma_unmap_wqe_err(sq, wi->num_dma);
 
 	dev_kfree_skb_any(skb);
 
@@ -352,6 +366,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq)
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
 		do {
+			struct mlx5e_tx_wqe_info *wi;
 			struct sk_buff *skb;
 			u16 ci;
 			int j;
@@ -360,6 +375,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq)
 
 			ci = sqcc & sq->wq.sz_m1;
 			skb = sq->skb[ci];
+			wi = &sq->wqe_info[ci];
 
 			if (unlikely(!skb)) { /* nop */
 				sq->stats.nop++;
@@ -367,7 +383,16 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq)
 				continue;
 			}
 
-			for (j = 0; j < MLX5E_TX_SKB_CB(skb)->num_dma; j++) {
+			if (unlikely(skb_shinfo(skb)->tx_flags &
+				     SKBTX_HW_TSTAMP)) {
+				struct skb_shared_hwtstamps hwts = {};
+
+				mlx5e_fill_hwstamp(sq->tstamp,
+						   get_cqe_ts(cqe), &hwts);
+				skb_tstamp_tx(skb, &hwts);
+			}
+
+			for (j = 0; j < wi->num_dma; j++) {
 				struct mlx5e_sq_dma *dma =
 					mlx5e_dma_get(sq, dma_fifo_cc++);
 
@@ -375,8 +400,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq)
 			}
 
 			npkts++;
-			nbytes += MLX5E_TX_SKB_CB(skb)->num_bytes;
-			sqcc += MLX5E_TX_SKB_CB(skb)->num_wqebbs;
+			nbytes += wi->num_bytes;
+			sqcc += wi->num_wqebbs;
 			dev_kfree_skb(skb);
 		} while (!last_wqe);
 	}

+ 13 - 0
drivers/net/ethernet/mellanox/mlx5/core/main.c

@@ -504,6 +504,19 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
 	return mlx5_cmd_status_to_err_v2(out);
 }
 
+cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev)
+{
+	u32 timer_h, timer_h1, timer_l;
+
+	timer_h = ioread32be(&dev->iseg->internal_timer_h);
+	timer_l = ioread32be(&dev->iseg->internal_timer_l);
+	timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
+	if (timer_h != timer_h1) /* wrap around */
+		timer_l = ioread32be(&dev->iseg->internal_timer_l);
+
+	return (cycle_t)timer_l | (cycle_t)timer_h1 << 32;
+}
+
 static int mlx5_irq_set_affinity_hint(struct mlx5_core_dev *mdev, int i)
 {
 	struct mlx5_priv *priv  = &mdev->priv;

+ 1 - 0
drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h

@@ -98,6 +98,7 @@ int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
+cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev);
 
 void mlx5e_init(void);
 void mlx5e_cleanup(void);

+ 17 - 3
include/linux/mlx5/device.h

@@ -443,9 +443,12 @@ struct mlx5_init_seg {
 	__be32			rsvd1[120];
 	__be32			initializing;
 	struct health_buffer	health;
-	__be32			rsvd2[884];
+	__be32			rsvd2[880];
+	__be32			internal_timer_h;
+	__be32			internal_timer_l;
+	__be32			rsrv3[2];
 	__be32			health_counter;
-	__be32			rsvd3[1019];
+	__be32			rsvd4[1019];
 	__be64			ieee1588_clk;
 	__be32			ieee1588_clk_type;
 	__be32			clr_intx;
@@ -601,7 +604,8 @@ struct mlx5_cqe64 {
 	__be32		imm_inval_pkey;
 	u8		rsvd40[4];
 	__be32		byte_cnt;
-	__be64		timestamp;
+	__be32		timestamp_h;
+	__be32		timestamp_l;
 	__be32		sop_drop_qpn;
 	__be16		wqe_counter;
 	u8		signature;
@@ -623,6 +627,16 @@ static inline int cqe_has_vlan(struct mlx5_cqe64 *cqe)
 	return !!(cqe->l4_hdr_type_etc & 0x1);
 }
 
+static inline u64 get_cqe_ts(struct mlx5_cqe64 *cqe)
+{
+	u32 hi, lo;
+
+	hi = be32_to_cpu(cqe->timestamp_h);
+	lo = be32_to_cpu(cqe->timestamp_l);
+
+	return (u64)lo | ((u64)hi << 32);
+}
+
 enum {
 	CQE_L4_HDR_TYPE_NONE			= 0x0,
 	CQE_L4_HDR_TYPE_TCP_NO_ACK		= 0x1,

+ 3 - 3
include/linux/mlx5/mlx5_ifc.h

@@ -829,9 +829,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_66[0x8];
 	u8         log_uar_page_sz[0x10];
 
-	u8         reserved_67[0xe0];
-
-	u8         reserved_68[0x1f];
+	u8         reserved_67[0x40];
+	u8         device_frequency_khz[0x20];
+	u8         reserved_68[0x5f];
 	u8         cqe_zip[0x1];
 
 	u8         cqe_zip_timeout[0x10];