
Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull infiniband updates from Roland Dreier:
 "Main batch of InfiniBand/RDMA changes for 3.14:
   - Flow steering for InfiniBand UD traffic
   - IP-based addressing for IBoE aka RoCE
   - Pass SRP submaintainership from Dave to Bart
   - SRP transport fixes from Bart
   - Add the new Cisco usNIC low-level device driver
   - Various other fixes"

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (75 commits)
  IB/mlx5: Verify reserved fields are cleared
  IB/mlx5: Remove old field for create mkey mailbox
  IB/mlx5: Abort driver cleanup if teardown hca fails
  IB/mlx5: Allow creation of QPs with zero-length work queues
  mlx5_core: Fix PowerPC support
  mlx5_core: Improve debugfs readability
  IB/mlx5: Add support for resize CQ
  IB/mlx5: Implement modify CQ
  IB/mlx5: Make sure doorbell record is visible before doorbell
  mlx5_core: Use mlx5 core style warning
  IB/mlx5: Clear out struct before create QP command
  mlx5_core: Fix out arg size in access_register command
  RDMA/nes: Slight optimization of Ethernet address compare
  IB/qib: Fix QP check when looping back to/from QP1
  RDMA/cxgb4: Fix gcc warning on 32-bit arch
  IB/usnic: Remove unused includes of <linux/version.h>
  RDMA/amso1100: Add check if cache memory was allocated before freeing it
  IPoIB: Report operstate consistently when brought up without a link
  IB/core: Fix unused variable warning
  RDMA/cma: Handle global/non-linklocal IPv6 addresses in cma_check_linklocal()
  ...
Linus Torvalds, 11 years ago
parent commit 8e585a6c4a
100 changed files with 7663 additions and 567 deletions
  1. Documentation/scsi/scsi_transport_srp/Makefile (+7, -0)
  2. Documentation/scsi/scsi_transport_srp/rport_state_diagram.dot (+26, -0)
  3. MAINTAINERS (+6, -1)
  4. drivers/infiniband/Kconfig (+4, -2)
  5. drivers/infiniband/Makefile (+1, -0)
  6. drivers/infiniband/core/Makefile (+3, -2)
  7. drivers/infiniband/core/addr.c (+94, -3)
  8. drivers/infiniband/core/cm.c (+52, -0)
  9. drivers/infiniband/core/cma.c (+66, -15)
  10. drivers/infiniband/core/core_priv.h (+2, -0)
  11. drivers/infiniband/core/iwcm.c (+1, -2)
  12. drivers/infiniband/core/sa_query.c (+11, -1)
  13. drivers/infiniband/core/sysfs.c (+1, -0)
  14. drivers/infiniband/core/ucma.c (+4, -14)
  15. drivers/infiniband/core/uverbs_cmd.c (+4, -0)
  16. drivers/infiniband/core/verbs.c (+99, -2)
  17. drivers/infiniband/hw/amso1100/c2_intr.c (+2, -1)
  18. drivers/infiniband/hw/cxgb4/mem.c (+1, -1)
  19. drivers/infiniband/hw/ehca/ehca_qp.c (+1, -1)
  20. drivers/infiniband/hw/ipath/ipath_qp.c (+1, -1)
  21. drivers/infiniband/hw/mlx4/Kconfig (+1, -1)
  22. drivers/infiniband/hw/mlx4/ah.c (+9, -31)
  23. drivers/infiniband/hw/mlx4/cq.c (+9, -0)
  24. drivers/infiniband/hw/mlx4/main.c (+590, -157)
  25. drivers/infiniband/hw/mlx4/mlx4_ib.h (+15, -3)
  26. drivers/infiniband/hw/mlx4/qp.c (+130, -27)
  27. drivers/infiniband/hw/mlx4/sysfs.c (+6, -2)
  28. drivers/infiniband/hw/mlx5/cq.c (+293, -17)
  29. drivers/infiniband/hw/mlx5/main.c (+8, -5)
  30. drivers/infiniband/hw/mlx5/mlx5_ib.h (+2, -2)
  31. drivers/infiniband/hw/mlx5/mr.c (+0, -1)
  32. drivers/infiniband/hw/mlx5/qp.c (+93, -40)
  33. drivers/infiniband/hw/mlx5/user.h (+3, -0)
  34. drivers/infiniband/hw/mthca/mthca_qp.c (+2, -1)
  35. drivers/infiniband/hw/nes/nes_cm.c (+1, -2)
  36. drivers/infiniband/hw/ocrdma/Kconfig (+1, -1)
  37. drivers/infiniband/hw/ocrdma/ocrdma.h (+12, -0)
  38. drivers/infiniband/hw/ocrdma/ocrdma_ah.c (+4, -2)
  39. drivers/infiniband/hw/ocrdma/ocrdma_hw.c (+2, -19)
  40. drivers/infiniband/hw/ocrdma/ocrdma_hw.h (+0, -1)
  41. drivers/infiniband/hw/ocrdma/ocrdma_main.c (+40, -98)
  42. drivers/infiniband/hw/ocrdma/ocrdma_sli.h (+2, -2)
  43. drivers/infiniband/hw/ocrdma/ocrdma_verbs.c (+2, -1)
  44. drivers/infiniband/hw/qib/qib_qp.c (+1, -1)
  45. drivers/infiniband/hw/qib/qib_ud.c (+8, -1)
  46. drivers/infiniband/hw/usnic/Kconfig (+10, -0)
  47. drivers/infiniband/hw/usnic/Makefile (+15, -0)
  48. drivers/infiniband/hw/usnic/usnic.h (+29, -0)
  49. drivers/infiniband/hw/usnic/usnic_abi.h (+73, -0)
  50. drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h (+27, -0)
  51. drivers/infiniband/hw/usnic/usnic_common_util.h (+68, -0)
  52. drivers/infiniband/hw/usnic/usnic_debugfs.c (+154, -0)
  53. drivers/infiniband/hw/usnic/usnic_debugfs.h (+29, -0)
  54. drivers/infiniband/hw/usnic/usnic_fwd.c (+350, -0)
  55. drivers/infiniband/hw/usnic/usnic_fwd.h (+113, -0)
  56. drivers/infiniband/hw/usnic/usnic_ib.h (+118, -0)
  57. drivers/infiniband/hw/usnic/usnic_ib_main.c (+682, -0)
  58. drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c (+754, -0)
  59. drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h (+117, -0)
  60. drivers/infiniband/hw/usnic/usnic_ib_sysfs.c (+341, -0)
  61. drivers/infiniband/hw/usnic/usnic_ib_sysfs.h (+29, -0)
  62. drivers/infiniband/hw/usnic/usnic_ib_verbs.c (+765, -0)
  63. drivers/infiniband/hw/usnic/usnic_ib_verbs.h (+72, -0)
  64. drivers/infiniband/hw/usnic/usnic_log.h (+58, -0)
  65. drivers/infiniband/hw/usnic/usnic_transport.c (+202, -0)
  66. drivers/infiniband/hw/usnic/usnic_transport.h (+51, -0)
  67. drivers/infiniband/hw/usnic/usnic_uiom.c (+604, -0)
  68. drivers/infiniband/hw/usnic/usnic_uiom.h (+80, -0)
  69. drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c (+236, -0)
  70. drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h (+73, -0)
  71. drivers/infiniband/hw/usnic/usnic_vnic.c (+467, -0)
  72. drivers/infiniband/hw/usnic/usnic_vnic.h (+103, -0)
  73. drivers/infiniband/ulp/ipoib/ipoib_main.c (+2, -2)
  74. drivers/infiniband/ulp/ipoib/ipoib_verbs.c (+3, -0)
  75. drivers/infiniband/ulp/srp/ib_srp.c (+1, -0)
  76. drivers/net/ethernet/mellanox/mlx4/cmd.c (+9, -0)
  77. drivers/net/ethernet/mellanox/mlx4/fw.c (+10, -0)
  78. drivers/net/ethernet/mellanox/mlx4/mcg.c (+17, -0)
  79. drivers/net/ethernet/mellanox/mlx4/mlx4.h (+5, -0)
  80. drivers/net/ethernet/mellanox/mlx4/port.c (+20, -0)
  81. drivers/net/ethernet/mellanox/mlx4/resource_tracker.c (+10, -0)
  82. drivers/net/ethernet/mellanox/mlx5/core/cq.c (+15, -2)
  83. drivers/net/ethernet/mellanox/mlx5/core/debugfs.c (+33, -6)
  84. drivers/net/ethernet/mellanox/mlx5/core/main.c (+8, -2)
  85. drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c (+6, -7)
  86. drivers/net/ethernet/mellanox/mlx5/core/port.c (+1, -1)
  87. drivers/net/ethernet/mellanox/mlx5/core/qp.c (+3, -2)
  88. drivers/scsi/scsi_transport_srp.c (+57, -38)
  89. include/linux/mlx4/cmd.h (+1, -0)
  90. include/linux/mlx4/cq.h (+11, -4)
  91. include/linux/mlx4/device.h (+6, -1)
  92. include/linux/mlx5/cq.h (+13, -5)
  93. include/linux/mlx5/device.h (+25, -6)
  94. include/linux/mlx5/qp.h (+45, -0)
  95. include/rdma/ib_addr.h (+49, -20)
  96. include/rdma/ib_cm.h (+1, -0)
  97. include/rdma/ib_pack.h (+1, -0)
  98. include/rdma/ib_sa.h (+3, -0)
  99. include/rdma/ib_verbs.h (+38, -4)
  100. include/scsi/scsi_transport_srp.h (+30, -6)

+ 7 - 0
Documentation/scsi/scsi_transport_srp/Makefile

@@ -0,0 +1,7 @@
+all: rport_state_diagram.svg rport_state_diagram.png
+
+rport_state_diagram.svg: rport_state_diagram.dot
+	dot -Tsvg -o $@ $<
+
+rport_state_diagram.png: rport_state_diagram.dot
+	dot -Tpng -o $@ $<

+ 26 - 0
Documentation/scsi/scsi_transport_srp/rport_state_diagram.dot

@@ -0,0 +1,26 @@
+digraph srp_initiator {
+	node [shape = doublecircle]; running lost;
+	node [shape = circle];
+
+	{
+	  rank = min;
+	  running_rta	[ label = "running;\nreconnect\ntimer\nactive" ];
+	};
+	running		[ label = "running;\nreconnect\ntimer\nstopped" ];
+	blocked;
+	failfast	[ label = "fail I/O\nfast" ];
+	lost;
+
+	running -> running_rta		[ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nsrp_start_tl_fail_timers()" ];
+	running_rta -> running		[ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nreconnecting succeeded" ];
+	running -> blocked		[ label = "fast_io_fail_tmo >= 0 or\ndev_loss_tmo >= 0;\nsrp_start_tl_fail_timers()" ];
+	running -> failfast		[ label = "fast_io_fail_tmo = off and\ndev_loss_tmo = off;\nreconnecting failed\n" ];
+	blocked -> failfast		[ label = "fast_io_fail_tmo\nexpired or\nreconnecting\nfailed" ];
+	blocked -> lost			[ label = "dev_loss_tmo\nexpired or\nsrp_stop_rport_timers()" ];
+	failfast -> lost		[ label = "dev_loss_tmo\nexpired or\nsrp_stop_rport_timers()" ];
+	blocked -> running		[ label = "reconnecting\nsucceeded" ];
+	failfast -> failfast		[ label = "reconnecting\nfailed" ];
+	failfast -> running		[ label = "reconnecting\nsucceeded" ];
+	running -> lost			[ label = "srp_stop_rport_timers()" ];
+	running_rta -> lost		[ label = "srp_stop_rport_timers()" ];
+}

+ 6 - 1
MAINTAINERS

@@ -2195,6 +2195,11 @@ M:	Nishank Trivedi <nistrive@cisco.com>
 S:	Supported
 F:	drivers/net/ethernet/cisco/enic/
 
+CISCO VIC LOW LATENCY NIC DRIVER
+M:      Upinder Malhi <umalhi@cisco.com>
+S:      Supported
+F:      drivers/infiniband/hw/usnic
+
 CIRRUS LOGIC EP93XX ETHERNET DRIVER
 M:	Hartley Sweeten <hsweeten@visionengravers.com>
 L:	netdev@vger.kernel.org
@@ -7528,7 +7533,7 @@ S:	Maintained
 F:	drivers/scsi/sr*
 
 SCSI RDMA PROTOCOL (SRP) INITIATOR
-M:	David Dillow <dillowda@ornl.gov>
+M:	Bart Van Assche <bvanassche@acm.org>
 L:	linux-rdma@vger.kernel.org
 S:	Supported
 W:	http://www.openfabrics.org

+ 4 - 2
drivers/infiniband/Kconfig

@@ -3,6 +3,8 @@ menuconfig INFINIBAND
 	depends on PCI || BROKEN
 	depends on HAS_IOMEM
 	depends on NET
+	depends on INET
+	depends on m || IPV6 != m
 	---help---
 	  Core support for InfiniBand (IB).  Make sure to also select
 	  any protocols you wish to use as well as drivers for your
@@ -38,8 +40,7 @@ config INFINIBAND_USER_MEM
 
 config INFINIBAND_ADDR_TRANS
 	bool
-	depends on INET
-	depends on !(INFINIBAND = y && IPV6 = m)
+	depends on INFINIBAND
 	default y
 
 source "drivers/infiniband/hw/mthca/Kconfig"
@@ -53,6 +54,7 @@ source "drivers/infiniband/hw/mlx4/Kconfig"
 source "drivers/infiniband/hw/mlx5/Kconfig"
 source "drivers/infiniband/hw/nes/Kconfig"
 source "drivers/infiniband/hw/ocrdma/Kconfig"
+source "drivers/infiniband/hw/usnic/Kconfig"
 
 source "drivers/infiniband/ulp/ipoib/Kconfig"
 

+ 1 - 0
drivers/infiniband/Makefile

@@ -10,6 +10,7 @@ obj-$(CONFIG_MLX4_INFINIBAND)		+= hw/mlx4/
 obj-$(CONFIG_MLX5_INFINIBAND)		+= hw/mlx5/
 obj-$(CONFIG_INFINIBAND_NES)		+= hw/nes/
 obj-$(CONFIG_INFINIBAND_OCRDMA)		+= hw/ocrdma/
+obj-$(CONFIG_INFINIBAND_USNIC)		+= hw/usnic/
 obj-$(CONFIG_INFINIBAND_IPOIB)		+= ulp/ipoib/
 obj-$(CONFIG_INFINIBAND_SRP)		+= ulp/srp/
 obj-$(CONFIG_INFINIBAND_SRPT)		+= ulp/srpt/

+ 3 - 2
drivers/infiniband/core/Makefile

@@ -1,8 +1,9 @@
-infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= ib_addr.o rdma_cm.o
+infiniband-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= rdma_cm.o
 user_access-$(CONFIG_INFINIBAND_ADDR_TRANS)	:= rdma_ucm.o
 
 obj-$(CONFIG_INFINIBAND) +=		ib_core.o ib_mad.o ib_sa.o \
-					ib_cm.o iw_cm.o $(infiniband-y)
+					ib_cm.o iw_cm.o ib_addr.o \
+					$(infiniband-y)
 obj-$(CONFIG_INFINIBAND_USER_MAD) +=	ib_umad.o
 obj-$(CONFIG_INFINIBAND_USER_ACCESS) +=	ib_uverbs.o ib_ucm.o \
 					$(user_access-y)

+ 94 - 3
drivers/infiniband/core/addr.c

@@ -86,6 +86,8 @@ int rdma_addr_size(struct sockaddr *addr)
 }
 EXPORT_SYMBOL(rdma_addr_size);
 
+static struct rdma_addr_client self;
+
 void rdma_addr_register_client(struct rdma_addr_client *client)
 {
 	atomic_set(&client->refcount, 1);
@@ -119,7 +121,8 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 }
 EXPORT_SYMBOL(rdma_copy_addr);
 
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+		      u16 *vlan_id)
 {
 	struct net_device *dev;
 	int ret = -EADDRNOTAVAIL;
@@ -142,6 +145,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 			return ret;
 
 		ret = rdma_copy_addr(dev_addr, dev, NULL);
+		if (vlan_id)
+			*vlan_id = rdma_vlan_dev_vlan_id(dev);
 		dev_put(dev);
 		break;
 
@@ -153,6 +158,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
 					  &((struct sockaddr_in6 *) addr)->sin6_addr,
 					  dev, 1)) {
 				ret = rdma_copy_addr(dev_addr, dev, NULL);
+				if (vlan_id)
+					*vlan_id = rdma_vlan_dev_vlan_id(dev);
 				break;
 			}
 		}
@@ -238,7 +245,7 @@ static int addr4_resolve(struct sockaddr_in *src_in,
 	src_in->sin_addr.s_addr = fl4.saddr;
 
 	if (rt->dst.dev->flags & IFF_LOOPBACK) {
-		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
 		if (!ret)
 			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
 		goto put;
@@ -286,7 +293,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 	}
 
 	if (dst->dev->flags & IFF_LOOPBACK) {
-		ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
+		ret = rdma_translate_ip((struct sockaddr *)dst_in, addr, NULL);
 		if (!ret)
 			memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
 		goto put;
@@ -437,6 +444,88 @@ void rdma_addr_cancel(struct rdma_dev_addr *addr)
 }
 EXPORT_SYMBOL(rdma_addr_cancel);
 
+struct resolve_cb_context {
+	struct rdma_dev_addr *addr;
+	struct completion comp;
+};
+
+static void resolve_cb(int status, struct sockaddr *src_addr,
+	     struct rdma_dev_addr *addr, void *context)
+{
+	memcpy(((struct resolve_cb_context *)context)->addr, addr, sizeof(struct
+				rdma_dev_addr));
+	complete(&((struct resolve_cb_context *)context)->comp);
+}
+
+int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *dmac,
+			       u16 *vlan_id)
+{
+	int ret = 0;
+	struct rdma_dev_addr dev_addr;
+	struct resolve_cb_context ctx;
+	struct net_device *dev;
+
+	union {
+		struct sockaddr     _sockaddr;
+		struct sockaddr_in  _sockaddr_in;
+		struct sockaddr_in6 _sockaddr_in6;
+	} sgid_addr, dgid_addr;
+
+
+	ret = rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+	if (ret)
+		return ret;
+
+	ret = rdma_gid2ip(&dgid_addr._sockaddr, dgid);
+	if (ret)
+		return ret;
+
+	memset(&dev_addr, 0, sizeof(dev_addr));
+
+	ctx.addr = &dev_addr;
+	init_completion(&ctx.comp);
+	ret = rdma_resolve_ip(&self, &sgid_addr._sockaddr, &dgid_addr._sockaddr,
+			&dev_addr, 1000, resolve_cb, &ctx);
+	if (ret)
+		return ret;
+
+	wait_for_completion(&ctx.comp);
+
+	memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN);
+	dev = dev_get_by_index(&init_net, dev_addr.bound_dev_if);
+	if (!dev)
+		return -ENODEV;
+	if (vlan_id)
+		*vlan_id = rdma_vlan_dev_vlan_id(dev);
+	dev_put(dev);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_dmac_by_grh);
+
+int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id)
+{
+	int ret = 0;
+	struct rdma_dev_addr dev_addr;
+	union {
+		struct sockaddr     _sockaddr;
+		struct sockaddr_in  _sockaddr_in;
+		struct sockaddr_in6 _sockaddr_in6;
+	} gid_addr;
+
+	ret = rdma_gid2ip(&gid_addr._sockaddr, sgid);
+
+	if (ret)
+		return ret;
+	memset(&dev_addr, 0, sizeof(dev_addr));
+	ret = rdma_translate_ip(&gid_addr._sockaddr, &dev_addr, vlan_id);
+	if (ret)
+		return ret;
+
+	memcpy(smac, dev_addr.src_dev_addr, ETH_ALEN);
+	return ret;
+}
+EXPORT_SYMBOL(rdma_addr_find_smac_by_sgid);
+
 static int netevent_callback(struct notifier_block *self, unsigned long event,
 	void *ctx)
 {
@@ -461,11 +550,13 @@ static int __init addr_init(void)
 		return -ENOMEM;
 
 	register_netevent_notifier(&nb);
+	rdma_addr_register_client(&self);
 	return 0;
 }
 
 static void __exit addr_cleanup(void)
 {
+	rdma_addr_unregister_client(&self);
 	unregister_netevent_notifier(&nb);
 	destroy_workqueue(addr_wq);
 }

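The two helpers exported above, rdma_addr_find_dmac_by_grh() and rdma_addr_find_smac_by_sgid(), resolve the Ethernet L2 attributes (MAC and VLAN) behind IP-based GIDs, and later patches in this series call them from the CM, CMA and verbs layers. A minimal sketch of a hypothetical caller, assuming the GIDs were already obtained from ib_query_gid() or a resolved path record (not part of the diff above):

#include <rdma/ib_verbs.h>
#include <rdma/ib_addr.h>

/* Sketch only: resolve source/destination MACs and the VLAN for a RoCE
 * destination described by an IP-based GID pair. */
static int example_resolve_l2(union ib_gid *sgid, union ib_gid *dgid,
			      u8 *smac, u8 *dmac, u16 *vlan_id)
{
	int ret;

	/* The source MAC comes from the local net_device that owns sgid. */
	ret = rdma_addr_find_smac_by_sgid(sgid, smac, vlan_id);
	if (ret)
		return ret;

	/* The destination MAC is resolved through the kernel neighbour
	 * tables; this waits for the address-resolution callback, so it
	 * may sleep. */
	return rdma_addr_find_dmac_by_grh(sgid, dgid, dmac, vlan_id);
}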
+ 52 - 0
drivers/infiniband/core/cm.c

@@ -47,6 +47,7 @@
 #include <linux/sysfs.h>
 #include <linux/workqueue.h>
 #include <linux/kdev_t.h>
+#include <linux/etherdevice.h>
 
 #include <rdma/ib_cache.h>
 #include <rdma/ib_cm.h>
@@ -177,6 +178,8 @@ struct cm_av {
 	struct ib_ah_attr ah_attr;
 	u16 pkey_index;
 	u8 timeout;
+	u8  valid;
+	u8  smac[ETH_ALEN];
 };
 
 struct cm_work {
@@ -346,6 +349,23 @@ static void cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
 			   grh, &av->ah_attr);
 }
 
+int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac)
+{
+	struct cm_id_private *cm_id_priv;
+
+	cm_id_priv = container_of(id, struct cm_id_private, id);
+
+	if (smac != NULL)
+		memcpy(cm_id_priv->av.smac, smac, sizeof(cm_id_priv->av.smac));
+
+	if (alt_smac != NULL)
+		memcpy(cm_id_priv->alt_av.smac, alt_smac,
+		       sizeof(cm_id_priv->alt_av.smac));
+
+	return 0;
+}
+EXPORT_SYMBOL(ib_update_cm_av);
+
 static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 {
 	struct cm_device *cm_dev;
@@ -376,6 +396,9 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 	ib_init_ah_from_path(cm_dev->ib_device, port->port_num, path,
 			     &av->ah_attr);
 	av->timeout = path->packet_life_time + 1;
+	memcpy(av->smac, path->smac, sizeof(av->smac));
+
+	av->valid = 1;
 	return 0;
 }
 
@@ -1554,6 +1577,9 @@ static int cm_req_handler(struct cm_work *work)
 
 	cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
 	cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
+
+	memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
+	work->path[0].vlan_id = cm_id_priv->av.ah_attr.vlan_id;
 	ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av);
 	if (ret) {
 		ib_get_cached_gid(work->port->cm_dev->ib_device,
@@ -3500,6 +3526,32 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
 		*qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
 				IB_QP_DEST_QPN | IB_QP_RQ_PSN;
 		qp_attr->ah_attr = cm_id_priv->av.ah_attr;
+		if (!cm_id_priv->av.valid) {
+			spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+			return -EINVAL;
+		}
+		if (cm_id_priv->av.ah_attr.vlan_id != 0xffff) {
+			qp_attr->vlan_id = cm_id_priv->av.ah_attr.vlan_id;
+			*qp_attr_mask |= IB_QP_VID;
+		}
+		if (!is_zero_ether_addr(cm_id_priv->av.smac)) {
+			memcpy(qp_attr->smac, cm_id_priv->av.smac,
+			       sizeof(qp_attr->smac));
+			*qp_attr_mask |= IB_QP_SMAC;
+		}
+		if (cm_id_priv->alt_av.valid) {
+			if (cm_id_priv->alt_av.ah_attr.vlan_id != 0xffff) {
+				qp_attr->alt_vlan_id =
+					cm_id_priv->alt_av.ah_attr.vlan_id;
+				*qp_attr_mask |= IB_QP_ALT_VID;
+			}
+			if (!is_zero_ether_addr(cm_id_priv->alt_av.smac)) {
+				memcpy(qp_attr->alt_smac,
+				       cm_id_priv->alt_av.smac,
+				       sizeof(qp_attr->alt_smac));
+				*qp_attr_mask |= IB_QP_ALT_SMAC;
+			}
+		}
 		qp_attr->path_mtu = cm_id_priv->path_mtu;
 		qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
 		qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);

+ 66 - 15
drivers/infiniband/core/cma.c

@@ -340,7 +340,7 @@ static int cma_translate_addr(struct sockaddr *addr, struct rdma_dev_addr *dev_a
 	int ret;
 
 	if (addr->sa_family != AF_IB) {
-		ret = rdma_translate_ip(addr, dev_addr);
+		ret = rdma_translate_ip(addr, dev_addr, NULL);
 	} else {
 		cma_translate_ib((struct sockaddr_ib *) addr, dev_addr);
 		ret = 0;
@@ -365,7 +365,9 @@ static int cma_acquire_dev(struct rdma_id_private *id_priv,
 		return -EINVAL;
 
 	mutex_lock(&lock);
-	iboe_addr_get_sgid(dev_addr, &iboe_gid);
+	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+		    &iboe_gid);
+
 	memcpy(&gid, dev_addr->src_dev_addr +
 	       rdma_addr_gid_offset(dev_addr), sizeof gid);
 	if (listen_id_priv &&
@@ -603,6 +605,7 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
 {
 	struct ib_qp_attr qp_attr;
 	int qp_attr_mask, ret;
+	union ib_gid sgid;
 
 	mutex_lock(&id_priv->qp_mutex);
 	if (!id_priv->id.qp) {
@@ -625,6 +628,20 @@ static int cma_modify_qp_rtr(struct rdma_id_private *id_priv,
 	if (ret)
 		goto out;
 
+	ret = ib_query_gid(id_priv->id.device, id_priv->id.port_num,
+			   qp_attr.ah_attr.grh.sgid_index, &sgid);
+	if (ret)
+		goto out;
+
+	if (rdma_node_get_transport(id_priv->cma_dev->device->node_type)
+	    == RDMA_TRANSPORT_IB &&
+	    rdma_port_get_link_layer(id_priv->id.device, id_priv->id.port_num)
+	    == IB_LINK_LAYER_ETHERNET) {
+		ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr.smac, NULL);
+
+		if (ret)
+			goto out;
+	}
 	if (conn_param)
 		qp_attr.max_dest_rd_atomic = conn_param->responder_resources;
 	ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
@@ -725,6 +742,7 @@ int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
 		else
 			ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
 						 qp_attr_mask);
+
 		if (qp_attr->qp_state == IB_QPS_RTR)
 			qp_attr->rq_psn = id_priv->seq_num;
 		break;
@@ -1266,6 +1284,15 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	struct rdma_id_private *listen_id, *conn_id;
 	struct rdma_cm_event event;
 	int offset, ret;
+	u8 smac[ETH_ALEN];
+	u8 alt_smac[ETH_ALEN];
+	u8 *psmac = smac;
+	u8 *palt_smac = alt_smac;
+	int is_iboe = ((rdma_node_get_transport(cm_id->device->node_type) ==
+			RDMA_TRANSPORT_IB) &&
+		       (rdma_port_get_link_layer(cm_id->device,
+			ib_event->param.req_rcvd.port) ==
+			IB_LINK_LAYER_ETHERNET));
 
 	listen_id = cm_id->context;
 	if (!cma_check_req_qp_type(&listen_id->id, ib_event))
@@ -1310,12 +1337,29 @@ static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 	if (ret)
 		goto err3;
 
+		if (ib_event->param.req_rcvd.primary_path != NULL)
+			rdma_addr_find_smac_by_sgid(
+				&ib_event->param.req_rcvd.primary_path->sgid,
+				psmac, NULL);
+		else
+			psmac = NULL;
+		if (ib_event->param.req_rcvd.alternate_path != NULL)
+			rdma_addr_find_smac_by_sgid(
+				&ib_event->param.req_rcvd.alternate_path->sgid,
+				palt_smac, NULL);
+		else
+			palt_smac = NULL;
+	}
 	/*
 	 * Acquire mutex to prevent user executing rdma_destroy_id()
 	 * while we're accessing the cm_id.
 	 */
 	mutex_lock(&lock);
-	if (cma_comp(conn_id, RDMA_CM_CONNECT) && (conn_id->id.qp_type != IB_QPT_UD))
+	if (is_iboe)
+		ib_update_cm_av(cm_id, psmac, palt_smac);
+	if (cma_comp(conn_id, RDMA_CM_CONNECT) &&
+	    (conn_id->id.qp_type != IB_QPT_UD))
 		ib_send_cm_mra(cm_id, CMA_CM_MRA_SETTING, NULL, 0);
 	mutex_unlock(&lock);
 	mutex_unlock(&conn_id->handler_mutex);
@@ -1474,7 +1518,7 @@ static int iw_conn_req_handler(struct iw_cm_id *cm_id,
 	mutex_lock_nested(&conn_id->handler_mutex, SINGLE_DEPTH_NESTING);
 	conn_id->state = RDMA_CM_CONNECT;
 
-	ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr);
+	ret = rdma_translate_ip(laddr, &conn_id->id.route.addr.dev_addr, NULL);
 	if (ret) {
 		mutex_unlock(&conn_id->handler_mutex);
 		rdma_destroy_id(new_cm_id);
@@ -1873,7 +1917,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 	struct cma_work *work;
 	int ret;
 	struct net_device *ndev = NULL;
-	u16 vid;
+
 
 	work = kzalloc(sizeof *work, GFP_KERNEL);
 	if (!work)
@@ -1897,10 +1941,14 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv)
 		goto err2;
 	}
 
-	vid = rdma_vlan_dev_vlan_id(ndev);
+	route->path_rec->vlan_id = rdma_vlan_dev_vlan_id(ndev);
+	memcpy(route->path_rec->dmac, addr->dev_addr.dst_dev_addr, ETH_ALEN);
+	memcpy(route->path_rec->smac, ndev->dev_addr, ndev->addr_len);
 
-	iboe_mac_vlan_to_ll(&route->path_rec->sgid, addr->dev_addr.src_dev_addr, vid);
-	iboe_mac_vlan_to_ll(&route->path_rec->dgid, addr->dev_addr.dst_dev_addr, vid);
+	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+		    &route->path_rec->sgid);
+	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.dst_addr,
+		    &route->path_rec->dgid);
 
 	route->path_rec->hop_limit = 1;
 	route->path_rec->reversible = 1;
@@ -2063,6 +2111,7 @@ static void addr_handler(int status, struct sockaddr *src_addr,
 			   RDMA_CM_ADDR_RESOLVED))
 		goto out;
 
+	memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
 	if (!status && !id_priv->cma_dev)
 		status = cma_acquire_dev(id_priv, NULL);
 
@@ -2072,10 +2121,8 @@
 			goto out;
 		event.event = RDMA_CM_EVENT_ADDR_ERROR;
 		event.status = status;
-	} else {
-		memcpy(cma_src_addr(id_priv), src_addr, rdma_addr_size(src_addr));
+	} else
 		event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
-	}
 
 	if (id_priv->id.event_handler(&id_priv->id, &event)) {
 		cma_exch(id_priv, RDMA_CM_DESTROYING);
@@ -2480,8 +2527,11 @@ static int cma_check_linklocal(struct rdma_dev_addr *dev_addr,
 		return 0;
 
 	sin6 = (struct sockaddr_in6 *) addr;
-	if ((ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) &&
-	    !sin6->sin6_scope_id)
+
+	if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL))
+		return 0;
+
+	if (!sin6->sin6_scope_id)
 			return -EINVAL;
 
 	dev_addr->bound_dev_if = sin6->sin6_scope_id;
@@ -2556,6 +2606,7 @@ int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
 	if (ret)
 		goto err1;
 
+	memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
 	if (!cma_any_addr(addr)) {
 		ret = cma_translate_addr(addr, &id->route.addr.dev_addr);
 		if (ret)
@@ -2566,7 +2617,6 @@
 			goto err1;
 	}
 
-	memcpy(cma_src_addr(id_priv), addr, rdma_addr_size(addr));
 	if (!(id_priv->options & (1 << CMA_OPTION_AFONLY))) {
 		if (addr->sa_family == AF_INET)
 			id_priv->afonly = 1;
@@ -3295,7 +3345,8 @@ static int cma_iboe_join_multicast(struct rdma_id_private *id_priv,
 		err = -EINVAL;
 		goto out2;
 	}
-	iboe_addr_get_sgid(dev_addr, &mc->multicast.ib->rec.port_gid);
+	rdma_ip2gid((struct sockaddr *)&id_priv->id.route.addr.src_addr,
+		    &mc->multicast.ib->rec.port_gid);
 	work->id = id_priv;
 	work->mc = mc;
 	INIT_WORK(&work->work, iboe_mcast_work_handler);

+ 2 - 0
drivers/infiniband/core/core_priv.h

@@ -49,4 +49,6 @@ void ib_sysfs_cleanup(void);
 int  ib_cache_setup(void);
 void ib_cache_cleanup(void);
 
+int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
+			    struct ib_qp_attr *qp_attr, int *qp_attr_mask);
 #endif /* _CORE_PRIV_H */

+ 1 - 2
drivers/infiniband/core/iwcm.c

@@ -334,7 +334,6 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
 {
 	struct iwcm_id_private *cm_id_priv;
 	unsigned long flags;
-	int ret;
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 	/*
@@ -350,7 +349,7 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
 		cm_id_priv->state = IW_CM_STATE_DESTROYING;
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 		/* destroy the listening endpoint */
-		ret = cm_id->device->iwcm->destroy_listen(cm_id);
+		cm_id->device->iwcm->destroy_listen(cm_id);
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
 		break;
 	case IW_CM_STATE_ESTABLISHED:

+ 11 - 1
drivers/infiniband/core/sa_query.c

@@ -42,7 +42,7 @@
 #include <linux/kref.h>
 #include <linux/idr.h>
 #include <linux/workqueue.h>
-
+#include <uapi/linux/if_ether.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_cache.h>
 #include "sa.h"
@@ -556,6 +556,13 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 		ah_attr->grh.hop_limit     = rec->hop_limit;
 		ah_attr->grh.traffic_class = rec->traffic_class;
 	}
+	if (force_grh) {
+		memcpy(ah_attr->dmac, rec->dmac, ETH_ALEN);
+		ah_attr->vlan_id = rec->vlan_id;
+	} else {
+		ah_attr->vlan_id = 0xffff;
+	}
+
 	return 0;
 }
 EXPORT_SYMBOL(ib_init_ah_from_path);
@@ -670,6 +677,9 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
 
 		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
 			  mad->data, &rec);
+		rec.vlan_id = 0xffff;
+		memset(rec.dmac, 0, ETH_ALEN);
+		memset(rec.smac, 0, ETH_ALEN);
 		query->callback(status, &rec, query->context);
 	} else
 		query->callback(status, NULL, query->context);

+ 1 - 0
drivers/infiniband/core/sysfs.c

@@ -613,6 +613,7 @@ static ssize_t show_node_type(struct device *device,
 	case RDMA_NODE_IB_CA:	  return sprintf(buf, "%d: CA\n", dev->node_type);
 	case RDMA_NODE_RNIC:	  return sprintf(buf, "%d: RNIC\n", dev->node_type);
 	case RDMA_NODE_USNIC:	  return sprintf(buf, "%d: usNIC\n", dev->node_type);
+	case RDMA_NODE_USNIC_UDP: return sprintf(buf, "%d: usNIC UDP\n", dev->node_type);
 	case RDMA_NODE_IB_SWITCH: return sprintf(buf, "%d: switch\n", dev->node_type);
 	case RDMA_NODE_IB_ROUTER: return sprintf(buf, "%d: router\n", dev->node_type);
 	default:		  return sprintf(buf, "%d: <unknown>\n", dev->node_type);

+ 4 - 14
drivers/infiniband/core/ucma.c

@@ -655,24 +655,14 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp,
 static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp,
 				 struct rdma_route *route)
 {
-	struct rdma_dev_addr *dev_addr;
-	struct net_device *dev;
-	u16 vid = 0;
 
 	resp->num_paths = route->num_paths;
 	switch (route->num_paths) {
 	case 0:
-		dev_addr = &route->addr.dev_addr;
-		dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
-			if (dev) {
-				vid = rdma_vlan_dev_vlan_id(dev);
-				dev_put(dev);
-			}
-
-		iboe_mac_vlan_to_ll((union ib_gid *) &resp->ib_route[0].dgid,
-				    dev_addr->dst_dev_addr, vid);
-		iboe_addr_get_sgid(dev_addr,
-				   (union ib_gid *) &resp->ib_route[0].sgid);
+		rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr,
+			    (union ib_gid *)&resp->ib_route[0].dgid);
+		rdma_ip2gid((struct sockaddr *)&route->addr.src_addr,
+			    (union ib_gid *)&resp->ib_route[0].sgid);
 		resp->ib_route[0].pkey = cpu_to_be16(0xffff);
 		break;
 	case 2:

+ 4 - 0
drivers/infiniband/core/uverbs_cmd.c

@@ -40,6 +40,7 @@
 #include <asm/uaccess.h>
 
 #include "uverbs.h"
+#include "core_priv.h"
 
 struct uverbs_lock_class {
 	struct lock_class_key	key;
@@ -1961,6 +1962,9 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file,
 	attr->alt_ah_attr.port_num 	    = cmd.alt_dest.port_num;
 
 	if (qp->real_qp == qp) {
+		ret = ib_resolve_eth_l2_attrs(qp, attr, &cmd.attr_mask);
+		if (ret)
+			goto out;
 		ret = qp->device->modify_qp(qp, attr,
 			modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata);
 	} else {

+ 99 - 2
drivers/infiniband/core/verbs.c

@@ -44,6 +44,9 @@
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_cache.h>
+#include <rdma/ib_addr.h>
+
+#include "core_priv.h"
 
 int ib_rate_to_mult(enum ib_rate rate)
 {
@@ -116,6 +119,8 @@ rdma_node_get_transport(enum rdma_node_type node_type)
 		return RDMA_TRANSPORT_IWARP;
 	case RDMA_NODE_USNIC:
 		return RDMA_TRANSPORT_USNIC;
+	case RDMA_NODE_USNIC_UDP:
+		return RDMA_TRANSPORT_USNIC_UDP;
 	default:
 		BUG();
 		return 0;
@@ -133,6 +138,7 @@ enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_
 		return IB_LINK_LAYER_INFINIBAND;
 	case RDMA_TRANSPORT_IWARP:
 	case RDMA_TRANSPORT_USNIC:
+	case RDMA_TRANSPORT_USNIC_UDP:
 		return IB_LINK_LAYER_ETHERNET;
 	default:
 		return IB_LINK_LAYER_UNSPECIFIED;
@@ -192,8 +198,28 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, struct ib_wc *wc,
 	u32 flow_class;
 	u16 gid_index;
 	int ret;
+	int is_eth = (rdma_port_get_link_layer(device, port_num) ==
+			IB_LINK_LAYER_ETHERNET);
 
 	memset(ah_attr, 0, sizeof *ah_attr);
+	if (is_eth) {
+		if (!(wc->wc_flags & IB_WC_GRH))
+			return -EPROTOTYPE;
+
+		if (wc->wc_flags & IB_WC_WITH_SMAC &&
+		    wc->wc_flags & IB_WC_WITH_VLAN) {
+			memcpy(ah_attr->dmac, wc->smac, ETH_ALEN);
+			ah_attr->vlan_id = wc->vlan_id;
+		} else {
+			ret = rdma_addr_find_dmac_by_grh(&grh->dgid, &grh->sgid,
+					ah_attr->dmac, &ah_attr->vlan_id);
+			if (ret)
+				return ret;
+		}
+	} else {
+		ah_attr->vlan_id = 0xffff;
+	}
+
 	ah_attr->dlid = wc->slid;
 	ah_attr->sl = wc->sl;
 	ah_attr->src_path_bits = wc->dlid_path_bits;
@@ -476,7 +502,9 @@ EXPORT_SYMBOL(ib_create_qp);
 static const struct {
 	int			valid;
 	enum ib_qp_attr_mask	req_param[IB_QPT_MAX];
+	enum ib_qp_attr_mask	req_param_add_eth[IB_QPT_MAX];
 	enum ib_qp_attr_mask	opt_param[IB_QPT_MAX];
+	enum ib_qp_attr_mask	opt_param_add_eth[IB_QPT_MAX];
 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
 	[IB_QPS_RESET] = {
 		[IB_QPS_RESET] = { .valid = 1 },
@@ -557,6 +585,12 @@ static const struct {
 						IB_QP_MAX_DEST_RD_ATOMIC	|
 						IB_QP_MIN_RNR_TIMER),
 			},
+			.req_param_add_eth = {
+				[IB_QPT_RC]  = (IB_QP_SMAC),
+				[IB_QPT_UC]  = (IB_QP_SMAC),
+				[IB_QPT_XRC_INI]  = (IB_QP_SMAC),
+				[IB_QPT_XRC_TGT]  = (IB_QP_SMAC)
+			},
 			.opt_param = {
 				 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
 						 IB_QP_QKEY),
@@ -576,7 +610,21 @@ static const struct {
 						 IB_QP_QKEY),
 				 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
 						 IB_QP_QKEY),
-			 }
+			 },
+			.opt_param_add_eth = {
+				[IB_QPT_RC]  = (IB_QP_ALT_SMAC			|
+						IB_QP_VID			|
+						IB_QP_ALT_VID),
+				[IB_QPT_UC]  = (IB_QP_ALT_SMAC			|
+						IB_QP_VID			|
+						IB_QP_ALT_VID),
+				[IB_QPT_XRC_INI]  = (IB_QP_ALT_SMAC			|
+						IB_QP_VID			|
+						IB_QP_ALT_VID),
+				[IB_QPT_XRC_TGT]  = (IB_QP_ALT_SMAC			|
+						IB_QP_VID			|
+						IB_QP_ALT_VID)
+			}
 		}
 	},
 	[IB_QPS_RTR]   = {
@@ -779,7 +827,8 @@ static const struct {
 };
 
 int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
-		       enum ib_qp_type type, enum ib_qp_attr_mask mask)
+		       enum ib_qp_type type, enum ib_qp_attr_mask mask,
+		       enum rdma_link_layer ll)
 {
 	enum ib_qp_attr_mask req_param, opt_param;
 
@@ -798,6 +847,13 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
 	req_param = qp_state_table[cur_state][next_state].req_param[type];
 	opt_param = qp_state_table[cur_state][next_state].opt_param[type];
 
+	if (ll == IB_LINK_LAYER_ETHERNET) {
+		req_param |= qp_state_table[cur_state][next_state].
+			req_param_add_eth[type];
+		opt_param |= qp_state_table[cur_state][next_state].
+			opt_param_add_eth[type];
+	}
+
 	if ((mask & req_param) != req_param)
 		return 0;
 
@@ -808,10 +864,51 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
 }
 EXPORT_SYMBOL(ib_modify_qp_is_ok);
 
+int ib_resolve_eth_l2_attrs(struct ib_qp *qp,
+			    struct ib_qp_attr *qp_attr, int *qp_attr_mask)
+{
+	int           ret = 0;
+	union ib_gid  sgid;
+
+	if ((*qp_attr_mask & IB_QP_AV)  &&
+	    (rdma_port_get_link_layer(qp->device, qp_attr->ah_attr.port_num) == IB_LINK_LAYER_ETHERNET)) {
+		ret = ib_query_gid(qp->device, qp_attr->ah_attr.port_num,
+				   qp_attr->ah_attr.grh.sgid_index, &sgid);
+		if (ret)
+			goto out;
+		if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
+			rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac);
+			rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac);
+			qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
+		} else {
+			ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid,
+					qp_attr->ah_attr.dmac, &qp_attr->vlan_id);
+			if (ret)
+				goto out;
+			ret = rdma_addr_find_smac_by_sgid(&sgid, qp_attr->smac, NULL);
+			if (ret)
+				goto out;
+		}
+		*qp_attr_mask |= IB_QP_SMAC;
+		if (qp_attr->vlan_id < 0xFFFF)
+			*qp_attr_mask |= IB_QP_VID;
+	}
+out:
+	return ret;
+}
+EXPORT_SYMBOL(ib_resolve_eth_l2_attrs);
+
+
 int ib_modify_qp(struct ib_qp *qp,
 int ib_modify_qp(struct ib_qp *qp,
 		 struct ib_qp_attr *qp_attr,
 		 int qp_attr_mask)
 {
+	int ret;
+
+	ret = ib_resolve_eth_l2_attrs(qp, qp_attr, &qp_attr_mask);
+	if (ret)
+		return ret;
+
 	return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
 }
 EXPORT_SYMBOL(ib_modify_qp);

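ib_modify_qp_is_ok() above now takes the port link layer, so the new IB_QP_SMAC/IB_QP_VID attributes are only required on Ethernet (RoCE) ports; the driver updates later in this merge pass IB_LINK_LAYER_UNSPECIFIED where the extra checks do not apply. A hypothetical driver-side transition check, sketched under the assumption that attr/attr_mask come from the driver's modify_qp path:

#include <rdma/ib_verbs.h>

/* Sketch only: validate a QP state transition with the link-layer aware
 * ib_modify_qp_is_ok(). */
static int example_check_transition(struct ib_qp *ibqp, struct ib_qp_attr *attr,
				    int attr_mask, enum ib_qp_state cur_state,
				    enum ib_qp_state new_state)
{
	enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;

	/* Only RoCE ports pick up the extra *_add_eth attribute rules. */
	if (attr_mask & IB_QP_PORT)
		ll = rdma_port_get_link_layer(ibqp->device, attr->port_num);

	return ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
				  attr_mask, ll) ? 0 : -EINVAL;
}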
+ 2 - 1
drivers/infiniband/hw/amso1100/c2_intr.c

@@ -169,7 +169,8 @@ static void handle_vq(struct c2_dev *c2dev, u32 mq_index)
 		 * We should never get here, as the adapter should
 		 * never send us a reply that we're not expecting.
 		 */
-		vq_repbuf_free(c2dev, host_msg);
+		if (reply_msg != NULL)
+			vq_repbuf_free(c2dev, host_msg);
 		pr_debug("handle_vq: UNEXPECTEDLY got NULL req\n");
 		return;
 	}

+ 1 - 1
drivers/infiniband/hw/cxgb4/mem.c

@@ -76,7 +76,7 @@ static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
 	INIT_ULPTX_WR(req, wr_len, 0, 0);
 	req->wr.wr_hi = cpu_to_be32(FW_WR_OP(FW_ULPTX_WR) |
 			(wait ? FW_WR_COMPL(1) : 0));
-	req->wr.wr_lo = wait ? (__force __be64)&wr_wait : 0;
+	req->wr.wr_lo = wait ? (__force __be64)(unsigned long) &wr_wait : 0L;
 	req->wr.wr_mid = cpu_to_be32(FW_WR_LEN16(DIV_ROUND_UP(wr_len, 16)));
 	req->cmd = cpu_to_be32(ULPTX_CMD(ULP_TX_MEM_WRITE));
 	req->cmd |= cpu_to_be32(V_T5_ULP_MEMIO_ORDER(1));

+ 1 - 1
drivers/infiniband/hw/ehca/ehca_qp.c

@@ -1329,7 +1329,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 	qp_new_state = attr_mask & IB_QP_STATE ? attr->qp_state : qp_cur_state;
 	if (!smi_reset2init &&
 	    !ib_modify_qp_is_ok(qp_cur_state, qp_new_state, ibqp->qp_type,
-				attr_mask)) {
+				attr_mask, IB_LINK_LAYER_UNSPECIFIED)) {
 		ret = -EINVAL;
 		ehca_err(ibqp->device,
 			 "Invalid qp transition new_state=%x cur_state=%x "

+ 1 - 1
drivers/infiniband/hw/ipath/ipath_qp.c

@@ -463,7 +463,7 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
 	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask))
+				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
 		goto inval;
 
 	if (attr_mask & IB_QP_AV) {

+ 1 - 1
drivers/infiniband/hw/mlx4/Kconfig

@@ -1,6 +1,6 @@
 config MLX4_INFINIBAND
 	tristate "Mellanox ConnectX HCA support"
-	depends on NETDEVICES && ETHERNET && PCI
+	depends on NETDEVICES && ETHERNET && PCI && INET
 	select NET_VENDOR_MELLANOX
 	select MLX4_CORE
 	---help---

+ 9 - 31
drivers/infiniband/hw/mlx4/ah.c

@@ -39,25 +39,6 @@
 
 #include "mlx4_ib.h"
 
-int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
-			u8 *mac, int *is_mcast, u8 port)
-{
-	struct in6_addr in6;
-
-	*is_mcast = 0;
-
-	memcpy(&in6, ah_attr->grh.dgid.raw, sizeof in6);
-	if (rdma_link_local_addr(&in6))
-		rdma_get_ll_mac(&in6, mac);
-	else if (rdma_is_multicast_addr(&in6)) {
-		rdma_get_mcast_mac(&in6, mac);
-		*is_mcast = 1;
-	} else
-		return -EINVAL;
-
-	return 0;
-}
-
 static struct ib_ah *create_ib_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
 				  struct mlx4_ib_ah *ah)
 {
@@ -92,21 +73,18 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr
 {
 	struct mlx4_ib_dev *ibdev = to_mdev(pd->device);
 	struct mlx4_dev *dev = ibdev->dev;
-	union ib_gid sgid;
-	u8 mac[6];
-	int err;
 	int is_mcast;
+	struct in6_addr in6;
 	u16 vlan_tag;
 
-	err = mlx4_ib_resolve_grh(ibdev, ah_attr, mac, &is_mcast, ah_attr->port_num);
-	if (err)
-		return ERR_PTR(err);
-
-	memcpy(ah->av.eth.mac, mac, 6);
-	err = ib_get_cached_gid(pd->device, ah_attr->port_num, ah_attr->grh.sgid_index, &sgid);
-	if (err)
-		return ERR_PTR(err);
-	vlan_tag = rdma_get_vlan_id(&sgid);
+	memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+	if (rdma_is_multicast_addr(&in6)) {
+		is_mcast = 1;
+		rdma_get_mcast_mac(&in6, ah->av.eth.mac);
+	} else {
+		memcpy(ah->av.eth.mac, ah_attr->dmac, ETH_ALEN);
+	}
+	vlan_tag = ah_attr->vlan_id;
 	if (vlan_tag < 0x1000)
 		vlan_tag |= (ah_attr->sl & 7) << 13;
 	ah->av.eth.port_pd = cpu_to_be32(to_mpd(pd)->pdn | (ah_attr->port_num << 24));

+ 9 - 0
drivers/infiniband/hw/mlx4/cq.c

@@ -798,6 +798,15 @@ repoll:
 			wc->sl  = be16_to_cpu(cqe->sl_vid) >> 13;
 		else
 			wc->sl  = be16_to_cpu(cqe->sl_vid) >> 12;
+		if (be32_to_cpu(cqe->vlan_my_qpn) & MLX4_CQE_VLAN_PRESENT_MASK) {
+			wc->vlan_id = be16_to_cpu(cqe->sl_vid) &
+				MLX4_CQE_VID_MASK;
+		} else {
+			wc->vlan_id = 0xffff;
+		}
+		wc->wc_flags |= IB_WC_WITH_VLAN;
+		memcpy(wc->smac, cqe->smac, ETH_ALEN);
+		wc->wc_flags |= IB_WC_WITH_SMAC;
 	}
 
 	return 0;

+ 590 - 157
drivers/infiniband/hw/mlx4/main.c

@@ -39,6 +39,8 @@
 #include <linux/inetdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/if_vlan.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
 
 #include <rdma/ib_smi.h>
 #include <rdma/ib_user_verbs.h>
@@ -55,6 +57,7 @@
 #define DRV_RELDATE	"April 4, 2008"
 
 #define MLX4_IB_FLOW_MAX_PRIO 0xFFF
+#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
 
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
@@ -92,21 +95,27 @@ static union ib_gid zgid;
 
 static int check_flow_steering_support(struct mlx4_dev *dev)
 {
+	int eth_num_ports = 0;
 	int ib_num_ports = 0;
-	int i;
-
-	mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
-		ib_num_ports++;
 
-	if (dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED) {
-		if (ib_num_ports || mlx4_is_mfunc(dev)) {
-			pr_warn("Device managed flow steering is unavailable "
-				"for IB ports or in multifunction env.\n");
-			return 0;
+	int dmfs = dev->caps.steering_mode == MLX4_STEERING_MODE_DEVICE_MANAGED;
+
+	if (dmfs) {
+		int i;
+		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH)
+			eth_num_ports++;
+		mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
+			ib_num_ports++;
+		dmfs &= (!ib_num_ports ||
+			 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB)) &&
+			(!eth_num_ports ||
+			 (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_FS_EN));
+		if (ib_num_ports && mlx4_is_mfunc(dev)) {
+			pr_warn("Device managed flow steering is unavailable for IB port in multifunction env.\n");
+			dmfs = 0;
 		}
-		return 1;
 	}
-	return 0;
+	return dmfs;
 }
 
 static int mlx4_ib_query_device(struct ib_device *ibdev,
@@ -165,7 +174,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2B;
 		else
 			props->device_cap_flags |= IB_DEVICE_MEM_WINDOW_TYPE_2A;
-	if (check_flow_steering_support(dev->dev))
+	if (dev->steering_support ==  MLX4_STEERING_MODE_DEVICE_MANAGED)
 		props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
 	}
 
@@ -787,7 +796,6 @@ static int add_gid_entry(struct ib_qp *ibqp, union ib_gid *gid)
 int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
 		   union ib_gid *gid)
 {
-	u8 mac[6];
 	struct net_device *ndev;
 	int ret = 0;
 
@@ -801,11 +809,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
 	spin_unlock(&mdev->iboe.lock);
 
 	if (ndev) {
-		rdma_get_mcast_mac((struct in6_addr *)gid, mac);
-		rtnl_lock();
-		dev_mc_add(mdev->iboe.netdevs[mqp->port - 1], mac);
 		ret = 1;
-		rtnl_unlock();
 		dev_put(ndev);
 	}
 
@@ -819,6 +823,7 @@ struct mlx4_ib_steering {
 };
 
 static int parse_flow_attr(struct mlx4_dev *dev,
+			   u32 qp_num,
 			   union ib_flow_spec *ib_spec,
 			   struct _rule_hw *mlx4_spec)
 {
@@ -834,6 +839,14 @@ static int parse_flow_attr(struct mlx4_dev *dev,
 		mlx4_spec->eth.vlan_tag = ib_spec->eth.val.vlan_tag;
 		mlx4_spec->eth.vlan_tag_msk = ib_spec->eth.mask.vlan_tag;
 		break;
+	case IB_FLOW_SPEC_IB:
+		type = MLX4_NET_TRANS_RULE_ID_IB;
+		mlx4_spec->ib.l3_qpn =
+			cpu_to_be32(qp_num);
+		mlx4_spec->ib.qpn_mask =
+			cpu_to_be32(MLX4_IB_FLOW_QPN_MASK);
+		break;
+
 
 	case IB_FLOW_SPEC_IPV4:
 		type = MLX4_NET_TRANS_RULE_ID_IPV4;
@@ -865,6 +878,115 @@ static int parse_flow_attr(struct mlx4_dev *dev,
 	return mlx4_hw_rule_sz(dev, type);
 }
 
+struct default_rules {
+	__u32 mandatory_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+	__u32 mandatory_not_fields[IB_FLOW_SPEC_SUPPORT_LAYERS];
+	__u32 rules_create_list[IB_FLOW_SPEC_SUPPORT_LAYERS];
+	__u8  link_layer;
+};
+static const struct default_rules default_table[] = {
+	{
+		.mandatory_fields = {IB_FLOW_SPEC_IPV4},
+		.mandatory_not_fields = {IB_FLOW_SPEC_ETH},
+		.rules_create_list = {IB_FLOW_SPEC_IB},
+		.link_layer = IB_LINK_LAYER_INFINIBAND
+	}
+};
+
+static int __mlx4_ib_default_rules_match(struct ib_qp *qp,
+					 struct ib_flow_attr *flow_attr)
+{
+	int i, j, k;
+	void *ib_flow;
+	const struct default_rules *pdefault_rules = default_table;
+	u8 link_layer = rdma_port_get_link_layer(qp->device, flow_attr->port);
+
+	for (i = 0; i < sizeof(default_table)/sizeof(default_table[0]); i++,
+	     pdefault_rules++) {
+		__u32 field_types[IB_FLOW_SPEC_SUPPORT_LAYERS];
+		memset(&field_types, 0, sizeof(field_types));
+
+		if (link_layer != pdefault_rules->link_layer)
+			continue;
+
+		ib_flow = flow_attr + 1;
+		/* we assume the specs are sorted */
+		for (j = 0, k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS &&
+		     j < flow_attr->num_of_specs; k++) {
+			union ib_flow_spec *current_flow =
+				(union ib_flow_spec *)ib_flow;
+
+			/* same layer but different type */
+			if (((current_flow->type & IB_FLOW_SPEC_LAYER_MASK) ==
+			     (pdefault_rules->mandatory_fields[k] &
+			      IB_FLOW_SPEC_LAYER_MASK)) &&
+			    (current_flow->type !=
+			     pdefault_rules->mandatory_fields[k]))
+				goto out;
+
+			/* same layer, try match next one */
+			if (current_flow->type ==
+			    pdefault_rules->mandatory_fields[k]) {
+				j++;
+				ib_flow +=
+					((union ib_flow_spec *)ib_flow)->size;
+			}
+		}
+
+		ib_flow = flow_attr + 1;
+		for (j = 0; j < flow_attr->num_of_specs;
+		     j++, ib_flow += ((union ib_flow_spec *)ib_flow)->size)
+			for (k = 0; k < IB_FLOW_SPEC_SUPPORT_LAYERS; k++)
+				/* same layer and same type */
+				if (((union ib_flow_spec *)ib_flow)->type ==
+				    pdefault_rules->mandatory_not_fields[k])
+					goto out;
+
+		return i;
+	}
+out:
+	return -1;
+}
+
+static int __mlx4_ib_create_default_rules(
+		struct mlx4_ib_dev *mdev,
+		struct ib_qp *qp,
+		const struct default_rules *pdefault_rules,
+		struct _rule_hw *mlx4_spec) {
+	int size = 0;
+	int i;
+
+	for (i = 0; i < sizeof(pdefault_rules->rules_create_list)/
+			sizeof(pdefault_rules->rules_create_list[0]); i++) {
+		int ret;
+		union ib_flow_spec ib_spec;
+		switch (pdefault_rules->rules_create_list[i]) {
+		case 0:
+			/* no rule */
+			continue;
+		case IB_FLOW_SPEC_IB:
+			ib_spec.type = IB_FLOW_SPEC_IB;
+			ib_spec.size = sizeof(struct ib_flow_spec_ib);
+
+			break;
+		default:
+			/* invalid rule */
+			return -EINVAL;
+		}
+		/* We must put empty rule, qpn is being ignored */
+		ret = parse_flow_attr(mdev->dev, 0, &ib_spec,
+				      mlx4_spec);
+		if (ret < 0) {
+			pr_info("invalid parsing\n");
+			return -EINVAL;
+		}
+
+		mlx4_spec = (void *)mlx4_spec + ret;
+		size += ret;
+	}
+	return size;
+}
+
 static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_attr,
 			  int domain,
 			  enum mlx4_net_trans_promisc_mode flow_type,
@@ -876,6 +998,7 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
 	struct mlx4_ib_dev *mdev = to_mdev(qp->device);
 	struct mlx4_cmd_mailbox *mailbox;
 	struct mlx4_net_trans_rule_hw_ctrl *ctrl;
+	int default_flow;
 
 	static const u16 __mlx4_domain[] = {
 		[IB_FLOW_DOMAIN_USER] = MLX4_DOMAIN_UVERBS,
@@ -910,8 +1033,21 @@ static int __mlx4_ib_create_flow(struct ib_qp *qp, struct ib_flow_attr *flow_att
 
 	ib_flow = flow_attr + 1;
 	size += sizeof(struct mlx4_net_trans_rule_hw_ctrl);
+	/* Add default flows */
+	default_flow = __mlx4_ib_default_rules_match(qp, flow_attr);
+	if (default_flow >= 0) {
+		ret = __mlx4_ib_create_default_rules(
+				mdev, qp, default_table + default_flow,
+				mailbox->buf + size);
+		if (ret < 0) {
+			mlx4_free_cmd_mailbox(mdev->dev, mailbox);
+			return -EINVAL;
+		}
+		size += ret;
+	}
 	for (i = 0; i < flow_attr->num_of_specs; i++) {
-		ret = parse_flow_attr(mdev->dev, ib_flow, mailbox->buf + size);
+		ret = parse_flow_attr(mdev->dev, qp->qp_num, ib_flow,
+				      mailbox->buf + size);
 		if (ret < 0) {
 			mlx4_free_cmd_mailbox(mdev->dev, mailbox);
 			return -EINVAL;
@@ -1025,6 +1161,8 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
 	u64 reg_id;
 	struct mlx4_ib_steering *ib_steering = NULL;
+	enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
+		MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
 
 	if (mdev->dev->caps.steering_mode ==
 	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1036,7 +1174,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	err = mlx4_multicast_attach(mdev->dev, &mqp->mqp, gid->raw, mqp->port,
 				    !!(mqp->flags &
 				       MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK),
-				    MLX4_PROT_IB_IPV6, &reg_id);
+				    prot, &reg_id);
 	if (err)
 		goto err_malloc;
 
@@ -1055,7 +1193,7 @@ static int mlx4_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 
 err_add:
 	mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
-			      MLX4_PROT_IB_IPV6, reg_id);
+			      prot, reg_id);
 err_malloc:
 	kfree(ib_steering);
 
@@ -1083,10 +1221,11 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	int err;
 	struct mlx4_ib_dev *mdev = to_mdev(ibqp->device);
 	struct mlx4_ib_qp *mqp = to_mqp(ibqp);
-	u8 mac[6];
 	struct net_device *ndev;
 	struct mlx4_ib_gid_entry *ge;
 	u64 reg_id = 0;
+	enum mlx4_protocol prot = (gid->raw[1] == 0x0e) ?
+		MLX4_PROT_IB_IPV4 : MLX4_PROT_IB_IPV6;
 
 	if (mdev->dev->caps.steering_mode ==
 	    MLX4_STEERING_MODE_DEVICE_MANAGED) {
@@ -1109,7 +1248,7 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	}
 
 	err = mlx4_multicast_detach(mdev->dev, &mqp->mqp, gid->raw,
-				    MLX4_PROT_IB_IPV6, reg_id);
+				    prot, reg_id);
 	if (err)
 		return err;
 
@@ -1121,13 +1260,8 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		if (ndev)
 			dev_hold(ndev);
 		spin_unlock(&mdev->iboe.lock);
-		rdma_get_mcast_mac((struct in6_addr *)gid, mac);
-		if (ndev) {
-			rtnl_lock();
-			dev_mc_del(mdev->iboe.netdevs[ge->port - 1], mac);
-			rtnl_unlock();
+		if (ndev)
 			dev_put(ndev);
-		}
 		list_del(&ge->list);
 		kfree(ge);
 	} else
@@ -1223,20 +1357,6 @@ static struct device_attribute *mlx4_class_attributes[] = {
 	&dev_attr_board_id
 };
 
-static void mlx4_addrconf_ifid_eui48(u8 *eui, u16 vlan_id, struct net_device *dev)
-{
-	memcpy(eui, dev->dev_addr, 3);
-	memcpy(eui + 5, dev->dev_addr + 3, 3);
-	if (vlan_id < 0x1000) {
-		eui[3] = vlan_id >> 8;
-		eui[4] = vlan_id & 0xff;
-	} else {
-		eui[3] = 0xff;
-		eui[4] = 0xfe;
-	}
-	eui[0] ^= 2;
-}
-
 static void update_gids_task(struct work_struct *work)
 {
 	struct update_gid_work *gw = container_of(work, struct update_gid_work, work);
@@ -1259,161 +1379,318 @@ static void update_gids_task(struct work_struct *work)
 		       MLX4_CMD_WRAPPED);
 	if (err)
 		pr_warn("set port command failed\n");
-	else {
-		memcpy(gw->dev->iboe.gid_table[gw->port - 1], gw->gids, sizeof gw->gids);
+	else
 		mlx4_ib_dispatch_event(gw->dev, gw->port, IB_EVENT_GID_CHANGE);
-	}
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	kfree(gw);
 }
 
-static int update_ipv6_gids(struct mlx4_ib_dev *dev, int port, int clear)
+static void reset_gids_task(struct work_struct *work)
 {
-	struct net_device *ndev = dev->iboe.netdevs[port - 1];
-	struct update_gid_work *work;
-	struct net_device *tmp;
+	struct update_gid_work *gw =
+			container_of(work, struct update_gid_work, work);
+	struct mlx4_cmd_mailbox *mailbox;
+	union ib_gid *gids;
+	int err;
 	int i;
-	u8 *hits;
-	int ret;
-	union ib_gid gid;
-	int free;
-	int found;
-	int need_update = 0;
-	u16 vid;
+	struct mlx4_dev	*dev = gw->dev->dev;
 
-	work = kzalloc(sizeof *work, GFP_ATOMIC);
-	if (!work)
-		return -ENOMEM;
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox)) {
+		pr_warn("reset gid table failed\n");
+		goto free;
+	}
 
-	hits = kzalloc(128, GFP_ATOMIC);
-	if (!hits) {
-		ret = -ENOMEM;
-		goto out;
+	gids = mailbox->buf;
+	memcpy(gids, gw->gids, sizeof(gw->gids));
+
+	for (i = 1; i < gw->dev->num_ports + 1; i++) {
+		if (mlx4_ib_port_link_layer(&gw->dev->ib_dev, i) ==
+					    IB_LINK_LAYER_ETHERNET) {
+			err = mlx4_cmd(dev, mailbox->dma,
+				       MLX4_SET_PORT_GID_TABLE << 8 | i,
+				       1, MLX4_CMD_SET_PORT,
+				       MLX4_CMD_TIME_CLASS_B,
+				       MLX4_CMD_WRAPPED);
+			if (err)
+				pr_warn(KERN_WARNING
+					"set port %d command failed\n", i);
+		}
 	}
 
-	rcu_read_lock();
-	for_each_netdev_rcu(&init_net, tmp) {
-		if (ndev && (tmp == ndev || rdma_vlan_dev_real_dev(tmp) == ndev)) {
-			gid.global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
-			vid = rdma_vlan_dev_vlan_id(tmp);
-			mlx4_addrconf_ifid_eui48(&gid.raw[8], vid, ndev);
-			found = 0;
-			free = -1;
-			for (i = 0; i < 128; ++i) {
-				if (free < 0 &&
-				    !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
-					free = i;
-				if (!memcmp(&dev->iboe.gid_table[port - 1][i], &gid, sizeof gid)) {
-					hits[i] = 1;
-					found = 1;
-					break;
-				}
-			}
+	mlx4_free_cmd_mailbox(dev, mailbox);
+free:
+	kfree(gw);
+}
 
-			if (!found) {
-				if (tmp == ndev &&
-				    (memcmp(&dev->iboe.gid_table[port - 1][0],
-					    &gid, sizeof gid) ||
-				     !memcmp(&dev->iboe.gid_table[port - 1][0],
-					     &zgid, sizeof gid))) {
-					dev->iboe.gid_table[port - 1][0] = gid;
-					++need_update;
-					hits[0] = 1;
-				} else if (free >= 0) {
-					dev->iboe.gid_table[port - 1][free] = gid;
-					hits[free] = 1;
-					++need_update;
-				}
+static int update_gid_table(struct mlx4_ib_dev *dev, int port,
+			    union ib_gid *gid, int clear)
+{
+	struct update_gid_work *work;
+	int i;
+	int need_update = 0;
+	int free = -1;
+	int found = -1;
+	int max_gids;
+
+	max_gids = dev->dev->caps.gid_table_len[port];
+	for (i = 0; i < max_gids; ++i) {
+		if (!memcmp(&dev->iboe.gid_table[port - 1][i], gid,
+			    sizeof(*gid)))
+			found = i;
+
+		if (clear) {
+			if (found >= 0) {
+				need_update = 1;
+				dev->iboe.gid_table[port - 1][found] = zgid;
+				break;
 			}
+		} else {
+			if (found >= 0)
+				break;
+
+			if (free < 0 &&
+			    !memcmp(&dev->iboe.gid_table[port - 1][i], &zgid,
+				    sizeof(*gid)))
+				free = i;
 		}
 	}
-	rcu_read_unlock();
 
-	for (i = 0; i < 128; ++i)
-		if (!hits[i]) {
-			if (memcmp(&dev->iboe.gid_table[port - 1][i], &zgid, sizeof zgid))
-				++need_update;
-			dev->iboe.gid_table[port - 1][i] = zgid;
-		}
+	if (found == -1 && !clear && free >= 0) {
+		dev->iboe.gid_table[port - 1][free] = *gid;
+		need_update = 1;
+	}
 
-	if (need_update) {
-		memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof work->gids);
-		INIT_WORK(&work->work, update_gids_task);
-		work->port = port;
-		work->dev = dev;
-		queue_work(wq, &work->work);
-	} else
-		kfree(work);
+	if (!need_update)
+		return 0;
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return -ENOMEM;
+
+	memcpy(work->gids, dev->iboe.gid_table[port - 1], sizeof(work->gids));
+	INIT_WORK(&work->work, update_gids_task);
+	work->port = port;
+	work->dev = dev;
+	queue_work(wq, &work->work);
 
-	kfree(hits);
 	return 0;
+}
 
-out:
-	kfree(work);
-	return ret;
+static int reset_gid_table(struct mlx4_ib_dev *dev)
+{
+	struct update_gid_work *work;
+
+
+	work = kzalloc(sizeof(*work), GFP_ATOMIC);
+	if (!work)
+		return -ENOMEM;
+	memset(dev->iboe.gid_table, 0, sizeof(dev->iboe.gid_table));
+	memset(work->gids, 0, sizeof(work->gids));
+	INIT_WORK(&work->work, reset_gids_task);
+	work->dev = dev;
+	queue_work(wq, &work->work);
+	return 0;
 }
 
-static void handle_en_event(struct mlx4_ib_dev *dev, int port, unsigned long event)
+static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
+			      struct mlx4_ib_dev *ibdev, union ib_gid *gid)
 {
-	switch (event) {
-	case NETDEV_UP:
-	case NETDEV_CHANGEADDR:
-		update_ipv6_gids(dev, port, 0);
-		break;
+	struct mlx4_ib_iboe *iboe;
+	int port = 0;
+	struct net_device *real_dev = rdma_vlan_dev_real_dev(event_netdev) ?
+				rdma_vlan_dev_real_dev(event_netdev) :
+				event_netdev;
+
+	if (event != NETDEV_DOWN && event != NETDEV_UP)
+		return 0;
+
+	if ((real_dev != event_netdev) &&
+	    (event == NETDEV_DOWN) &&
+	    rdma_link_local_addr((struct in6_addr *)gid))
+		return 0;
+
+	iboe = &ibdev->iboe;
+	spin_lock(&iboe->lock);
+
+	for (port = 1; port <= MLX4_MAX_PORTS; ++port)
+		if ((netif_is_bond_master(real_dev) &&
+		     (real_dev == iboe->masters[port - 1])) ||
+		     (!netif_is_bond_master(real_dev) &&
+		     (real_dev == iboe->netdevs[port - 1])))
+			update_gid_table(ibdev, port, gid,
+					 event == NETDEV_DOWN);
+
+	spin_unlock(&iboe->lock);
+	return 0;
 
 
-		update_ipv6_gids(dev, port, 1);
-		dev->iboe.netdevs[port - 1] = NULL;
-	}
 }
 
-static void netdev_added(struct mlx4_ib_dev *dev, int port)
+static u8 mlx4_ib_get_dev_port(struct net_device *dev,
+			       struct mlx4_ib_dev *ibdev)
 {
-	update_ipv6_gids(dev, port, 0);
+	u8 port = 0;
+	struct mlx4_ib_iboe *iboe;
+	struct net_device *real_dev = rdma_vlan_dev_real_dev(dev) ?
+				rdma_vlan_dev_real_dev(dev) : dev;
+
+	iboe = &ibdev->iboe;
+	spin_lock(&iboe->lock);
+
+	for (port = 1; port <= MLX4_MAX_PORTS; ++port)
+		if ((netif_is_bond_master(real_dev) &&
+		     (real_dev == iboe->masters[port - 1])) ||
+		     (!netif_is_bond_master(real_dev) &&
+		     (real_dev == iboe->netdevs[port - 1])))
+			break;
+
+	spin_unlock(&iboe->lock);
+
+	if ((port == 0) || (port > MLX4_MAX_PORTS))
+		return 0;
+	else
+		return port;
 }
 
-static void netdev_removed(struct mlx4_ib_dev *dev, int port)
+static int mlx4_ib_inet_event(struct notifier_block *this, unsigned long event,
+				void *ptr)
 {
-	update_ipv6_gids(dev, port, 1);
+	struct mlx4_ib_dev *ibdev;
+	struct in_ifaddr *ifa = ptr;
+	union ib_gid gid;
+	struct net_device *event_netdev = ifa->ifa_dev->dev;
+
+	ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
+
+	ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet);
+
+	mlx4_ib_addr_event(event, event_netdev, ibdev, &gid);
+	return NOTIFY_DONE;
 }
 
-static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event,
+#if IS_ENABLED(CONFIG_IPV6)
+static int mlx4_ib_inet6_event(struct notifier_block *this, unsigned long event,
 				void *ptr)
 {
-	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct mlx4_ib_dev *ibdev;
-	struct net_device *oldnd;
+	struct inet6_ifaddr *ifa = ptr;
+	union  ib_gid *gid = (union ib_gid *)&ifa->addr;
+	struct net_device *event_netdev = ifa->idev->dev;
+
+	ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb_inet6);
+
+	mlx4_ib_addr_event(event, event_netdev, ibdev, gid);
+	return NOTIFY_DONE;
+}
+#endif
+
+static void mlx4_ib_get_dev_addr(struct net_device *dev,
+				 struct mlx4_ib_dev *ibdev, u8 port)
+{
+	struct in_device *in_dev;
+#if IS_ENABLED(CONFIG_IPV6)
+	struct inet6_dev *in6_dev;
+	union ib_gid  *pgid;
+	struct inet6_ifaddr *ifp;
+#endif
+	union ib_gid gid;
+
+
+	if ((port == 0) || (port > MLX4_MAX_PORTS))
+		return;
+
+	/* IPv4 gids */
+	in_dev = in_dev_get(dev);
+	if (in_dev) {
+		for_ifa(in_dev) {
+			/*ifa->ifa_address;*/
+			ipv6_addr_set_v4mapped(ifa->ifa_address,
+					       (struct in6_addr *)&gid);
+			update_gid_table(ibdev, port, &gid, 0);
+		}
+		endfor_ifa(in_dev);
+		in_dev_put(in_dev);
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	/* IPv6 gids */
+	in6_dev = in6_dev_get(dev);
+	if (in6_dev) {
+		read_lock_bh(&in6_dev->lock);
+		list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
+			pgid = (union ib_gid *)&ifp->addr;
+			update_gid_table(ibdev, port, pgid, 0);
+		}
+		read_unlock_bh(&in6_dev->lock);
+		in6_dev_put(in6_dev);
+	}
+#endif
+}
+
+static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
+{
+	struct	net_device *dev;
+
+	if (reset_gid_table(ibdev))
+		return -1;
+
+	read_lock(&dev_base_lock);
+
+	for_each_netdev(&init_net, dev) {
+		u8 port = mlx4_ib_get_dev_port(dev, ibdev);
+		if (port)
+			mlx4_ib_get_dev_addr(dev, ibdev, port);
+	}
+
+	read_unlock(&dev_base_lock);
+
+	return 0;
+}
+
+static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev)
+{
 	struct mlx4_ib_iboe *iboe;
 	int port;
 
-	if (!net_eq(dev_net(dev), &init_net))
-		return NOTIFY_DONE;
-
-	ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
 	iboe = &ibdev->iboe;
 
 	spin_lock(&iboe->lock);
 	mlx4_foreach_ib_transport_port(port, ibdev->dev) {
-		oldnd = iboe->netdevs[port - 1];
+		struct net_device *old_master = iboe->masters[port - 1];
+		struct net_device *curr_master;
 		iboe->netdevs[port - 1] =
 			mlx4_get_protocol_dev(ibdev->dev, MLX4_PROT_ETH, port);
-		if (oldnd != iboe->netdevs[port - 1]) {
-			if (iboe->netdevs[port - 1])
-				netdev_added(ibdev, port);
-			else
-				netdev_removed(ibdev, port);
+
+		if (iboe->netdevs[port - 1] &&
+		    netif_is_bond_slave(iboe->netdevs[port - 1])) {
+			rtnl_lock();
+			iboe->masters[port - 1] = netdev_master_upper_dev_get(
+				iboe->netdevs[port - 1]);
+			rtnl_unlock();
 		}
-	}
+		curr_master = iboe->masters[port - 1];
 
-	if (dev == iboe->netdevs[0] ||
-	    (iboe->netdevs[0] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[0]))
-		handle_en_event(ibdev, 1, event);
-	else if (dev == iboe->netdevs[1]
-		 || (iboe->netdevs[1] && rdma_vlan_dev_real_dev(dev) == iboe->netdevs[1]))
-		handle_en_event(ibdev, 2, event);
+		/* if bonding is used it is possible that we add it to masters
+		    only after IP address is assigned to the net bonding
+		    interface */
+		if (curr_master && (old_master != curr_master))
+			mlx4_ib_get_dev_addr(curr_master, ibdev, port);
+	}
 
 	spin_unlock(&iboe->lock);
+}
+
+static int mlx4_ib_netdev_event(struct notifier_block *this,
+				unsigned long event, void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+	struct mlx4_ib_dev *ibdev;
+
+	if (!net_eq(dev_net(dev), &init_net))
+		return NOTIFY_DONE;
+
+	ibdev = container_of(this, struct mlx4_ib_dev, iboe.nb);
+	mlx4_ib_scan_netdevs(ibdev);
 
 	return NOTIFY_DONE;
 }
@@ -1682,6 +1959,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	}
 
 	if (check_flow_steering_support(dev)) {
+		ibdev->steering_support = MLX4_STEERING_MODE_DEVICE_MANAGED;
 		ibdev->ib_dev.create_flow	= mlx4_ib_create_flow;
 		ibdev->ib_dev.destroy_flow	= mlx4_ib_destroy_flow;
 
@@ -1710,8 +1988,35 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	spin_lock_init(&ibdev->sm_lock);
 	mutex_init(&ibdev->cap_mask_mutex);
 
+	if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+		ibdev->steer_qpn_count = MLX4_IB_UC_MAX_NUM_QPS;
+		err = mlx4_qp_reserve_range(dev, ibdev->steer_qpn_count,
+					    MLX4_IB_UC_STEER_QPN_ALIGN,
+					    &ibdev->steer_qpn_base);
+		if (err)
+			goto err_counter;
+
+		ibdev->ib_uc_qpns_bitmap =
+			kmalloc(BITS_TO_LONGS(ibdev->steer_qpn_count) *
+				sizeof(long),
+				GFP_KERNEL);
+		if (!ibdev->ib_uc_qpns_bitmap) {
+			dev_err(&dev->pdev->dev, "bit map alloc failed\n");
+			goto err_steer_qp_release;
+		}
+
+		bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count);
+
+		err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE(
+				dev, ibdev->steer_qpn_base,
+				ibdev->steer_qpn_base +
+				ibdev->steer_qpn_count - 1);
+		if (err)
+			goto err_steer_free_bitmap;
+	}
+
 	if (ib_register_device(&ibdev->ib_dev, NULL))
-		goto err_counter;
+		goto err_steer_free_bitmap;
 
 	if (mlx4_ib_mad_init(ibdev))
 		goto err_reg;
@@ -1719,11 +2024,35 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	if (mlx4_ib_init_sriov(ibdev))
 		goto err_mad;
 
-	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) {
-		iboe->nb.notifier_call = mlx4_ib_netdev_event;
-		err = register_netdevice_notifier(&iboe->nb);
-		if (err)
-			goto err_sriov;
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) {
+		if (!iboe->nb.notifier_call) {
+			iboe->nb.notifier_call = mlx4_ib_netdev_event;
+			err = register_netdevice_notifier(&iboe->nb);
+			if (err) {
+				iboe->nb.notifier_call = NULL;
+				goto err_notif;
+			}
+		}
+		if (!iboe->nb_inet.notifier_call) {
+			iboe->nb_inet.notifier_call = mlx4_ib_inet_event;
+			err = register_inetaddr_notifier(&iboe->nb_inet);
+			if (err) {
+				iboe->nb_inet.notifier_call = NULL;
+				goto err_notif;
+			}
+		}
+#if IS_ENABLED(CONFIG_IPV6)
+		if (!iboe->nb_inet6.notifier_call) {
+			iboe->nb_inet6.notifier_call = mlx4_ib_inet6_event;
+			err = register_inet6addr_notifier(&iboe->nb_inet6);
+			if (err) {
+				iboe->nb_inet6.notifier_call = NULL;
+				goto err_notif;
+			}
+		}
+#endif
+		mlx4_ib_scan_netdevs(ibdev);
+		mlx4_ib_init_gid_table(ibdev);
 	}
 	}
 
 
 	for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
@@ -1749,11 +2078,25 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	return ibdev;
 	return ibdev;
 
 
 err_notif:
-	if (unregister_netdevice_notifier(&ibdev->iboe.nb))
-		pr_warn("failure unregistering notifier\n");
+	if (ibdev->iboe.nb.notifier_call) {
+		if (unregister_netdevice_notifier(&ibdev->iboe.nb))
+			pr_warn("failure unregistering notifier\n");
+		ibdev->iboe.nb.notifier_call = NULL;
+	}
+	if (ibdev->iboe.nb_inet.notifier_call) {
+		if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+			pr_warn("failure unregistering notifier\n");
+		ibdev->iboe.nb_inet.notifier_call = NULL;
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	if (ibdev->iboe.nb_inet6.notifier_call) {
+		if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
+			pr_warn("failure unregistering notifier\n");
+		ibdev->iboe.nb_inet6.notifier_call = NULL;
+	}
+#endif
 	flush_workqueue(wq);
 
-err_sriov:
 	mlx4_ib_close_sriov(ibdev);
 
 err_mad:
@@ -1762,6 +2105,13 @@ err_mad:
 err_reg:
 	ib_unregister_device(&ibdev->ib_dev);
 
+err_steer_free_bitmap:
+	kfree(ibdev->ib_uc_qpns_bitmap);
+
+err_steer_qp_release:
+	if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED)
+		mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+				      ibdev->steer_qpn_count);
 err_counter:
 err_counter:
 	for (; i; --i)
 	for (; i; --i)
 		if (ibdev->counters[i - 1] != -1)
 		if (ibdev->counters[i - 1] != -1)
@@ -1782,6 +2132,69 @@ err_dealloc:
 	return NULL;
 }
 
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn)
+{
+	int offset;
+
+	WARN_ON(!dev->ib_uc_qpns_bitmap);
+
+	offset = bitmap_find_free_region(dev->ib_uc_qpns_bitmap,
+					 dev->steer_qpn_count,
+					 get_count_order(count));
+	if (offset < 0)
+		return offset;
+
+	*qpn = dev->steer_qpn_base + offset;
+	return 0;
+}
+
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count)
+{
+	if (!qpn ||
+	    dev->steering_support != MLX4_STEERING_MODE_DEVICE_MANAGED)
+		return;
+
+	BUG_ON(qpn < dev->steer_qpn_base);
+
+	bitmap_release_region(dev->ib_uc_qpns_bitmap,
+			      qpn - dev->steer_qpn_base,
+			      get_count_order(count));
+}
+
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+			 int is_attach)
+{
+	int err;
+	size_t flow_size;
+	struct ib_flow_attr *flow = NULL;
+	struct ib_flow_spec_ib *ib_spec;
+
+	if (is_attach) {
+		flow_size = sizeof(struct ib_flow_attr) +
+			    sizeof(struct ib_flow_spec_ib);
+		flow = kzalloc(flow_size, GFP_KERNEL);
+		if (!flow)
+			return -ENOMEM;
+		flow->port = mqp->port;
+		flow->num_of_specs = 1;
+		flow->size = flow_size;
+		ib_spec = (struct ib_flow_spec_ib *)(flow + 1);
+		ib_spec->type = IB_FLOW_SPEC_IB;
+		ib_spec->size = sizeof(struct ib_flow_spec_ib);
+		/* Add an empty rule for IB L2 */
+		memset(&ib_spec->mask, 0, sizeof(ib_spec->mask));
+
+		err = __mlx4_ib_create_flow(&mqp->ibqp, flow,
+					    IB_FLOW_DOMAIN_NIC,
+					    MLX4_FS_REGULAR,
+					    &mqp->reg_id);
+	} else {
+		err = __mlx4_ib_destroy_flow(mdev->dev, mqp->reg_id);
+	}
+	kfree(flow);
+	return err;
+}
+
 static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
 {
 	struct mlx4_ib_dev *ibdev = ibdev_ptr;
@@ -1795,6 +2208,26 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
 			pr_warn("failure unregistering notifier\n");
 		ibdev->iboe.nb.notifier_call = NULL;
 	}
+
+	if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED) {
+		mlx4_qp_release_range(dev, ibdev->steer_qpn_base,
+				      ibdev->steer_qpn_count);
+		kfree(ibdev->ib_uc_qpns_bitmap);
+	}
+
+	if (ibdev->iboe.nb_inet.notifier_call) {
+		if (unregister_inetaddr_notifier(&ibdev->iboe.nb_inet))
+			pr_warn("failure unregistering notifier\n");
+		ibdev->iboe.nb_inet.notifier_call = NULL;
+	}
+#if IS_ENABLED(CONFIG_IPV6)
+	if (ibdev->iboe.nb_inet6.notifier_call) {
+		if (unregister_inet6addr_notifier(&ibdev->iboe.nb_inet6))
+			pr_warn("failure unregistering notifier\n");
+		ibdev->iboe.nb_inet6.notifier_call = NULL;
+	}
+#endif
+
 	iounmap(ibdev->uar_map);
 	for (p = 0; p < ibdev->num_ports; ++p)
 		if (ibdev->counters[p] != -1)

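Editorial note on the mlx4/main.c hunks above (not part of the patch): the new inet/inet6 notifiers derive RoCE GIDs from IP addresses, with IPv4 addresses carried as IPv4-mapped IPv6 addresses via ipv6_addr_set_v4mapped(). A minimal standalone sketch of that byte layout, using invented names (sketch_v4mapped_gid), assuming only what the hunk itself shows:

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* ::ffff:a.b.c.d -- bytes 0-9 are zero, bytes 10-11 are 0xff,
     * bytes 12-15 carry the IPv4 address in network order. */
    static void sketch_v4mapped_gid(const uint8_t ipv4[4], uint8_t gid[16])
    {
    	memset(gid, 0, 10);
    	gid[10] = 0xff;
    	gid[11] = 0xff;
    	memcpy(&gid[12], ipv4, 4);
    }

    int main(void)
    {
    	const uint8_t addr[4] = { 192, 168, 1, 10 };
    	uint8_t gid[16];

    	sketch_v4mapped_gid(addr, gid);
    	assert(gid[10] == 0xff && gid[11] == 0xff && gid[12] == 192 && gid[15] == 10);
    	return 0;
    }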
+ 15 - 3
drivers/infiniband/hw/mlx4/mlx4_ib.h

@@ -68,6 +68,8 @@ enum {
 /*module param to indicate if SM assigns the alias_GUID*/
 extern int mlx4_ib_sm_guid_assign;
 
+#define MLX4_IB_UC_STEER_QPN_ALIGN 1
+#define MLX4_IB_UC_MAX_NUM_QPS     256
 struct mlx4_ib_ucontext {
 	struct ib_ucontext	ibucontext;
 	struct mlx4_uar		uar;
@@ -153,6 +155,7 @@ struct mlx4_ib_wq {
 enum mlx4_ib_qp_flags {
 	MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO,
 	MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+	MLX4_IB_QP_NETIF = IB_QP_CREATE_NETIF_QP,
 	MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30,
 	MLX4_IB_SRIOV_SQP = 1 << 31,
 };
@@ -270,6 +273,7 @@ struct mlx4_ib_qp {
 	struct list_head	gid_list;
 	struct list_head	steering_rules;
 	struct mlx4_ib_buf	*sqp_proxy_rcv;
+	u64			reg_id;
 
 };
 
@@ -428,7 +432,10 @@ struct mlx4_ib_sriov {
 struct mlx4_ib_iboe {
 	spinlock_t		lock;
 	struct net_device      *netdevs[MLX4_MAX_PORTS];
+	struct net_device      *masters[MLX4_MAX_PORTS];
 	struct notifier_block 	nb;
+	struct notifier_block	nb_inet;
+	struct notifier_block	nb_inet6;
 	union ib_gid		gid_table[MLX4_MAX_PORTS][128];
 };
 
@@ -494,6 +501,10 @@ struct mlx4_ib_dev {
 	struct kobject	       *dev_ports_parent[MLX4_MFUNC_MAX];
 	struct mlx4_ib_iov_port	iov_ports[MLX4_MAX_PORTS];
 	struct pkey_mgt		pkeys;
+	unsigned long *ib_uc_qpns_bitmap;
+	int steer_qpn_count;
+	int steer_qpn_base;
+	int steering_support;
 };
 
 struct ib_event_work {
@@ -675,9 +686,6 @@ int __mlx4_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
 int __mlx4_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
 			union ib_gid *gid, int netw_view);
 
-int mlx4_ib_resolve_grh(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah_attr,
-			u8 *mac, int *is_mcast, u8 port);
-
 static inline bool mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
 {
 	u8 port = be32_to_cpu(ah->av.ib.port_pd) >> 24 & 3;
@@ -752,5 +760,9 @@ void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device);
 
 __be64 mlx4_ib_gen_node_guid(void);
 
+int mlx4_ib_steer_qp_alloc(struct mlx4_ib_dev *dev, int count, int *qpn);
+void mlx4_ib_steer_qp_free(struct mlx4_ib_dev *dev, u32 qpn, int count);
+int mlx4_ib_steer_qp_reg(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
+			 int is_attach);
 
 #endif /* MLX4_IB_H */

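Editorial note (not from the patch): mlx4_ib_steer_qp_alloc()/mlx4_ib_steer_qp_free(), declared here and implemented in the main.c hunk above, manage the MLX4_IB_UC_MAX_NUM_QPS steering QP numbers with the kernel's bitmap_find_free_region()/bitmap_release_region(), which hand out power-of-two, naturally aligned regions. A simplified userspace model of that allocation policy (all names below are invented for the sketch):

    #include <stdint.h>
    #include <stdio.h>

    #define QPN_COUNT 256			/* mirrors MLX4_IB_UC_MAX_NUM_QPS */
    static uint8_t used[QPN_COUNT];

    static int sketch_order(int count)	/* roughly get_count_order(): ceil(log2(count)) */
    {
    	int order = 0;
    	while ((1 << order) < count)
    		order++;
    	return order;
    }

    /* Find a free, size-aligned run of 2^order slots and mark it used. */
    static int sketch_alloc_region(int count)
    {
    	int size = 1 << sketch_order(count);
    	for (int base = 0; base + size <= QPN_COUNT; base += size) {
    		int free = 1;
    		for (int i = 0; i < size; i++)
    			free &= !used[base + i];
    		if (!free)
    			continue;
    		for (int i = 0; i < size; i++)
    			used[base + i] = 1;
    		return base;	/* the driver adds steer_qpn_base to this offset */
    	}
    	return -1;
    }

    int main(void)
    {
    	printf("first region at offset %d\n", sketch_alloc_region(1));
    	printf("second region at offset %d\n", sketch_alloc_region(4));
    	return 0;
    }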
+ 130 - 27
drivers/infiniband/hw/mlx4/qp.c

@@ -90,6 +90,21 @@ enum {
 	MLX4_RAW_QP_MSGMAX	= 31,
 };
 
+#ifndef ETH_ALEN
+#define ETH_ALEN        6
+#endif
+static inline u64 mlx4_mac_to_u64(u8 *addr)
+{
+	u64 mac = 0;
+	int i;
+
+	for (i = 0; i < ETH_ALEN; i++) {
+		mac <<= 8;
+		mac |= addr[i];
+	}
+	return mac;
+}
+
 static const __be32 mlx4_ib_opcode[] = {
 	[IB_WR_SEND]				= cpu_to_be32(MLX4_OPCODE_SEND),
 	[IB_WR_LSO]				= cpu_to_be32(MLX4_OPCODE_LSO),
@@ -716,6 +731,14 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
 			qp->flags |= MLX4_IB_QP_LSO;
 
+		if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
+			if (dev->steering_support ==
+			    MLX4_STEERING_MODE_DEVICE_MANAGED)
+				qp->flags |= MLX4_IB_QP_NETIF;
+			else
+				goto err;
+		}
+
 		err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp);
 		if (err)
 			goto err;
@@ -765,7 +788,11 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 		if (init_attr->qp_type == IB_QPT_RAW_PACKET)
 			err = mlx4_qp_reserve_range(dev->dev, 1, 1 << 8, &qpn);
 		else
-			err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn);
+			if (qp->flags & MLX4_IB_QP_NETIF)
+				err = mlx4_ib_steer_qp_alloc(dev, 1, &qpn);
+			else
+				err = mlx4_qp_reserve_range(dev->dev, 1, 1,
+							    &qpn);
 		if (err)
 			goto err_proxy;
 	}
@@ -790,8 +817,12 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd,
 	return 0;
 
 err_qpn:
-	if (!sqpn)
-		mlx4_qp_release_range(dev->dev, qpn, 1);
+	if (!sqpn) {
+		if (qp->flags & MLX4_IB_QP_NETIF)
+			mlx4_ib_steer_qp_free(dev, qpn, 1);
+		else
+			mlx4_qp_release_range(dev->dev, qpn, 1);
+	}
 err_proxy:
 	if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI)
 		free_proxy_bufs(pd->device, qp);
@@ -932,8 +963,12 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
 
 	mlx4_qp_free(dev->dev, &qp->mqp);
 
-	if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp))
-		mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+	if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) {
+		if (qp->flags & MLX4_IB_QP_NETIF)
+			mlx4_ib_steer_qp_free(dev, qp->mqp.qpn, 1);
+		else
+			mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1);
+	}
 
 	mlx4_mtt_cleanup(dev->dev, &qp->mtt);
 
@@ -987,9 +1022,16 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 	 */
 	if (init_attr->create_flags & ~(MLX4_IB_QP_LSO |
 					MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK |
-					MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP))
+					MLX4_IB_SRIOV_TUNNEL_QP |
+					MLX4_IB_SRIOV_SQP |
+					MLX4_IB_QP_NETIF))
 		return ERR_PTR(-EINVAL);
 
+	if (init_attr->create_flags & IB_QP_CREATE_NETIF_QP) {
+		if (init_attr->qp_type != IB_QPT_UD)
+			return ERR_PTR(-EINVAL);
+	}
+
 	if (init_attr->create_flags &&
 	    (udata ||
 	     ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) &&
@@ -1144,16 +1186,15 @@ static void mlx4_set_sched(struct mlx4_qp_path *path, u8 port)
 	path->sched_queue = (path->sched_queue & 0xbf) | ((port - 1) << 6);
 }
 
-static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
-			 struct mlx4_qp_path *path, u8 port)
+static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
+			  u64 smac, u16 vlan_tag, struct mlx4_qp_path *path,
+			  u8 port)
 {
-	int err;
 	int is_eth = rdma_port_get_link_layer(&dev->ib_dev, port) ==
 		IB_LINK_LAYER_ETHERNET;
-	u8 mac[6];
-	int is_mcast;
-	u16 vlan_tag;
 	int vidx;
+	int smac_index;
+
 
 	path->grh_mylmc     = ah->src_path_bits & 0x7f;
 	path->rlid	    = cpu_to_be16(ah->dlid);
@@ -1188,22 +1229,27 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
 		if (!(ah->ah_flags & IB_AH_GRH))
 			return -1;
 
-		err = mlx4_ib_resolve_grh(dev, ah, mac, &is_mcast, port);
-		if (err)
-			return err;
-
-		memcpy(path->dmac, mac, 6);
+		memcpy(path->dmac, ah->dmac, ETH_ALEN);
 		path->ackto = MLX4_IB_LINK_TYPE_ETH;
-		/* use index 0 into MAC table for IBoE */
-		path->grh_mylmc &= 0x80;
+		/* find the index  into MAC table for IBoE */
+		if (!is_zero_ether_addr((const u8 *)&smac)) {
+			if (mlx4_find_cached_mac(dev->dev, port, smac,
+						 &smac_index))
+				return -ENOENT;
+		} else {
+			smac_index = 0;
+		}
+
+		path->grh_mylmc &= 0x80 | smac_index;
 
-		vlan_tag = rdma_get_vlan_id(&dev->iboe.gid_table[port - 1][ah->grh.sgid_index]);
+		path->feup |= MLX4_FEUP_FORCE_ETH_UP;
 		if (vlan_tag < 0x1000) {
 			if (mlx4_find_cached_vlan(dev->dev, port, vlan_tag, &vidx))
 				return -ENOENT;
 
 			path->vlan_index = vidx;
 			path->fl = 1 << 6;
+			path->feup |= MLX4_FVL_FORCE_ETH_VLAN;
 		}
 	} else
 		path->sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE |
@@ -1212,6 +1258,28 @@ static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
 	return 0;
 }
 
+static int mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_qp_attr *qp,
+			 enum ib_qp_attr_mask qp_attr_mask,
+			 struct mlx4_qp_path *path, u8 port)
+{
+	return _mlx4_set_path(dev, &qp->ah_attr,
+			      mlx4_mac_to_u64((u8 *)qp->smac),
+			      (qp_attr_mask & IB_QP_VID) ? qp->vlan_id : 0xffff,
+			      path, port);
+}
+
+static int mlx4_set_alt_path(struct mlx4_ib_dev *dev,
+			     const struct ib_qp_attr *qp,
+			     enum ib_qp_attr_mask qp_attr_mask,
+			     struct mlx4_qp_path *path, u8 port)
+{
+	return _mlx4_set_path(dev, &qp->alt_ah_attr,
+			      mlx4_mac_to_u64((u8 *)qp->alt_smac),
+			      (qp_attr_mask & IB_QP_ALT_VID) ?
+			      qp->alt_vlan_id : 0xffff,
+			      path, port);
+}
+
 static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
 {
 	struct mlx4_ib_gid_entry *ge, *tmp;
@@ -1235,6 +1303,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 	struct mlx4_qp_context *context;
 	enum mlx4_qp_optpar optpar = 0;
 	int sqd_event;
+	int steer_qp = 0;
 	int err = -EINVAL;
 
 	context = kzalloc(sizeof *context, GFP_KERNEL);
@@ -1319,6 +1388,11 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 			optpar |= MLX4_QP_OPTPAR_COUNTER_INDEX;
 		} else
 			context->pri_path.counter_index = 0xff;
+
+		if (qp->flags & MLX4_IB_QP_NETIF) {
+			mlx4_ib_steer_qp_reg(dev, qp, 1);
+			steer_qp = 1;
+		}
 	}
 
 	if (attr_mask & IB_QP_PKEY_INDEX) {
@@ -1329,7 +1403,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 	}
 
 	if (attr_mask & IB_QP_AV) {
-		if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path,
+		if (mlx4_set_path(dev, attr, attr_mask, &context->pri_path,
 				  attr_mask & IB_QP_PORT ?
 				  attr->port_num : qp->port))
 			goto out;
@@ -1352,8 +1426,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 		    dev->dev->caps.pkey_table_len[attr->alt_port_num])
 			goto out;
 
-		if (mlx4_set_path(dev, &attr->alt_ah_attr, &context->alt_path,
-				  attr->alt_port_num))
+		if (mlx4_set_alt_path(dev, attr, attr_mask, &context->alt_path,
+				      attr->alt_port_num))
 			goto out;
 
 		context->alt_path.pkey_index = attr->alt_pkey_index;
@@ -1464,6 +1538,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 		context->pri_path.ackto = (context->pri_path.ackto & 0xf8) |
 					MLX4_IB_LINK_TYPE_ETH;
 
+	if (ibqp->qp_type == IB_QPT_UD && (new_state == IB_QPS_RTR)) {
+		int is_eth = rdma_port_get_link_layer(
+				&dev->ib_dev, qp->port) ==
+				IB_LINK_LAYER_ETHERNET;
+		if (is_eth) {
+			context->pri_path.ackto = MLX4_IB_LINK_TYPE_ETH;
+			optpar |= MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH;
+		}
+	}
+
+
 	if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD	&&
 	    attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify)
 		sqd_event = 1;
@@ -1547,9 +1632,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
 		qp->sq_next_wqe = 0;
 		if (qp->rq.wqe_cnt)
 			*qp->db.db  = 0;
+
+		if (qp->flags & MLX4_IB_QP_NETIF)
+			mlx4_ib_steer_qp_reg(dev, qp, 0);
 	}
 
 out:
+	if (err && steer_qp)
+		mlx4_ib_steer_qp_reg(dev, qp, 0);
 	kfree(context);
 	return err;
 }
@@ -1561,13 +1651,21 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	struct mlx4_ib_qp *qp = to_mqp(ibqp);
 	enum ib_qp_state cur_state, new_state;
 	int err = -EINVAL;
-
+	int ll;
 	mutex_lock(&qp->mutex);
 
 	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
 	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+		ll = IB_LINK_LAYER_UNSPECIFIED;
+	} else {
+		int port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
+		ll = rdma_port_get_link_layer(&dev->ib_dev, port);
+	}
+
+	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
+				attr_mask, ll)) {
 		pr_debug("qpn 0x%x: invalid attribute mask specified "
 			 "for transition %d to %d. qp_type %d,"
 			 " attr_mask 0x%x\n",
@@ -1784,8 +1882,10 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
 				return err;
 		}
 
-		vlan = rdma_get_vlan_id(&sgid);
-		is_vlan = vlan < 0x1000;
+		if (ah->av.eth.vlan != 0xffff) {
+			vlan = be16_to_cpu(ah->av.eth.vlan) & 0x0fff;
+			is_vlan = 1;
+		}
 	}
 	ib_ud_header_init(send_size, !is_eth, is_eth, is_vlan, is_grh, 0, &sqp->ud_header);
 
@@ -2762,6 +2862,9 @@ done:
 	if (qp->flags & MLX4_IB_QP_LSO)
 		qp_init_attr->create_flags |= IB_QP_CREATE_IPOIB_UD_LSO;
 
+	if (qp->flags & MLX4_IB_QP_NETIF)
+		qp_init_attr->create_flags |= IB_QP_CREATE_NETIF_QP;
+
 	qp_init_attr->sq_sig_type =
 		qp->sq_signal_bits == cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE) ?
 		IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
 		IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;

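Editorial note (not from the patch): the mlx4_mac_to_u64() helper added at the top of this file packs a 6-byte MAC into the low 48 bits of a u64, most significant byte first. A quick standalone check of that packing, using an invented name (sketch_mac_to_u64):

    #include <assert.h>
    #include <stdint.h>

    static uint64_t sketch_mac_to_u64(const uint8_t *addr)
    {
    	uint64_t mac = 0;

    	/* same shift-and-or loop as the driver helper */
    	for (int i = 0; i < 6; i++) {
    		mac <<= 8;
    		mac |= addr[i];
    	}
    	return mac;
    }

    int main(void)
    {
    	const uint8_t mac[6] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };

    	assert(sketch_mac_to_u64(mac) == 0x001122334455ULL);
    	return 0;
    }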
+ 6 - 2
drivers/infiniband/hw/mlx4/sysfs.c

@@ -582,8 +582,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
 	p->pkey_group.attrs =
 		alloc_group_attrs(show_port_pkey, store_port_pkey,
 				  dev->dev->caps.pkey_table_len[port_num]);
-	if (!p->pkey_group.attrs)
+	if (!p->pkey_group.attrs) {
+		ret = -ENOMEM;
 		goto err_alloc;
+	}
 
 	ret = sysfs_create_group(&p->kobj, &p->pkey_group);
 	if (ret)
@@ -591,8 +593,10 @@ static int add_port(struct mlx4_ib_dev *dev, int port_num, int slave)
 
 	p->gid_group.name  = "gid_idx";
 	p->gid_group.attrs = alloc_group_attrs(show_port_gid_idx, NULL, 1);
-	if (!p->gid_group.attrs)
+	if (!p->gid_group.attrs) {
+		ret = -ENOMEM;
 		goto err_free_pkey;
+	}
 
 	ret = sysfs_create_group(&p->kobj, &p->gid_group);
 	if (ret)

+ 293 - 17
drivers/infiniband/hw/mlx5/cq.c

@@ -73,14 +73,24 @@ static void *get_cqe(struct mlx5_ib_cq *cq, int n)
 	return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
 }
 
+static u8 sw_ownership_bit(int n, int nent)
+{
+	return (n & nent) ? 1 : 0;
+}
+
 static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
 static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
 {
 {
 	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
 	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
 	struct mlx5_cqe64 *cqe64;
 	struct mlx5_cqe64 *cqe64;
 
 
 	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
-	return ((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^
-		!!(n & (cq->ibcq.cqe + 1))) ? NULL : cqe;
+
+	if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
+	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
+		return cqe;
+	} else {
+		return NULL;
+	}
 }
 }
 
 
 static void *next_cqe_sw(struct mlx5_ib_cq *cq)
@@ -351,6 +361,11 @@ static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
 	qp->sq.last_poll = tail;
 }
 
+static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
+{
+	mlx5_buf_free(&dev->mdev, &buf->buf);
+}
+
 static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 			 struct mlx5_ib_qp **cur_qp,
 			 struct mlx5_ib_qp **cur_qp,
 			 struct ib_wc *wc)
 			 struct ib_wc *wc)
@@ -366,6 +381,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 	void *cqe;
 	void *cqe;
 	int idx;
 	int idx;
 
 
 	cqe = next_cqe_sw(cq);
 	cqe = next_cqe_sw(cq);
 	if (!cqe)
 	if (!cqe)
 		return -EAGAIN;
 		return -EAGAIN;
@@ -379,7 +395,18 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 	 */
 	 */
 	rmb();
 	rmb();
 
 
+	opcode = cqe64->op_own >> 4;
+	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
+		if (likely(cq->resize_buf)) {
+			free_cq_buf(dev, &cq->buf);
+			cq->buf = *cq->resize_buf;
+			kfree(cq->resize_buf);
+			cq->resize_buf = NULL;
+			goto repoll;
+		} else {
+			mlx5_ib_warn(dev, "unexpected resize cqe\n");
+		}
+	}
 
 	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
 	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
 	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
@@ -398,7 +425,6 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 	}
 
 	wc->qp  = &(*cur_qp)->ibqp;
-	opcode = cqe64->op_own >> 4;
 	switch (opcode) {
 	case MLX5_CQE_REQ:
 		wq = &(*cur_qp)->sq;
@@ -503,15 +529,11 @@ static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
 		return err;
 
 	buf->cqe_size = cqe_size;
+	buf->nent = nent;
 
 	return 0;
 }
 
-static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
-{
-	mlx5_buf_free(&dev->mdev, &buf->buf);
-}
-
 static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
 			  struct ib_ucontext *context, struct mlx5_ib_cq *cq,
 			  int entries, struct mlx5_create_cq_mbox_in **cqb,
@@ -576,16 +598,16 @@ static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
 	ib_umem_release(cq->buf.umem);
 }
 
-static void init_cq_buf(struct mlx5_ib_cq *cq, int nent)
+static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
 {
 	int i;
 	void *cqe;
 	struct mlx5_cqe64 *cqe64;
 
-	for (i = 0; i < nent; i++) {
-		cqe = get_cqe(cq, i);
-		cqe64 = (cq->buf.cqe_size == 64) ? cqe : cqe + 64;
-		cqe64->op_own = 0xf1;
+	for (i = 0; i < buf->nent; i++) {
+		cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
+		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
+		cqe64->op_own = MLX5_CQE_INVALID << 4;
 	}
 }
 
@@ -610,7 +632,7 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
 	if (err)
 		goto err_db;
 
-	init_cq_buf(cq, entries);
+	init_cq_buf(cq, &cq->buf);
 
 	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
 	*cqb = mlx5_vzalloc(*inlen);
@@ -818,12 +840,266 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
 
 int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
 {
-	return -ENOSYS;
+	struct mlx5_modify_cq_mbox_in *in;
+	struct mlx5_ib_dev *dev = to_mdev(cq->device);
+	struct mlx5_ib_cq *mcq = to_mcq(cq);
+	int err;
+	u32 fsel;
+
+	if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_CQ_MODER))
+		return -ENOSYS;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	in->cqn = cpu_to_be32(mcq->mcq.cqn);
+	fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
+	in->ctx.cq_period = cpu_to_be16(cq_period);
+	in->ctx.cq_max_count = cpu_to_be16(cq_count);
+	in->field_select = cpu_to_be32(fsel);
+	err = mlx5_core_modify_cq(&dev->mdev, &mcq->mcq, in, sizeof(*in));
+	kfree(in);
+
+	if (err)
+		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);
+
+	return err;
+}
+
+static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+		       int entries, struct ib_udata *udata, int *npas,
+		       int *page_shift, int *cqe_size)
+{
+	struct mlx5_ib_resize_cq ucmd;
+	struct ib_umem *umem;
+	int err;
+	int npages;
+	struct ib_ucontext *context = cq->buf.umem->context;
+
+	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
+	if (err)
+		return err;
+
+	if (ucmd.reserved0 || ucmd.reserved1)
+		return -EINVAL;
+
+	umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
+			   IB_ACCESS_LOCAL_WRITE, 1);
+	if (IS_ERR(umem)) {
+		err = PTR_ERR(umem);
+		return err;
+	}
+
+	mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
+			   npas, NULL);
+
+	cq->resize_umem = umem;
+	*cqe_size = ucmd.cqe_size;
+
+	return 0;
+}
+
+static void un_resize_user(struct mlx5_ib_cq *cq)
+{
+	ib_umem_release(cq->resize_umem);
+}
+
+static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
+			 int entries, int cqe_size)
+{
+	int err;
+
+	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
+	if (!cq->resize_buf)
+		return -ENOMEM;
+
+	err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
+	if (err)
+		goto ex;
+
+	init_cq_buf(cq, cq->resize_buf);
+
+	return 0;
+
+ex:
+	kfree(cq->resize_buf);
+	return err;
+}
+
+static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
+{
+	free_cq_buf(dev, cq->resize_buf);
+	cq->resize_buf = NULL;
+}
+
+static int copy_resize_cqes(struct mlx5_ib_cq *cq)
+{
+	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+	struct mlx5_cqe64 *scqe64;
+	struct mlx5_cqe64 *dcqe64;
+	void *start_cqe;
+	void *scqe;
+	void *dcqe;
+	int ssize;
+	int dsize;
+	int i;
+	u8 sw_own;
+
+	ssize = cq->buf.cqe_size;
+	dsize = cq->resize_buf->cqe_size;
+	if (ssize != dsize) {
+		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
+		return -EINVAL;
+	}
+
+	i = cq->mcq.cons_index;
+	scqe = get_sw_cqe(cq, i);
+	scqe64 = ssize == 64 ? scqe : scqe + 64;
+	start_cqe = scqe;
+	if (!scqe) {
+		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
+		return -EINVAL;
+	}
+
+	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
+		dcqe = get_cqe_from_buf(cq->resize_buf,
+					(i + 1) & (cq->resize_buf->nent),
+					dsize);
+		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
+		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
+		memcpy(dcqe, scqe, dsize);
+		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;
+
+		++i;
+		scqe = get_sw_cqe(cq, i);
+		scqe64 = ssize == 64 ? scqe : scqe + 64;
+		if (!scqe) {
+			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
+			return -EINVAL;
+		}
+
+		if (scqe == start_cqe) {
+			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
+				cq->mcq.cqn);
+			return -ENOMEM;
+		}
+	}
+	++cq->mcq.cons_index;
+	return 0;
 }
 }
 
 
 int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
 {
 {
-	return -ENOSYS;
+	struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
+	struct mlx5_ib_cq *cq = to_mcq(ibcq);
+	struct mlx5_modify_cq_mbox_in *in;
+	int err;
+	int npas;
+	int page_shift;
+	int inlen;
+	int uninitialized_var(cqe_size);
+	unsigned long flags;
+
+	if (!(dev->mdev.caps.flags & MLX5_DEV_CAP_FLAG_RESIZE_CQ)) {
+		pr_info("Firmware does not support resize CQ\n");
+		return -ENOSYS;
+	}
+
+	if (entries < 1)
+		return -EINVAL;
+
+	entries = roundup_pow_of_two(entries + 1);
+	if (entries > dev->mdev.caps.max_cqes + 1)
+		return -EINVAL;
+
+	if (entries == ibcq->cqe + 1)
+		return 0;
+
+	mutex_lock(&cq->resize_mutex);
+	if (udata) {
+		err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
+				  &cqe_size);
+	} else {
+		cqe_size = 64;
+		err = resize_kernel(dev, cq, entries, cqe_size);
+		if (!err) {
+			npas = cq->resize_buf->buf.npages;
+			page_shift = cq->resize_buf->buf.page_shift;
+		}
+	}
+
+	if (err)
+		goto ex;
+
+	inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
+	in = mlx5_vzalloc(inlen);
+	if (!in) {
+		err = -ENOMEM;
+		goto ex_resize;
+	}
+
+	if (udata)
+		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
+				     in->pas, 0);
+	else
+		mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);
+
+	in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
+				       MLX5_MODIFY_CQ_MASK_PG_OFFSET |
+				       MLX5_MODIFY_CQ_MASK_PG_SIZE);
+	in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+	in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
+	in->ctx.page_offset = 0;
+	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
+	in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
+	in->cqn = cpu_to_be32(cq->mcq.cqn);
+
+	err = mlx5_core_modify_cq(&dev->mdev, &cq->mcq, in, inlen);
+	if (err)
+		goto ex_alloc;
+
+	if (udata) {
+		cq->ibcq.cqe = entries - 1;
+		ib_umem_release(cq->buf.umem);
+		cq->buf.umem = cq->resize_umem;
+		cq->resize_umem = NULL;
+	} else {
+		struct mlx5_ib_cq_buf tbuf;
+		int resized = 0;
+
+		spin_lock_irqsave(&cq->lock, flags);
+		if (cq->resize_buf) {
+			err = copy_resize_cqes(cq);
+			if (!err) {
+				tbuf = cq->buf;
+				cq->buf = *cq->resize_buf;
+				kfree(cq->resize_buf);
+				cq->resize_buf = NULL;
+				resized = 1;
+			}
+		}
+		cq->ibcq.cqe = entries - 1;
+		spin_unlock_irqrestore(&cq->lock, flags);
+		if (resized)
+			free_cq_buf(dev, &tbuf);
+	}
+	mutex_unlock(&cq->resize_mutex);
+
+	mlx5_vfree(in);
+	return 0;
+
+ex_alloc:
+	mlx5_vfree(in);
+
+ex_resize:
+	if (udata)
+		un_resize_user(cq);
+	else
+		un_resize_kernel(dev, cq);
+ex:
+	mutex_unlock(&cq->resize_mutex);
+	return err;
 }
 }
 
 
 int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)

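Editorial note (not from the patch): get_sw_cqe() and the new sw_ownership_bit() in this file rely on the CQ size being a power of two, so the software ownership bit is simply the wrap-parity of the consumer index. A small standalone illustration, assuming nent = 8 for the example:

    #include <stdio.h>

    /* same expression as the sw_ownership_bit() added above */
    static int sketch_sw_ownership_bit(int n, int nent)
    {
    	return (n & nent) ? 1 : 0;
    }

    int main(void)
    {
    	const int nent = 8;	/* must be a power of two */

    	/* indices 0..7 -> owner 0, 8..15 -> owner 1, 16..23 -> owner 0, ... */
    	for (int ci = 0; ci < 24; ci++)
    		printf("cons_index %2d -> sw owner %d\n", ci, sketch_sw_ownership_bit(ci, nent));
    	return 0;
    }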
+ 8 - 5
drivers/infiniband/hw/mlx5/main.c

@@ -541,6 +541,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	struct mlx5_ib_ucontext *context;
 	struct mlx5_uuar_info *uuari;
 	struct mlx5_uar *uars;
+	int gross_uuars;
 	int num_uars;
 	int uuarn;
 	int err;
@@ -559,11 +560,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	if (req.total_num_uuars == 0)
 		return ERR_PTR(-EINVAL);
 
-	req.total_num_uuars = ALIGN(req.total_num_uuars, MLX5_BF_REGS_PER_PAGE);
+	req.total_num_uuars = ALIGN(req.total_num_uuars,
+				    MLX5_NON_FP_BF_REGS_PER_PAGE);
 	if (req.num_low_latency_uuars > req.total_num_uuars - 1)
 		return ERR_PTR(-EINVAL);
 
 
-	num_uars = req.total_num_uuars / MLX5_BF_REGS_PER_PAGE;
+	num_uars = req.total_num_uuars / MLX5_NON_FP_BF_REGS_PER_PAGE;
+	gross_uuars = num_uars * MLX5_BF_REGS_PER_PAGE;
 	resp.qp_tab_size      = 1 << dev->mdev.caps.log_max_qp;
 	resp.qp_tab_size      = 1 << dev->mdev.caps.log_max_qp;
 	resp.bf_reg_size      = dev->mdev.caps.bf_reg_size;
 	resp.bf_reg_size      = dev->mdev.caps.bf_reg_size;
 	resp.cache_line_size  = L1_CACHE_BYTES;
 	resp.cache_line_size  = L1_CACHE_BYTES;
@@ -585,7 +588,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 		goto out_ctx;
 		goto out_ctx;
 	}
 	}
 
 
-	uuari->bitmap = kcalloc(BITS_TO_LONGS(req.total_num_uuars),
+	uuari->bitmap = kcalloc(BITS_TO_LONGS(gross_uuars),
 				sizeof(*uuari->bitmap),
 				sizeof(*uuari->bitmap),
 				GFP_KERNEL);
 				GFP_KERNEL);
 	if (!uuari->bitmap) {
 	if (!uuari->bitmap) {
@@ -595,13 +598,13 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
 	/*
 	/*
 	 * clear all fast path uuars
 	 * clear all fast path uuars
 	 */
 	 */
-	for (i = 0; i < req.total_num_uuars; i++) {
+	for (i = 0; i < gross_uuars; i++) {
 		uuarn = i & 3;
 		uuarn = i & 3;
 		if (uuarn == 2 || uuarn == 3)
 		if (uuarn == 2 || uuarn == 3)
 			set_bit(i, uuari->bitmap);
 			set_bit(i, uuari->bitmap);
 	}
 	}
 
 
-	uuari->count = kcalloc(req.total_num_uuars, sizeof(*uuari->count), GFP_KERNEL);
+	uuari->count = kcalloc(gross_uuars, sizeof(*uuari->count), GFP_KERNEL);
 	if (!uuari->count) {
 	if (!uuari->count) {
 		err = -ENOMEM;
 		err = -ENOMEM;
 		goto out_bitmap;
 		goto out_bitmap;
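The bitmap and count arrays above are now sized by the gross UUAR count rather than the requested one. A small userspace sketch of that accounting follows; the per-page constants (four blue-flame registers per UAR page, two of them fast path) are assumptions read off the bitmap loop, not values quoted from the mlx5 headers.

/*
 * Userspace sketch of the UUAR accounting above; the constants are
 * assumptions taken from the hunk itself.
 */
#include <stdio.h>

#define NON_FP_BF_REGS_PER_PAGE	2	/* assumed */
#define BF_REGS_PER_PAGE	4	/* assumed */

int main(void)
{
	unsigned int total_num_uuars = 16;	/* already ALIGNed by the caller */
	unsigned int num_uars = total_num_uuars / NON_FP_BF_REGS_PER_PAGE;
	unsigned int gross_uuars = num_uars * BF_REGS_PER_PAGE;
	unsigned int i, fast_path = 0;

	for (i = 0; i < gross_uuars; i++)
		if ((i & 3) == 2 || (i & 3) == 3)
			fast_path++;	/* pre-set in uuari->bitmap */

	printf("uars=%u gross=%u fast-path=%u usable=%u\n",
	       num_uars, gross_uuars, fast_path, gross_uuars - fast_path);
	return 0;
}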

+ 2 - 2
drivers/infiniband/hw/mlx5/mlx5_ib.h

@@ -195,6 +195,7 @@ struct mlx5_ib_cq_buf {
 	struct mlx5_buf		buf;
 	struct ib_umem		*umem;
 	int			cqe_size;
+	int			nent;
 };
 
 enum mlx5_ib_qp_flags {
@@ -220,7 +221,7 @@ struct mlx5_ib_cq {
 	/* protect resize cq
 	 */
 	struct mutex		resize_mutex;
-	struct mlx5_ib_cq_resize *resize_buf;
+	struct mlx5_ib_cq_buf  *resize_buf;
 	struct ib_umem	       *resize_umem;
 	int			cqe_size;
 };
@@ -264,7 +265,6 @@ struct mlx5_ib_mr {
 	enum ib_wc_status	status;
 	struct mlx5_ib_dev     *dev;
 	struct mlx5_create_mkey_mbox_out out;
-	unsigned long		start;
 };
 
 struct mlx5_ib_fast_reg_page_list {

+ 0 - 1
drivers/infiniband/hw/mlx5/mr.c

@@ -146,7 +146,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 		spin_lock_irq(&ent->lock);
 		ent->pending++;
 		spin_unlock_irq(&ent->lock);
-		mr->start = jiffies;
 		err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
 					    sizeof(*in), reg_mr_callback,
 					    mr, &mr->out);

+ 93 - 40
drivers/infiniband/hw/mlx5/qp.c

@@ -340,14 +340,57 @@ static int qp_has_rq(struct ib_qp_init_attr *attr)
 	return 1;
 	return 1;
 }
 }
 
 
+static int first_med_uuar(void)
+{
+	return 1;
+}
+
+static int next_uuar(int n)
+{
+	n++;
+
+	while (((n % 4) & 2))
+		n++;
+
+	return n;
+}
+
+static int num_med_uuar(struct mlx5_uuar_info *uuari)
+{
+	int n;
+
+	n = uuari->num_uars * MLX5_NON_FP_BF_REGS_PER_PAGE -
+		uuari->num_low_latency_uuars - 1;
+
+	return n >= 0 ? n : 0;
+}
+
+static int max_uuari(struct mlx5_uuar_info *uuari)
+{
+	return uuari->num_uars * 4;
+}
+
+static int first_hi_uuar(struct mlx5_uuar_info *uuari)
+{
+	int med;
+	int i;
+	int t;
+
+	med = num_med_uuar(uuari);
+	for (t = 0, i = first_med_uuar();; i = next_uuar(i)) {
+		t++;
+		if (t == med)
+			return next_uuar(i);
+	}
+
+	return 0;
+}
+
 static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
 static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
 {
 {
-	int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
-	int start_uuar;
 	int i;
 	int i;
 
 
-	start_uuar = nuuars - uuari->num_low_latency_uuars;
-	for (i = start_uuar; i < nuuars; i++) {
+	for (i = first_hi_uuar(uuari); i < max_uuari(uuari); i = next_uuar(i)) {
 		if (!test_bit(i, uuari->bitmap)) {
 		if (!test_bit(i, uuari->bitmap)) {
 			set_bit(i, uuari->bitmap);
 			set_bit(i, uuari->bitmap);
 			uuari->count[i]++;
 			uuari->count[i]++;
@@ -360,19 +403,10 @@ static int alloc_high_class_uuar(struct mlx5_uuar_info *uuari)
 
 
 static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
 static int alloc_med_class_uuar(struct mlx5_uuar_info *uuari)
 {
 {
-	int nuuars = uuari->num_uars * MLX5_BF_REGS_PER_PAGE;
-	int minidx = 1;
-	int uuarn;
-	int end;
+	int minidx = first_med_uuar();
 	int i;
 	int i;
 
 
-	end = nuuars - uuari->num_low_latency_uuars;
-
-	for (i = 1; i < end; i++) {
-		uuarn = i & 3;
-		if (uuarn == 2 || uuarn == 3)
-			continue;
-
+	for (i = first_med_uuar(); i < first_hi_uuar(uuari); i = next_uuar(i)) {
 		if (uuari->count[i] < uuari->count[minidx])
 		if (uuari->count[i] < uuari->count[minidx])
 			minidx = i;
 			minidx = i;
 	}
 	}
@@ -489,12 +523,12 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 {
 {
 	struct mlx5_ib_ucontext *context;
 	struct mlx5_ib_ucontext *context;
 	struct mlx5_ib_create_qp ucmd;
 	struct mlx5_ib_create_qp ucmd;
-	int page_shift;
+	int page_shift = 0;
 	int uar_index;
 	int uar_index;
 	int npages;
 	int npages;
-	u32 offset;
+	u32 offset = 0;
 	int uuarn;
 	int uuarn;
-	int ncont;
+	int ncont = 0;
 	int err;
 	int err;
 
 
 	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
 	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
@@ -510,11 +544,16 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
 	uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_HIGH);
 	if (uuarn < 0) {
 	if (uuarn < 0) {
 		mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
 		mlx5_ib_dbg(dev, "failed to allocate low latency UUAR\n");
-		mlx5_ib_dbg(dev, "reverting to high latency\n");
-		uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+		mlx5_ib_dbg(dev, "reverting to medium latency\n");
+		uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_MEDIUM);
 		if (uuarn < 0) {
 		if (uuarn < 0) {
-			mlx5_ib_dbg(dev, "uuar allocation failed\n");
-			return uuarn;
+			mlx5_ib_dbg(dev, "failed to allocate medium latency UUAR\n");
+			mlx5_ib_dbg(dev, "reverting to high latency\n");
+			uuarn = alloc_uuar(&context->uuari, MLX5_IB_LATENCY_CLASS_LOW);
+			if (uuarn < 0) {
+				mlx5_ib_warn(dev, "uuar allocation failed\n");
+				return uuarn;
+			}
 		}
 		}
 	}
 	}
 
 
@@ -525,23 +564,29 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 	if (err)
 	if (err)
 		goto err_uuar;
 		goto err_uuar;
 
 
-	qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
-			       qp->buf_size, 0, 0);
-	if (IS_ERR(qp->umem)) {
-		mlx5_ib_dbg(dev, "umem_get failed\n");
-		err = PTR_ERR(qp->umem);
-		goto err_uuar;
+	if (ucmd.buf_addr && qp->buf_size) {
+		qp->umem = ib_umem_get(pd->uobject->context, ucmd.buf_addr,
+				       qp->buf_size, 0, 0);
+		if (IS_ERR(qp->umem)) {
+			mlx5_ib_dbg(dev, "umem_get failed\n");
+			err = PTR_ERR(qp->umem);
+			goto err_uuar;
+		}
+	} else {
+		qp->umem = NULL;
 	}
 	}
 
 
-	mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
-			   &ncont, NULL);
-	err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
-	if (err) {
-		mlx5_ib_warn(dev, "bad offset\n");
-		goto err_umem;
+	if (qp->umem) {
+		mlx5_ib_cont_pages(qp->umem, ucmd.buf_addr, &npages, &page_shift,
+				   &ncont, NULL);
+		err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset);
+		if (err) {
+			mlx5_ib_warn(dev, "bad offset\n");
+			goto err_umem;
+		}
+		mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
+			    ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
 	}
 	}
-	mlx5_ib_dbg(dev, "addr 0x%llx, size %d, npages %d, page_shift %d, ncont %d, offset %d\n",
-		    ucmd.buf_addr, qp->buf_size, npages, page_shift, ncont, offset);
 
 
 	*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
 	*inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont;
 	*in = mlx5_vzalloc(*inlen);
 	*in = mlx5_vzalloc(*inlen);
@@ -549,7 +594,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		err = -ENOMEM;
 		err = -ENOMEM;
 		goto err_umem;
 		goto err_umem;
 	}
 	}
-	mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
+	if (qp->umem)
+		mlx5_ib_populate_pas(dev, qp->umem, page_shift, (*in)->pas, 0);
 	(*in)->ctx.log_pg_sz_remote_qpn =
 	(*in)->ctx.log_pg_sz_remote_qpn =
 		cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
 		cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24);
 	(*in)->ctx.params2 = cpu_to_be32(offset << 6);
 	(*in)->ctx.params2 = cpu_to_be32(offset << 6);
@@ -580,7 +626,8 @@ err_free:
 	mlx5_vfree(*in);
 	mlx5_vfree(*in);
 
 
 err_umem:
 err_umem:
-	ib_umem_release(qp->umem);
+	if (qp->umem)
+		ib_umem_release(qp->umem);
 
 
 err_uuar:
 err_uuar:
 	free_uuar(&context->uuari, uuarn);
 	free_uuar(&context->uuari, uuarn);
@@ -593,7 +640,8 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp)
 
 
 	context = to_mucontext(pd->uobject->context);
 	context = to_mucontext(pd->uobject->context);
 	mlx5_ib_db_unmap_user(context, &qp->db);
 	mlx5_ib_db_unmap_user(context, &qp->db);
-	ib_umem_release(qp->umem);
+	if (qp->umem)
+		ib_umem_release(qp->umem);
 	free_uuar(&context->uuari, qp->uuarn);
 	free_uuar(&context->uuari, qp->uuarn);
 }
 }
 
 
@@ -1616,7 +1664,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
 
 	if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
 	if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
-	    !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask))
+	    !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
+				IB_LINK_LAYER_UNSPECIFIED))
 		goto out;
 		goto out;
 
 
 	if ((attr_mask & IB_QP_PORT) &&
 	if ((attr_mask & IB_QP_PORT) &&
@@ -2212,6 +2261,10 @@ out:
 
 
 		qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
 		qp->db.db[MLX5_SND_DBR] = cpu_to_be32(qp->sq.cur_post);
 
 
+		/* Make sure doorbell record is visible to the HCA before
+		 * we hit doorbell */
+		wmb();
+
 		if (bf->need_lock)
 		if (bf->need_lock)
 			spin_lock(&bf->lock);
 			spin_lock(&bf->lock);
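The helpers introduced in this qp.c hunk walk the UUAR space while skipping the two fast-path slots in every group of four, and split the remainder into a medium-latency range followed by a high-latency one. The sketch below reproduces that traversal in plain C; the UAR and low-latency counts are made-up inputs, and it assumes two non-fast-path registers per page as in the previous hunk.

/*
 * Sketch of the traversal helpers above (next_uuar skips slots 2 and 3
 * of each UAR page). Inputs are illustrative only.
 */
#include <stdio.h>

static int next_uuar(int n)
{
	n++;
	while ((n % 4) & 2)	/* skip the fast-path slots */
		n++;
	return n;
}

int main(void)
{
	int num_uars = 4, num_low_latency = 3;
	int med = num_uars * 2 - num_low_latency - 1;	/* num_med_uuar() */
	int max = num_uars * 4;				/* max_uuari() */
	int i, t;

	for (t = 0, i = 1; i < max; i = next_uuar(i)) {
		t++;
		printf("uuar %2d -> %s class\n", i,
		       t <= med ? "medium" : "high");
	}
	return 0;
}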
 
 

+ 3 - 0
drivers/infiniband/hw/mlx5/user.h

@@ -93,6 +93,9 @@ struct mlx5_ib_create_cq_resp {
 
 struct mlx5_ib_resize_cq {
 	__u64	buf_addr;
+	__u16	cqe_size;
+	__u16	reserved0;
+	__u32	reserved1;
 };
 
 struct mlx5_ib_create_srq {
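The resize command now carries the CQE size plus explicit reserved fields. A hypothetical userspace mirror of the layout is shown below just to illustrate that the explicit padding keeps the command at a fixed 16 bytes; the struct name is local to the example.

/*
 * Illustrative mirror of the command layout above; not the real uapi
 * header, only a demonstration of the 16-byte fixed size.
 */
#include <stdint.h>

struct mlx5_ib_resize_cq_example {
	uint64_t buf_addr;
	uint16_t cqe_size;
	uint16_t reserved0;
	uint32_t reserved1;
};

_Static_assert(sizeof(struct mlx5_ib_resize_cq_example) == 16,
	       "resize CQ command must stay 16 bytes");

int main(void)
{
	return 0;
}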

+ 2 - 1
drivers/infiniband/hw/mthca/mthca_qp.c

@@ -860,7 +860,8 @@ int mthca_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int attr_mask,
 
 	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask)) {
+	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
+				IB_LINK_LAYER_UNSPECIFIED)) {
 		mthca_dbg(dev, "Bad QP transition (transport %d) "
 			  "%d->%d with attr 0x%08x\n",
 			  qp->transport, cur_state, new_state,

+ 1 - 2
drivers/infiniband/hw/nes/nes_cm.c

@@ -1354,8 +1354,7 @@ static int nes_addr_resolve_neigh(struct nes_vnic *nesvnic, u32 dst_ip, int arpi
 				  neigh->ha, ntohl(rt->rt_gateway));
 
 			if (arpindex >= 0) {
-				if (!memcmp(nesadapter->arp_table[arpindex].mac_addr,
-					    neigh->ha, ETH_ALEN)) {
+				if (ether_addr_equal(nesadapter->arp_table[arpindex].mac_addr, neigh->ha)) {
 					/* Mac address same as in nes_arp_table */
 					goto out;
 				}

+ 1 - 1
drivers/infiniband/hw/ocrdma/Kconfig

@@ -1,6 +1,6 @@
 config INFINIBAND_OCRDMA
 	tristate "Emulex One Connect HCA support"
-	depends on ETHERNET && NETDEVICES && PCI && (IPV6 || IPV6=n)
+	depends on ETHERNET && NETDEVICES && PCI && INET && (IPV6 || IPV6=n)
 	select NET_VENDOR_EMULEX
 	select BE2NET
 	---help---

+ 12 - 0
drivers/infiniband/hw/ocrdma/ocrdma.h

@@ -423,5 +423,17 @@ static inline int is_cqe_wr_imm(struct ocrdma_cqe *cqe)
 		OCRDMA_CQE_WRITE_IMM) ? 1 : 0;
 }
 
+static inline int ocrdma_resolve_dmac(struct ocrdma_dev *dev,
+		struct ib_ah_attr *ah_attr, u8 *mac_addr)
+{
+	struct in6_addr in6;
+
+	memcpy(&in6, ah_attr->grh.dgid.raw, sizeof(in6));
+	if (rdma_is_multicast_addr(&in6))
+		rdma_get_mcast_mac(&in6, mac_addr);
+	else
+		memcpy(mac_addr, ah_attr->dmac, ETH_ALEN);
+	return 0;
+}
 
 #endif
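The new helper either copies the caller-supplied ah_attr->dmac or, for multicast GIDs, derives the Ethernet group address. The sketch below shows the multicast mapping (33:33 followed by the low 32 bits of the GID), which is what rdma_get_mcast_mac is understood to do at the time of this series; the sample GID bytes are arbitrary.

/*
 * Standalone sketch of the multicast branch of ocrdma_resolve_dmac():
 * map an IPv6 multicast GID to a 33:33:xx:xx:xx:xx Ethernet address.
 */
#include <stdio.h>
#include <string.h>

static void mcast_gid_to_mac(const unsigned char gid[16], unsigned char mac[6])
{
	mac[0] = 0x33;
	mac[1] = 0x33;
	memcpy(&mac[2], &gid[12], 4);	/* low 32 bits of the group ID */
}

int main(void)
{
	unsigned char gid[16] = { 0xff, 0x12, [12] = 0xde, 0xad, 0xbe, 0xef };
	unsigned char mac[6];
	int i;

	if (gid[0] == 0xff) {		/* same test rdma_is_multicast_addr makes */
		mcast_gid_to_mac(gid, mac);
		for (i = 0; i < 6; i++)
			printf("%02x%c", mac[i], i == 5 ? '\n' : ':');
	}
	return 0;
}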

+ 4 - 2
drivers/infiniband/hw/ocrdma/ocrdma_ah.c

@@ -49,7 +49,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
 
 	ah->sgid_index = attr->grh.sgid_index;
 
-	vlan_tag = rdma_get_vlan_id(&attr->grh.dgid);
+	vlan_tag = attr->vlan_id;
 	if (!vlan_tag || (vlan_tag > 0xFFF))
 		vlan_tag = dev->pvid;
 	if (vlan_tag && (vlan_tag < 0x1000)) {
@@ -64,7 +64,8 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
 		eth_sz = sizeof(struct ocrdma_eth_basic);
 	}
 	memcpy(&eth.smac[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
-	status = ocrdma_resolve_dgid(dev, &attr->grh.dgid, &eth.dmac[0]);
+	memcpy(&eth.dmac[0], attr->dmac, ETH_ALEN);
+	status = ocrdma_resolve_dmac(dev, attr, &eth.dmac[0]);
 	if (status)
 		return status;
 	status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index,
@@ -84,6 +85,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
 	memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
 	if (vlan_enabled)
 		ah->av->valid |= OCRDMA_AV_VLAN_VALID;
+	ah->av->valid = cpu_to_le32(ah->av->valid);
 	return status;
 }
 

+ 2 - 19
drivers/infiniband/hw/ocrdma/ocrdma_hw.c

@@ -2076,23 +2076,6 @@ mbx_err:
 	return status;
 }
 
-int ocrdma_resolve_dgid(struct ocrdma_dev *dev, union ib_gid *dgid,
-			u8 *mac_addr)
-{
-	struct in6_addr in6;
-
-	memcpy(&in6, dgid, sizeof in6);
-	if (rdma_is_multicast_addr(&in6)) {
-		rdma_get_mcast_mac(&in6, mac_addr);
-	} else if (rdma_link_local_addr(&in6)) {
-		rdma_get_ll_mac(&in6, mac_addr);
-	} else {
-		pr_err("%s() fail to resolve mac_addr.\n", __func__);
-		return -EINVAL;
-	}
-	return 0;
-}
-
 static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 				struct ocrdma_modify_qp *cmd,
 				struct ib_qp_attr *attrs)
@@ -2126,14 +2109,14 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 
 	qp->sgid_idx = ah_attr->grh.sgid_index;
 	memcpy(&cmd->params.sgid[0], &sgid.raw[0], sizeof(cmd->params.sgid));
-	ocrdma_resolve_dgid(qp->dev, &ah_attr->grh.dgid, &mac_addr[0]);
+	ocrdma_resolve_dmac(qp->dev, ah_attr, &mac_addr[0]);
 	cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
 				(mac_addr[2] << 16) | (mac_addr[3] << 24);
 	/* convert them to LE format. */
 	ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
 	ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
 	cmd->params.vlan_dmac_b4_to_b5 = mac_addr[4] | (mac_addr[5] << 8);
-	vlan_id = rdma_get_vlan_id(&sgid);
+	vlan_id = ah_attr->vlan_id;
 	if (vlan_id && (vlan_id < 0x1000)) {
 		cmd->params.vlan_dmac_b4_to_b5 |=
 		    vlan_id << OCRDMA_QP_PARAMS_VLAN_SHIFT;

+ 0 - 1
drivers/infiniband/hw/ocrdma/ocrdma_hw.h

@@ -94,7 +94,6 @@ void ocrdma_ring_cq_db(struct ocrdma_dev *, u16 cq_id, bool armed,
 int ocrdma_mbx_get_link_speed(struct ocrdma_dev *dev, u8 *lnk_speed);
 int ocrdma_query_config(struct ocrdma_dev *,
 			struct ocrdma_mbx_query_config *config);
-int ocrdma_resolve_dgid(struct ocrdma_dev *, union ib_gid *dgid, u8 *mac_addr);
 
 int ocrdma_mbx_alloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);
 int ocrdma_mbx_dealloc_pd(struct ocrdma_dev *, struct ocrdma_pd *);

+ 40 - 98
drivers/infiniband/hw/ocrdma/ocrdma_main.c

@@ -67,46 +67,24 @@ void ocrdma_get_guid(struct ocrdma_dev *dev, u8 *guid)
 	guid[7] = mac_addr[5];
 	guid[7] = mac_addr[5];
 }
 }
 
 
-static void ocrdma_build_sgid_mac(union ib_gid *sgid, unsigned char *mac_addr,
-				  bool is_vlan, u16 vlan_id)
-{
-	sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
-	sgid->raw[8] = mac_addr[0] ^ 2;
-	sgid->raw[9] = mac_addr[1];
-	sgid->raw[10] = mac_addr[2];
-	if (is_vlan) {
-		sgid->raw[11] = vlan_id >> 8;
-		sgid->raw[12] = vlan_id & 0xff;
-	} else {
-		sgid->raw[11] = 0xff;
-		sgid->raw[12] = 0xfe;
-	}
-	sgid->raw[13] = mac_addr[3];
-	sgid->raw[14] = mac_addr[4];
-	sgid->raw[15] = mac_addr[5];
-}
-
-static bool ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
-			    bool is_vlan, u16 vlan_id)
+static bool ocrdma_add_sgid(struct ocrdma_dev *dev, union ib_gid *new_sgid)
 {
 {
 	int i;
 	int i;
-	union ib_gid new_sgid;
 	unsigned long flags;
 	unsigned long flags;
 
 
 	memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid));
 	memset(&ocrdma_zero_sgid, 0, sizeof(union ib_gid));
 
 
-	ocrdma_build_sgid_mac(&new_sgid, mac_addr, is_vlan, vlan_id);
 
 
 	spin_lock_irqsave(&dev->sgid_lock, flags);
 	spin_lock_irqsave(&dev->sgid_lock, flags);
 	for (i = 0; i < OCRDMA_MAX_SGID; i++) {
 	for (i = 0; i < OCRDMA_MAX_SGID; i++) {
 		if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid,
 		if (!memcmp(&dev->sgid_tbl[i], &ocrdma_zero_sgid,
 			    sizeof(union ib_gid))) {
 			    sizeof(union ib_gid))) {
 			/* found free entry */
 			/* found free entry */
-			memcpy(&dev->sgid_tbl[i], &new_sgid,
+			memcpy(&dev->sgid_tbl[i], new_sgid,
 			       sizeof(union ib_gid));
 			       sizeof(union ib_gid));
 			spin_unlock_irqrestore(&dev->sgid_lock, flags);
 			spin_unlock_irqrestore(&dev->sgid_lock, flags);
 			return true;
 			return true;
-		} else if (!memcmp(&dev->sgid_tbl[i], &new_sgid,
+		} else if (!memcmp(&dev->sgid_tbl[i], new_sgid,
 				   sizeof(union ib_gid))) {
 				   sizeof(union ib_gid))) {
 			/* entry already present, no addition is required. */
 			/* entry already present, no addition is required. */
 			spin_unlock_irqrestore(&dev->sgid_lock, flags);
 			spin_unlock_irqrestore(&dev->sgid_lock, flags);
@@ -117,20 +95,17 @@ static bool ocrdma_add_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
 	return false;
 	return false;
 }
 }
 
 
-static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
-			    bool is_vlan, u16 vlan_id)
+static bool ocrdma_del_sgid(struct ocrdma_dev *dev, union ib_gid *sgid)
 {
 {
 	int found = false;
 	int found = false;
 	int i;
 	int i;
-	union ib_gid sgid;
 	unsigned long flags;
 	unsigned long flags;
 
 
-	ocrdma_build_sgid_mac(&sgid, mac_addr, is_vlan, vlan_id);
 
 
 	spin_lock_irqsave(&dev->sgid_lock, flags);
 	spin_lock_irqsave(&dev->sgid_lock, flags);
 	/* first is default sgid, which cannot be deleted. */
 	/* first is default sgid, which cannot be deleted. */
 	for (i = 1; i < OCRDMA_MAX_SGID; i++) {
 	for (i = 1; i < OCRDMA_MAX_SGID; i++) {
-		if (!memcmp(&dev->sgid_tbl[i], &sgid, sizeof(union ib_gid))) {
+		if (!memcmp(&dev->sgid_tbl[i], sgid, sizeof(union ib_gid))) {
 			/* found matching entry */
 			/* found matching entry */
 			memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid));
 			memset(&dev->sgid_tbl[i], 0, sizeof(union ib_gid));
 			found = true;
 			found = true;
@@ -141,75 +116,18 @@ static bool ocrdma_del_sgid(struct ocrdma_dev *dev, unsigned char *mac_addr,
 	return found;
 	return found;
 }
 }
 
 
-static void ocrdma_add_default_sgid(struct ocrdma_dev *dev)
-{
-	/* GID Index 0 - Invariant manufacturer-assigned EUI-64 */
-	union ib_gid *sgid = &dev->sgid_tbl[0];
-
-	sgid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
-	ocrdma_get_guid(dev, &sgid->raw[8]);
-}
-
-#if IS_ENABLED(CONFIG_VLAN_8021Q)
-static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
-{
-	struct net_device *netdev, *tmp;
-	u16 vlan_id;
-	bool is_vlan;
-
-	netdev = dev->nic_info.netdev;
-
-	rcu_read_lock();
-	for_each_netdev_rcu(&init_net, tmp) {
-		if (netdev == tmp || vlan_dev_real_dev(tmp) == netdev) {
-			if (!netif_running(tmp) || !netif_oper_up(tmp))
-				continue;
-			if (netdev != tmp) {
-				vlan_id = vlan_dev_vlan_id(tmp);
-				is_vlan = true;
-			} else {
-				is_vlan = false;
-				vlan_id = 0;
-				tmp = netdev;
-			}
-			ocrdma_add_sgid(dev, tmp->dev_addr, is_vlan, vlan_id);
-		}
-	}
-	rcu_read_unlock();
-}
-#else
-static void ocrdma_add_vlan_sgids(struct ocrdma_dev *dev)
-{
-
-}
-#endif /* VLAN */
-
-static int ocrdma_build_sgid_tbl(struct ocrdma_dev *dev)
+static int ocrdma_addr_event(unsigned long event, struct net_device *netdev,
+			     union ib_gid *gid)
 {
 {
-	ocrdma_add_default_sgid(dev);
-	ocrdma_add_vlan_sgids(dev);
-	return 0;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-
-static int ocrdma_inet6addr_event(struct notifier_block *notifier,
-				  unsigned long event, void *ptr)
-{
-	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
-	struct net_device *netdev = ifa->idev->dev;
 	struct ib_event gid_event;
 	struct ib_event gid_event;
 	struct ocrdma_dev *dev;
 	struct ocrdma_dev *dev;
 	bool found = false;
 	bool found = false;
 	bool updated = false;
 	bool updated = false;
 	bool is_vlan = false;
 	bool is_vlan = false;
-	u16 vid = 0;
 
 
 	is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN;
 	is_vlan = netdev->priv_flags & IFF_802_1Q_VLAN;
-	if (is_vlan) {
-		vid = vlan_dev_vlan_id(netdev);
+	if (is_vlan)
 		netdev = vlan_dev_real_dev(netdev);
 		netdev = vlan_dev_real_dev(netdev);
-	}
 
 
 	rcu_read_lock();
 	rcu_read_lock();
 	list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) {
 	list_for_each_entry_rcu(dev, &ocrdma_dev_list, entry) {
@@ -222,16 +140,14 @@ static int ocrdma_inet6addr_event(struct notifier_block *notifier,
 
 
 	if (!found)
 	if (!found)
 		return NOTIFY_DONE;
 		return NOTIFY_DONE;
-	if (!rdma_link_local_addr((struct in6_addr *)&ifa->addr))
-		return NOTIFY_DONE;
 
 
 	mutex_lock(&dev->dev_lock);
 	mutex_lock(&dev->dev_lock);
 	switch (event) {
 	switch (event) {
 	case NETDEV_UP:
 	case NETDEV_UP:
-		updated = ocrdma_add_sgid(dev, netdev->dev_addr, is_vlan, vid);
+		updated = ocrdma_add_sgid(dev, gid);
 		break;
 		break;
 	case NETDEV_DOWN:
 	case NETDEV_DOWN:
-		updated = ocrdma_del_sgid(dev, netdev->dev_addr, is_vlan, vid);
+		updated = ocrdma_del_sgid(dev, gid);
 		break;
 		break;
 	default:
 	default:
 		break;
 		break;
@@ -247,6 +163,32 @@ static int ocrdma_inet6addr_event(struct notifier_block *notifier,
 	return NOTIFY_OK;
 	return NOTIFY_OK;
 }
 }
 
 
+static int ocrdma_inetaddr_event(struct notifier_block *notifier,
+				  unsigned long event, void *ptr)
+{
+	struct in_ifaddr *ifa = ptr;
+	union ib_gid gid;
+	struct net_device *netdev = ifa->ifa_dev->dev;
+
+	ipv6_addr_set_v4mapped(ifa->ifa_address, (struct in6_addr *)&gid);
+	return ocrdma_addr_event(event, netdev, &gid);
+}
+
+static struct notifier_block ocrdma_inetaddr_notifier = {
+	.notifier_call = ocrdma_inetaddr_event
+};
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+static int ocrdma_inet6addr_event(struct notifier_block *notifier,
+				  unsigned long event, void *ptr)
+{
+	struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr;
+	union  ib_gid *gid = (union ib_gid *)&ifa->addr;
+	struct net_device *netdev = ifa->idev->dev;
+	return ocrdma_addr_event(event, netdev, gid);
+}
+
 static struct notifier_block ocrdma_inet6addr_notifier = {
 static struct notifier_block ocrdma_inet6addr_notifier = {
 	.notifier_call = ocrdma_inet6addr_event
 	.notifier_call = ocrdma_inet6addr_event
 };
 };
@@ -423,10 +365,6 @@ static struct ocrdma_dev *ocrdma_add(struct be_dev_info *dev_info)
 	if (status)
 	if (status)
 		goto alloc_err;
 		goto alloc_err;
 
 
-	status = ocrdma_build_sgid_tbl(dev);
-	if (status)
-		goto alloc_err;
-
 	status = ocrdma_register_device(dev);
 	status = ocrdma_register_device(dev);
 	if (status)
 	if (status)
 		goto alloc_err;
 		goto alloc_err;
@@ -553,6 +491,10 @@ static int __init ocrdma_init_module(void)
 {
 {
 	int status;
 	int status;
 
 
+	status = register_inetaddr_notifier(&ocrdma_inetaddr_notifier);
+	if (status)
+		return status;
+
 #if IS_ENABLED(CONFIG_IPV6)
 #if IS_ENABLED(CONFIG_IPV6)
 	status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
 	status = register_inet6addr_notifier(&ocrdma_inet6addr_notifier);
 	if (status)
 	if (status)
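With the new inetaddr notifier, an IPv4 address is installed in the SGID table as an IPv4-mapped IPv6 address, which is the layout ipv6_addr_set_v4mapped produces. A standalone sketch of that 16-byte GID follows; the sample address is arbitrary.

/*
 * Sketch of the GID the inetaddr notifier installs: ::ffff:a.b.c.d laid
 * out across the 16 GID bytes.
 */
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

int main(void)
{
	unsigned char gid[16];
	struct in_addr v4;
	int i;

	inet_pton(AF_INET, "192.168.10.7", &v4);

	memset(gid, 0, 10);
	gid[10] = 0xff;
	gid[11] = 0xff;
	memcpy(&gid[12], &v4.s_addr, 4);	/* already network byte order */

	for (i = 0; i < 16; i++)
		printf("%02x%s", gid[i], (i & 1) && i != 15 ? ":" : "");
	printf("\n");
	return 0;
}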

+ 2 - 2
drivers/infiniband/hw/ocrdma/ocrdma_sli.h

@@ -31,7 +31,7 @@
 #define Bit(_b) (1 << (_b))
 
 #define OCRDMA_GEN1_FAMILY	0xB
-#define OCRDMA_GEN2_FAMILY	0x2
+#define OCRDMA_GEN2_FAMILY	0x0F
 
 #define OCRDMA_SUBSYS_ROCE 10
 enum {
@@ -1694,7 +1694,7 @@ struct ocrdma_grh {
 	u16	rsvd;
 } __packed;
 
-#define OCRDMA_AV_VALID		Bit(0)
+#define OCRDMA_AV_VALID		Bit(7)
 #define OCRDMA_AV_VLAN_VALID	Bit(1)
 
 struct ocrdma_av {

+ 2 - 1
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

@@ -1326,7 +1326,8 @@ int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		new_qps = old_qps;
 	spin_unlock_irqrestore(&qp->q_lock, flags);
 
-	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
+	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask,
+				IB_LINK_LAYER_ETHERNET)) {
 		pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
 		       "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
 		       __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,

+ 1 - 1
drivers/infiniband/hw/qib/qib_qp.c

@@ -585,7 +585,7 @@ int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
 
 	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask))
+				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
 		goto inval;
 
 	if (attr_mask & IB_QP_AV) {

+ 8 - 1
drivers/infiniband/hw/qib/qib_ud.c

@@ -57,13 +57,20 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 	struct qib_sge *sge;
 	struct ib_wc wc;
 	u32 length;
+	enum ib_qp_type sqptype, dqptype;
 
 	qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
 	if (!qp) {
 		ibp->n_pkt_drops++;
 		return;
 	}
-	if (qp->ibqp.qp_type != sqp->ibqp.qp_type ||
+
+	sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
+			IB_QPT_UD : sqp->ibqp.qp_type;
+	dqptype = qp->ibqp.qp_type == IB_QPT_GSI ?
+			IB_QPT_UD : qp->ibqp.qp_type;
+
+	if (dqptype != sqptype ||
 	    !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
 		ibp->n_pkt_drops++;
 		goto drop;
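The fix folds GSI QPs to UD before comparing the two loopback endpoints. A minimal sketch of that normalization, using an illustrative enum rather than the kernel's ib_qp_type values:

/*
 * Minimal sketch: GSI QPs exchange UD packets, so both ends are folded
 * to the UD type before the compare.
 */
#include <stdio.h>

enum qp_type { QPT_SMI, QPT_GSI, QPT_UD };	/* illustrative subset */

static enum qp_type normalize(enum qp_type t)
{
	return t == QPT_GSI ? QPT_UD : t;
}

int main(void)
{
	enum qp_type sqp = QPT_GSI, dqp = QPT_UD;

	printf("loopback %s\n",
	       normalize(sqp) == normalize(dqp) ? "allowed" : "dropped");
	return 0;
}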

+ 10 - 0
drivers/infiniband/hw/usnic/Kconfig

@@ -0,0 +1,10 @@
+config INFINIBAND_USNIC
+	tristate "Verbs support for Cisco VIC"
+	depends on NETDEVICES && ETHERNET && INET && PCI && INTEL_IOMMU
+	select ENIC
+	select NET_VENDOR_CISCO
+	select PCI_IOV
+	select INFINIBAND_USER_ACCESS
+	---help---
+	  This is a low-level driver for Cisco's Virtual Interface
+	  Cards (VICs), including the VIC 1240 and 1280 cards.

+ 15 - 0
drivers/infiniband/hw/usnic/Makefile

@@ -0,0 +1,15 @@
+ccflags-y := -Idrivers/net/ethernet/cisco/enic
+
+obj-$(CONFIG_INFINIBAND_USNIC)+= usnic_verbs.o
+
+usnic_verbs-y=\
+usnic_fwd.o \
+usnic_transport.o \
+usnic_uiom.o \
+usnic_uiom_interval_tree.o \
+usnic_vnic.o \
+usnic_ib_main.o \
+usnic_ib_qp_grp.o \
+usnic_ib_sysfs.o \
+usnic_ib_verbs.o \
+usnic_debugfs.o \

+ 29 - 0
drivers/infiniband/hw/usnic/usnic.h

@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_H_
+#define USNIC_H_
+
+#define DRV_NAME	"usnic_verbs"
+
+#define PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC	0x00cf	/* User space NIC */
+
+#define DRV_VERSION    "1.0.3"
+#define DRV_RELDATE    "December 19, 2013"
+
+#endif /* USNIC_H_ */

+ 73 - 0
drivers/infiniband/hw/usnic/usnic_abi.h

@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+
+#ifndef USNIC_ABI_H
+#define USNIC_ABI_H
+
+/* ABI between userspace and kernel */
+#define USNIC_UVERBS_ABI_VERSION	4
+
+#define USNIC_QP_GRP_MAX_WQS		8
+#define USNIC_QP_GRP_MAX_RQS		8
+#define USNIC_QP_GRP_MAX_CQS		16
+
+enum usnic_transport_type {
+	USNIC_TRANSPORT_UNKNOWN		= 0,
+	USNIC_TRANSPORT_ROCE_CUSTOM	= 1,
+	USNIC_TRANSPORT_IPV4_UDP	= 2,
+	USNIC_TRANSPORT_MAX		= 3,
+};
+
+struct usnic_transport_spec {
+	enum usnic_transport_type	trans_type;
+	union {
+		struct {
+			uint16_t	port_num;
+		} usnic_roce;
+		struct {
+			uint32_t	sock_fd;
+		} udp;
+	};
+};
+
+struct usnic_ib_create_qp_cmd {
+	struct usnic_transport_spec	spec;
+};
+
+/*TODO: Future - usnic_modify_qp needs to pass in generic filters */
+struct usnic_ib_create_qp_resp {
+	u32				vfid;
+	u32				qp_grp_id;
+	u64				bar_bus_addr;
+	u32				bar_len;
+/*
+ * WQ, RQ, CQ are explicitly specified because exposing a generic resources interface
+ * expands the scope of ABI to many files.
+ */
+	u32				wq_cnt;
+	u32				rq_cnt;
+	u32				cq_cnt;
+	u32				wq_idx[USNIC_QP_GRP_MAX_WQS];
+	u32				rq_idx[USNIC_QP_GRP_MAX_RQS];
+	u32				cq_idx[USNIC_QP_GRP_MAX_CQS];
+	u32				transport;
+	u32				reserved[9];
+};
+
+#endif /* USNIC_ABI_H */
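For illustration only, a hypothetical userspace snippet that fills the transport spec above for the UDP transport. How a real provider obtains and passes the socket descriptor is not shown in this series, so treat the values as placeholders.

/*
 * Hypothetical userspace mirror of the ABI structs above; purely an
 * illustration of the layout, not provider code.
 */
#include <stdint.h>
#include <stdio.h>

enum usnic_transport_type {
	USNIC_TRANSPORT_UNKNOWN		= 0,
	USNIC_TRANSPORT_ROCE_CUSTOM	= 1,
	USNIC_TRANSPORT_IPV4_UDP	= 2,
};

struct usnic_transport_spec {
	enum usnic_transport_type trans_type;
	union {
		struct { uint16_t port_num; } usnic_roce;
		struct { uint32_t sock_fd; } udp;
	};
};

int main(void)
{
	struct usnic_transport_spec spec = {
		.trans_type = USNIC_TRANSPORT_IPV4_UDP,
		.udp.sock_fd = 3,	/* an already-bound UDP socket, assumed */
	};

	printf("transport %d, fd %u\n", spec.trans_type, spec.udp.sock_fd);
	return 0;
}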

+ 27 - 0
drivers/infiniband/hw/usnic/usnic_common_pkt_hdr.h

@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_CMN_PKT_HDR_H
+#define USNIC_CMN_PKT_HDR_H
+
+#define USNIC_ROCE_ETHERTYPE		(0x8915)
+#define USNIC_ROCE_GRH_VER              (8)
+#define USNIC_PROTO_VER                 (1)
+#define USNIC_ROCE_GRH_VER_SHIFT        (4)
+
+#endif /* USNIC_COMMON_PKT_HDR_H */

+ 68 - 0
drivers/infiniband/hw/usnic/usnic_common_util.h

@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_CMN_UTIL_H
+#define USNIC_CMN_UTIL_H
+
+static inline void
+usnic_mac_to_gid(const char *const mac, char *raw_gid)
+{
+	raw_gid[0] = 0xfe;
+	raw_gid[1] = 0x80;
+	memset(&raw_gid[2], 0, 6);
+	raw_gid[8] = mac[0]^2;
+	raw_gid[9] = mac[1];
+	raw_gid[10] = mac[2];
+	raw_gid[11] = 0xff;
+	raw_gid[12] = 0xfe;
+	raw_gid[13] = mac[3];
+	raw_gid[14] = mac[4];
+	raw_gid[15] = mac[5];
+}
+
+static inline void
+usnic_mac_ip_to_gid(const char *const mac, const __be32 inaddr, char *raw_gid)
+{
+	raw_gid[0] = 0xfe;
+	raw_gid[1] = 0x80;
+	memset(&raw_gid[2], 0, 2);
+	memcpy(&raw_gid[4], &inaddr, 4);
+	raw_gid[8] = mac[0]^2;
+	raw_gid[9] = mac[1];
+	raw_gid[10] = mac[2];
+	raw_gid[11] = 0xff;
+	raw_gid[12] = 0xfe;
+	raw_gid[13] = mac[3];
+	raw_gid[14] = mac[4];
+	raw_gid[15] = mac[5];
+}
+
+static inline void
+usnic_write_gid_if_id_from_mac(char *mac, char *raw_gid)
+{
+	raw_gid[8] = mac[0]^2;
+	raw_gid[9] = mac[1];
+	raw_gid[10] = mac[2];
+	raw_gid[11] = 0xff;
+	raw_gid[12] = 0xfe;
+	raw_gid[13] = mac[3];
+	raw_gid[14] = mac[4];
+	raw_gid[15] = mac[5];
+}
+
+#endif /* USNIC_COMMON_UTIL_H */
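The helpers above build a link-local GID from the MAC using the modified EUI-64 rule (universal/local bit flipped, ff:fe inserted). A runnable demo of the same construction, with an arbitrary sample MAC:

/*
 * Runnable demo of the GID construction in usnic_mac_to_gid(): modified
 * EUI-64 interface ID under the fe80::/64 link-local prefix.
 */
#include <stdio.h>
#include <string.h>

static void mac_to_gid(const unsigned char mac[6], unsigned char gid[16])
{
	memset(gid, 0, 16);
	gid[0] = 0xfe;
	gid[1] = 0x80;
	gid[8] = mac[0] ^ 2;	/* flip the universal/local bit */
	gid[9] = mac[1];
	gid[10] = mac[2];
	gid[11] = 0xff;
	gid[12] = 0xfe;
	gid[13] = mac[3];
	gid[14] = mac[4];
	gid[15] = mac[5];
}

int main(void)
{
	unsigned char mac[6] = { 0x00, 0x25, 0xb5, 0x00, 0x00, 0x01 };
	unsigned char gid[16];
	int i;

	mac_to_gid(mac, gid);
	for (i = 0; i < 16; i += 2)
		printf("%02x%02x%s", gid[i], gid[i + 1], i == 14 ? "\n" : ":");
	return 0;
}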

+ 154 - 0
drivers/infiniband/hw/usnic/usnic_debugfs.c

@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "usnic.h"
+#include "usnic_log.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_transport.h"
+
+static struct dentry *debugfs_root;
+static struct dentry *flows_dentry;
+
+static ssize_t usnic_debugfs_buildinfo_read(struct file *f, char __user *data,
+						size_t count, loff_t *ppos)
+{
+	char buf[500];
+	int res;
+
+	if (*ppos > 0)
+		return 0;
+
+	res = scnprintf(buf, sizeof(buf),
+			"version:       %s\n"
+			"build date:    %s\n",
+			DRV_VERSION, DRV_RELDATE);
+
+	return simple_read_from_buffer(data, count, ppos, buf, res);
+}
+
+static const struct file_operations usnic_debugfs_buildinfo_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = usnic_debugfs_buildinfo_read
+};
+
+static ssize_t flowinfo_read(struct file *f, char __user *data,
+				size_t count, loff_t *ppos)
+{
+	struct usnic_ib_qp_grp_flow *qp_flow;
+	int n;
+	int left;
+	char *ptr;
+	char buf[512];
+
+	qp_flow = f->private_data;
+	ptr = buf;
+	left = count;
+
+	if (*ppos > 0)
+		return 0;
+
+	spin_lock(&qp_flow->qp_grp->lock);
+	n = scnprintf(ptr, left,
+			"QP Grp ID: %d Transport: %s ",
+			qp_flow->qp_grp->grp_id,
+			usnic_transport_to_str(qp_flow->trans_type));
+	UPDATE_PTR_LEFT(n, ptr, left);
+	if (qp_flow->trans_type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+		n = scnprintf(ptr, left, "Port_Num:%hu\n",
+					qp_flow->usnic_roce.port_num);
+		UPDATE_PTR_LEFT(n, ptr, left);
+	} else if (qp_flow->trans_type == USNIC_TRANSPORT_IPV4_UDP) {
+		n = usnic_transport_sock_to_str(ptr, left,
+				qp_flow->udp.sock);
+		UPDATE_PTR_LEFT(n, ptr, left);
+		n = scnprintf(ptr, left, "\n");
+		UPDATE_PTR_LEFT(n, ptr, left);
+	}
+	spin_unlock(&qp_flow->qp_grp->lock);
+
+	return simple_read_from_buffer(data, count, ppos, buf, ptr - buf);
+}
+
+static const struct file_operations flowinfo_ops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = flowinfo_read,
+};
+
+void usnic_debugfs_init(void)
+{
+	debugfs_root = debugfs_create_dir(DRV_NAME, NULL);
+	if (IS_ERR(debugfs_root)) {
+		usnic_err("Failed to create debugfs root dir, check if debugfs is enabled in kernel configuration\n");
+		goto out_clear_root;
+	}
+
+	flows_dentry = debugfs_create_dir("flows", debugfs_root);
+	if (IS_ERR_OR_NULL(flows_dentry)) {
+		usnic_err("Failed to create debugfs flow dir with err %ld\n",
+				PTR_ERR(flows_dentry));
+		goto out_free_root;
+	}
+
+	debugfs_create_file("build-info", S_IRUGO, debugfs_root,
+				NULL, &usnic_debugfs_buildinfo_ops);
+	return;
+
+out_free_root:
+	debugfs_remove_recursive(debugfs_root);
+out_clear_root:
+	debugfs_root = NULL;
+}
+
+void usnic_debugfs_exit(void)
+{
+	if (!debugfs_root)
+		return;
+
+	debugfs_remove_recursive(debugfs_root);
+	debugfs_root = NULL;
+}
+
+void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+	if (IS_ERR_OR_NULL(flows_dentry))
+		return;
+
+	scnprintf(qp_flow->dentry_name, sizeof(qp_flow->dentry_name),
+			"%u", qp_flow->flow->flow_id);
+	qp_flow->dbgfs_dentry = debugfs_create_file(qp_flow->dentry_name,
+							S_IRUGO,
+							flows_dentry,
+							qp_flow,
+							&flowinfo_ops);
+	if (IS_ERR_OR_NULL(qp_flow->dbgfs_dentry)) {
+		usnic_err("Failed to create dbg fs entry for flow %u\n",
+				qp_flow->flow->flow_id);
+	}
+}
+
+void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+	if (!IS_ERR_OR_NULL(qp_flow->dbgfs_dentry))
+		debugfs_remove(qp_flow->dbgfs_dentry);
+}

+ 29 - 0
drivers/infiniband/hw/usnic/usnic_debugfs.h

@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#ifndef USNIC_DEBUGFS_H_
+#define USNIC_DEBUGFS_H_
+
+#include "usnic_ib_qp_grp.h"
+
+void usnic_debugfs_init(void);
+
+void usnic_debugfs_exit(void);
+void usnic_debugfs_flow_add(struct usnic_ib_qp_grp_flow *qp_flow);
+void usnic_debugfs_flow_remove(struct usnic_ib_qp_grp_flow *qp_flow);
+
+#endif /*!USNIC_DEBUGFS_H_ */

+ 350 - 0
drivers/infiniband/hw/usnic/usnic_fwd.c

@@ -0,0 +1,350 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+
+#include "enic_api.h"
+#include "usnic_common_pkt_hdr.h"
+#include "usnic_fwd.h"
+#include "usnic_log.h"
+
+static int usnic_fwd_devcmd_locked(struct usnic_fwd_dev *ufdev, int vnic_idx,
+					enum vnic_devcmd_cmd cmd, u64 *a0,
+					u64 *a1)
+{
+	int status;
+	struct net_device *netdev = ufdev->netdev;
+
+	lockdep_assert_held(&ufdev->lock);
+
+	status = enic_api_devcmd_proxy_by_index(netdev,
+			vnic_idx,
+			cmd,
+			a0, a1,
+			1000);
+	if (status) {
+		if (status == ERR_EINVAL && cmd == CMD_DEL_FILTER) {
+			usnic_dbg("Dev %s vnic idx %u cmd %u already deleted",
+					ufdev->name, vnic_idx, cmd);
+		} else {
+			usnic_err("Dev %s vnic idx %u cmd %u failed with status %d\n",
+					ufdev->name, vnic_idx, cmd,
+					status);
+		}
+	} else {
+		usnic_dbg("Dev %s vnic idx %u cmd %u success",
+				ufdev->name, vnic_idx, cmd);
+	}
+
+	return status;
+}
+
+static int usnic_fwd_devcmd(struct usnic_fwd_dev *ufdev, int vnic_idx,
+				enum vnic_devcmd_cmd cmd, u64 *a0, u64 *a1)
+{
+	int status;
+
+	spin_lock(&ufdev->lock);
+	status = usnic_fwd_devcmd_locked(ufdev, vnic_idx, cmd, a0, a1);
+	spin_unlock(&ufdev->lock);
+
+	return status;
+}
+
+struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev)
+{
+	struct usnic_fwd_dev *ufdev;
+
+	ufdev = kzalloc(sizeof(*ufdev), GFP_KERNEL);
+	if (!ufdev)
+		return NULL;
+
+	ufdev->pdev = pdev;
+	ufdev->netdev = pci_get_drvdata(pdev);
+	spin_lock_init(&ufdev->lock);
+	strncpy(ufdev->name, netdev_name(ufdev->netdev),
+			sizeof(ufdev->name) - 1);
+
+	return ufdev;
+}
+
+void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev)
+{
+	kfree(ufdev);
+}
+
+void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN])
+{
+	spin_lock(&ufdev->lock);
+	memcpy(&ufdev->mac, mac, sizeof(ufdev->mac));
+	spin_unlock(&ufdev->lock);
+}
+
+int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr)
+{
+	int status;
+
+	spin_lock(&ufdev->lock);
+	if (ufdev->inaddr == 0) {
+		ufdev->inaddr = inaddr;
+		status = 0;
+	} else {
+		status = -EFAULT;
+	}
+	spin_unlock(&ufdev->lock);
+
+	return status;
+}
+
+void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev)
+{
+	spin_lock(&ufdev->lock);
+	ufdev->inaddr = 0;
+	spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev)
+{
+	spin_lock(&ufdev->lock);
+	ufdev->link_up = 1;
+	spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev)
+{
+	spin_lock(&ufdev->lock);
+	ufdev->link_up = 0;
+	spin_unlock(&ufdev->lock);
+}
+
+void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu)
+{
+	spin_lock(&ufdev->lock);
+	ufdev->mtu = mtu;
+	spin_unlock(&ufdev->lock);
+}
+
+static int usnic_fwd_dev_ready_locked(struct usnic_fwd_dev *ufdev)
+{
+	lockdep_assert_held(&ufdev->lock);
+
+	if (!ufdev->link_up)
+		return -EPERM;
+
+	return 0;
+}
+
+static int validate_filter_locked(struct usnic_fwd_dev *ufdev,
+					struct filter *filter)
+{
+
+	lockdep_assert_held(&ufdev->lock);
+
+	if (filter->type == FILTER_IPV4_5TUPLE) {
+		if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_AD))
+			return -EACCES;
+		if (!(filter->u.ipv4.flags & FILTER_FIELD_5TUP_DST_PT))
+			return -EBUSY;
+		else if (ufdev->inaddr == 0)
+			return -EINVAL;
+		else if (filter->u.ipv4.dst_port == 0)
+			return -ERANGE;
+		else if (ntohl(ufdev->inaddr) != filter->u.ipv4.dst_addr)
+			return -EFAULT;
+		else
+			return 0;
+	}
+
+	return 0;
+}
+
+static void fill_tlv(struct filter_tlv *tlv, struct filter *filter,
+		struct filter_action *action)
+{
+	tlv->type = CLSF_TLV_FILTER;
+	tlv->length = sizeof(struct filter);
+	*((struct filter *)&tlv->val) = *filter;
+
+	tlv = (struct filter_tlv *)((char *)tlv + sizeof(struct filter_tlv) +
+			sizeof(struct filter));
+	tlv->type = CLSF_TLV_ACTION;
+	tlv->length = sizeof(struct filter_action);
+	*((struct filter_action *)&tlv->val) = *action;
+}
+
+struct usnic_fwd_flow*
+usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
+				struct usnic_filter_action *uaction)
+{
+	struct filter_tlv *tlv;
+	struct pci_dev *pdev;
+	struct usnic_fwd_flow *flow;
+	uint64_t a0, a1;
+	uint64_t tlv_size;
+	dma_addr_t tlv_pa;
+	int status;
+
+	pdev = ufdev->pdev;
+	tlv_size = (2*sizeof(struct filter_tlv) + sizeof(struct filter) +
+			sizeof(struct filter_action));
+
+	flow = kzalloc(sizeof(*flow), GFP_ATOMIC);
+	if (!flow)
+		return ERR_PTR(-ENOMEM);
+
+	tlv = pci_alloc_consistent(pdev, tlv_size, &tlv_pa);
+	if (!tlv) {
+		usnic_err("Failed to allocate memory\n");
+		status = -ENOMEM;
+		goto out_free_flow;
+	}
+
+	fill_tlv(tlv, filter, &uaction->action);
+
+	spin_lock(&ufdev->lock);
+	status = usnic_fwd_dev_ready_locked(ufdev);
+	if (status) {
+		usnic_err("Forwarding dev %s not ready with status %d\n",
+				ufdev->name, status);
+		goto out_free_tlv;
+	}
+
+	status = validate_filter_locked(ufdev, filter);
+	if (status) {
+		usnic_err("Failed to validate filter with status %d\n",
+				status);
+		goto out_free_tlv;
+	}
+
+	/* Issue Devcmd */
+	a0 = tlv_pa;
+	a1 = tlv_size;
+	status = usnic_fwd_devcmd_locked(ufdev, uaction->vnic_idx,
+						CMD_ADD_FILTER, &a0, &a1);
+	if (status) {
+		usnic_err("VF %s Filter add failed with status:%d",
+				ufdev->name, status);
+		status = -EFAULT;
+		goto out_free_tlv;
+	} else {
+		usnic_dbg("VF %s FILTER ID:%llu", ufdev->name, a0);
+	}
+
+	flow->flow_id = (uint32_t) a0;
+	flow->vnic_idx = uaction->vnic_idx;
+	flow->ufdev = ufdev;
+
+out_free_tlv:
+	spin_unlock(&ufdev->lock);
+	pci_free_consistent(pdev, tlv_size, tlv, tlv_pa);
+	if (!status)
+		return flow;
+out_free_flow:
+	kfree(flow);
+	return ERR_PTR(status);
+}
+
+int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow)
+{
+	int status;
+	u64 a0, a1;
+
+	a0 = flow->flow_id;
+
+	status = usnic_fwd_devcmd(flow->ufdev, flow->vnic_idx,
+					CMD_DEL_FILTER, &a0, &a1);
+	if (status) {
+		if (status == ERR_EINVAL) {
+			usnic_dbg("Filter %u already deleted for VF Idx %u pf: %s status: %d",
+					flow->flow_id, flow->vnic_idx,
+					flow->ufdev->name, status);
+		} else {
+			usnic_err("PF %s VF Idx %u Filter: %u FILTER DELETE failed with status %d",
+					flow->ufdev->name, flow->vnic_idx,
+					flow->flow_id, status);
+		}
+		status = 0;
+		/*
+		 * Log the error and fake success to the caller because if
+		 * a flow fails to be deleted in the firmware, it is an
+		 * unrecoverable error.
+		 */
+	} else {
+		usnic_dbg("PF %s VF Idx %u Filter: %u FILTER DELETED",
+				flow->ufdev->name, flow->vnic_idx,
+				flow->flow_id);
+	}
+
+	kfree(flow);
+	return status;
+}
+
+int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
+{
+	int status;
+	struct net_device *pf_netdev;
+	u64 a0, a1;
+
+	pf_netdev = ufdev->netdev;
+	a0 = qp_idx;
+	a1 = CMD_QP_RQWQ;
+
+	status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_ENABLE,
+						&a0, &a1);
+	if (status) {
+		usnic_err("PF %s VNIC Index %u RQ Index: %u ENABLE Failed with status %d",
+				netdev_name(pf_netdev),
+				vnic_idx,
+				qp_idx,
+				status);
+	} else {
+		usnic_dbg("PF %s VNIC Index %u RQ Index: %u ENABLED",
+				netdev_name(pf_netdev),
+				vnic_idx, qp_idx);
+	}
+
+	return status;
+}
+
+int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx)
+{
+	int status;
+	u64 a0, a1;
+	struct net_device *pf_netdev;
+
+	pf_netdev = ufdev->netdev;
+	a0 = qp_idx;
+	a1 = CMD_QP_RQWQ;
+
+	status = usnic_fwd_devcmd(ufdev, vnic_idx, CMD_QP_DISABLE,
+			&a0, &a1);
+	if (status) {
+		usnic_err("PF %s VNIC Index %u RQ Index: %u DISABLE Failed with status %d",
+				netdev_name(pf_netdev),
+				vnic_idx,
+				qp_idx,
+				status);
+	} else {
+		usnic_dbg("PF %s VNIC Index %u RQ Index: %u DISABLED",
+				netdev_name(pf_netdev),
+				vnic_idx,
+				qp_idx);
+	}
+
+	return status;
+}
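usnic_fwd_alloc_flow() above hands the firmware a small DMA blob: a filter TLV immediately followed by an action TLV, sized as two TLV headers plus the two payloads. The sketch below only illustrates that packing; struct filter and struct filter_action live in the enic headers and are replaced here by placeholder types of arbitrary size, and the TLV type values are stand-ins, not the real CLSF_TLV_* constants.

/*
 * Sketch of the devcmd buffer layout fill_tlv() produces. Payload structs
 * and type values are placeholders.
 */
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

struct tlv { uint32_t type; uint32_t length; uint8_t val[]; };
struct fake_filter { uint32_t type; uint32_t data[4]; };	/* placeholder */
struct fake_action { uint32_t type; uint32_t rq_idx; };		/* placeholder */

int main(void)
{
	size_t sz = 2 * sizeof(struct tlv) + sizeof(struct fake_filter) +
		    sizeof(struct fake_action);
	uint8_t *buf = calloc(1, sz);
	struct tlv *t = (struct tlv *)buf;

	t->type = 1;				/* stands in for CLSF_TLV_FILTER */
	t->length = sizeof(struct fake_filter);
	t = (struct tlv *)((uint8_t *)t + sizeof(struct tlv) + t->length);
	t->type = 2;				/* stands in for CLSF_TLV_ACTION */
	t->length = sizeof(struct fake_action);

	printf("devcmd blob: %zu bytes\n", sz);
	free(buf);
	return 0;
}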

+ 113 - 0
drivers/infiniband/hw/usnic/usnic_fwd.h

@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_FWD_H_
+#define USNIC_FWD_H_
+
+#include <linux/if.h>
+#include <linux/netdevice.h>
+#include <linux/pci.h>
+#include <linux/in.h>
+
+#include "usnic_abi.h"
+#include "usnic_common_pkt_hdr.h"
+#include "vnic_devcmd.h"
+
+struct usnic_fwd_dev {
+	struct pci_dev			*pdev;
+	struct net_device		*netdev;
+	spinlock_t			lock;
+	/*
+	 * The following fields can be read directly off the device.
+	 * However, they should be set by an accessor function, except name,
+	 * which cannot be changed.
+	 */
+	bool				link_up;
+	char				mac[ETH_ALEN];
+	unsigned int			mtu;
+	__be32				inaddr;
+	char				name[IFNAMSIZ+1];
+};
+
+struct usnic_fwd_flow {
+	uint32_t			flow_id;
+	struct usnic_fwd_dev		*ufdev;
+	unsigned int			vnic_idx;
+};
+
+struct usnic_filter_action {
+	int				vnic_idx;
+	struct filter_action		action;
+};
+
+struct usnic_fwd_dev *usnic_fwd_dev_alloc(struct pci_dev *pdev);
+void usnic_fwd_dev_free(struct usnic_fwd_dev *ufdev);
+
+void usnic_fwd_set_mac(struct usnic_fwd_dev *ufdev, char mac[ETH_ALEN]);
+int usnic_fwd_add_ipaddr(struct usnic_fwd_dev *ufdev, __be32 inaddr);
+void usnic_fwd_del_ipaddr(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_carrier_up(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_carrier_down(struct usnic_fwd_dev *ufdev);
+void usnic_fwd_set_mtu(struct usnic_fwd_dev *ufdev, unsigned int mtu);
+
+/*
+ * Allocate a flow on this forwarding device. Whoever calls this function,
+ * must monitor netdev events on ufdev's netdevice. If NETDEV_REBOOT or
+ * NETDEV_DOWN is seen, flow will no longer function and must be
+ * immediately freed by calling usnic_dealloc_flow.
+ */
+struct usnic_fwd_flow*
+usnic_fwd_alloc_flow(struct usnic_fwd_dev *ufdev, struct filter *filter,
+				struct usnic_filter_action *action);
+int usnic_fwd_dealloc_flow(struct usnic_fwd_flow *flow);
+int usnic_fwd_enable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx);
+int usnic_fwd_disable_qp(struct usnic_fwd_dev *ufdev, int vnic_idx, int qp_idx);
+
+static inline void usnic_fwd_init_usnic_filter(struct filter *filter,
+						uint32_t usnic_id)
+{
+	filter->type = FILTER_USNIC_ID;
+	filter->u.usnic.ethtype = USNIC_ROCE_ETHERTYPE;
+	filter->u.usnic.flags = FILTER_FIELD_USNIC_ETHTYPE |
+				FILTER_FIELD_USNIC_ID |
+				FILTER_FIELD_USNIC_PROTO;
+	filter->u.usnic.proto_version = (USNIC_ROCE_GRH_VER <<
+					 USNIC_ROCE_GRH_VER_SHIFT) |
+					 USNIC_PROTO_VER;
+	filter->u.usnic.usnic_id = usnic_id;
+}
+
+static inline void usnic_fwd_init_udp_filter(struct filter *filter,
+						uint32_t daddr, uint16_t dport)
+{
+	filter->type = FILTER_IPV4_5TUPLE;
+	filter->u.ipv4.flags = FILTER_FIELD_5TUP_PROTO;
+	filter->u.ipv4.protocol = PROTO_UDP;
+
+	if (daddr) {
+		filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_AD;
+		filter->u.ipv4.dst_addr = daddr;
+	}
+
+	if (dport) {
+		filter->u.ipv4.flags |= FILTER_FIELD_5TUP_DST_PT;
+		filter->u.ipv4.dst_port = dport;
+	}
+}
+
+#endif /* !USNIC_FWD_H_ */

+ 118 - 0
drivers/infiniband/hw/usnic/usnic_ib.h

@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_H_
+#define USNIC_IB_H_
+
+#include <linux/iommu.h>
+#include <linux/netdevice.h>
+
+#include <rdma/ib_verbs.h>
+
+
+#include "usnic.h"
+#include "usnic_abi.h"
+#include "usnic_vnic.h"
+
+#define USNIC_IB_PORT_CNT		1
+#define USNIC_IB_NUM_COMP_VECTORS	1
+
+extern unsigned int usnic_ib_share_vf;
+
+struct usnic_ib_ucontext {
+	struct ib_ucontext		ibucontext;
+	/* Protected by usnic_ib_dev->usdev_lock */
+	struct list_head		qp_grp_list;
+	struct list_head		link;
+};
+
+struct usnic_ib_pd {
+	struct ib_pd			ibpd;
+	struct usnic_uiom_pd		*umem_pd;
+};
+
+struct usnic_ib_mr {
+	struct ib_mr			ibmr;
+	struct usnic_uiom_reg		*umem;
+};
+
+struct usnic_ib_dev {
+	struct ib_device		ib_dev;
+	struct pci_dev			*pdev;
+	struct net_device		*netdev;
+	struct usnic_fwd_dev		*ufdev;
+	struct list_head		ib_dev_link;
+	struct list_head		vf_dev_list;
+	struct list_head		ctx_list;
+	struct mutex			usdev_lock;
+
+	/* provisioning information */
+	struct kref			vf_cnt;
+	unsigned int			vf_res_cnt[USNIC_VNIC_RES_TYPE_MAX];
+
+	/* sysfs vars for QPN reporting */
+	struct kobject *qpn_kobj;
+};
+
+struct usnic_ib_vf {
+	struct usnic_ib_dev		*pf;
+	spinlock_t			lock;
+	struct usnic_vnic		*vnic;
+	unsigned int			qp_grp_ref_cnt;
+	struct usnic_ib_pd		*pd;
+	struct list_head		link;
+};
+
+static inline
+struct usnic_ib_dev *to_usdev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct usnic_ib_dev, ib_dev);
+}
+
+static inline
+struct usnic_ib_ucontext *to_ucontext(struct ib_ucontext *ibucontext)
+{
+	return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext);
+}
+
+static inline
+struct usnic_ib_pd *to_upd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct usnic_ib_pd, ibpd);
+}
+
+static inline
+struct usnic_ib_ucontext *to_uucontext(struct ib_ucontext *ibucontext)
+{
+	return container_of(ibucontext, struct usnic_ib_ucontext, ibucontext);
+}
+
+static inline
+struct usnic_ib_mr *to_umr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct usnic_ib_mr, ibmr);
+}
+void usnic_ib_log_vf(struct usnic_ib_vf *vf);
+
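+/* Advance buffer pointer P by N bytes and shrink the remaining length L. */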
+#define UPDATE_PTR_LEFT(N, P, L)			\
+do {							\
+	L -= (N);					\
+	P += (N);					\
+} while (0)
+
+#endif /* USNIC_IB_H_ */

+ 682 - 0
drivers/infiniband/hw/usnic/usnic_ib_main.c

@@ -0,0 +1,682 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Upinder Malhi <umalhi@cisco.com>
+ * Author: Anant Deepak <anadeepa@cisco.com>
+ * Author: Cesare Cantu' <cantuc@cisco.com>
+ * Author: Jeff Squyres <jsquyres@cisco.com>
+ * Author: Kiran Thirumalai <kithirum@cisco.com>
+ * Author: Xuyang Wang <xuywang@cisco.com>
+ * Author: Reese Faucette <rfaucett@cisco.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/inetdevice.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_abi.h"
+#include "usnic_common_util.h"
+#include "usnic_ib.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_log.h"
+#include "usnic_fwd.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_verbs.h"
+#include "usnic_transport.h"
+#include "usnic_uiom.h"
+#include "usnic_ib_sysfs.h"
+
+unsigned int usnic_log_lvl = USNIC_LOG_LVL_ERR;
+unsigned int usnic_ib_share_vf = 1;
+
+static const char usnic_version[] =
+	DRV_NAME ": Cisco VIC (USNIC) Verbs Driver v"
+	DRV_VERSION " (" DRV_RELDATE ")\n";
+
+static DEFINE_MUTEX(usnic_ib_ibdev_list_lock);
+static LIST_HEAD(usnic_ib_ibdev_list);
+
+/* Callback dump funcs */
+static int usnic_ib_dump_vf_hdr(void *obj, char *buf, int buf_sz)
+{
+	struct usnic_ib_vf *vf = obj;
+	return scnprintf(buf, buf_sz, "PF: %s ", vf->pf->ib_dev.name);
+}
+/* End callback dump funcs */
+
+static void usnic_ib_dump_vf(struct usnic_ib_vf *vf, char *buf, int buf_sz)
+{
+	usnic_vnic_dump(vf->vnic, buf, buf_sz, vf,
+			usnic_ib_dump_vf_hdr,
+			usnic_ib_qp_grp_dump_hdr, usnic_ib_qp_grp_dump_rows);
+}
+
+void usnic_ib_log_vf(struct usnic_ib_vf *vf)
+{
+	char buf[1000];
+	usnic_ib_dump_vf(vf, buf, sizeof(buf));
+	usnic_dbg("%s\n", buf);
+}
+
+/* Start of netdev section */
+static inline const char *usnic_ib_netdev_event_to_string(unsigned long event)
+{
+	const char *event2str[] = {"NETDEV_NONE", "NETDEV_UP", "NETDEV_DOWN",
+		"NETDEV_REBOOT", "NETDEV_CHANGE",
+		"NETDEV_REGISTER", "NETDEV_UNREGISTER", "NETDEV_CHANGEMTU",
+		"NETDEV_CHANGEADDR", "NETDEV_GOING_DOWN", "NETDEV_FEAT_CHANGE",
+		"NETDEV_BONDING_FAILOVER", "NETDEV_PRE_UP",
+		"NETDEV_PRE_TYPE_CHANGE", "NETDEV_POST_TYPE_CHANGE",
+		"NETDEV_POST_INIT", "NETDEV_UNREGISTER_FINAL", "NETDEV_RELEASE",
+		"NETDEV_NOTIFY_PEERS", "NETDEV_JOIN"
+	};
+
+	if (event >= ARRAY_SIZE(event2str))
+		return "UNKNOWN_NETDEV_EVENT";
+	else
+		return event2str[event];
+}
+
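+/*
+ * Walk every ucontext and QP group on this device and push any QP that is
+ * in INIT/RTR/RTS into the error state.  Used when a netdev event (reset,
+ * link down, MAC/MTU change) invalidates existing QPs.  Caller must hold
+ * us_ibdev->usdev_lock.
+ */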
+static void usnic_ib_qp_grp_modify_active_to_err(struct usnic_ib_dev *us_ibdev)
+{
+	struct usnic_ib_ucontext *ctx;
+	struct usnic_ib_qp_grp *qp_grp;
+	enum ib_qp_state cur_state;
+	int status;
+
+	BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
+
+	list_for_each_entry(ctx, &us_ibdev->ctx_list, link) {
+		list_for_each_entry(qp_grp, &ctx->qp_grp_list, link) {
+			cur_state = qp_grp->state;
+			if (cur_state == IB_QPS_INIT ||
+				cur_state == IB_QPS_RTR ||
+				cur_state == IB_QPS_RTS) {
+				status = usnic_ib_qp_grp_modify(qp_grp,
+								IB_QPS_ERR,
+								NULL);
+				if (status) {
+					usnic_err("Failed to transition qp grp %u from %s to %s\n",
+						qp_grp->grp_id,
+						usnic_ib_qp_grp_state_to_string
+						(cur_state),
+						usnic_ib_qp_grp_state_to_string
+						(IB_QPS_ERR));
+				}
+			}
+		}
+	}
+}
+
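+/*
+ * Translate netdev events on the uplink into usNIC/IB events: PF reset and
+ * carrier changes raise IB_EVENT_PORT_ERR/PORT_ACTIVE, MAC changes raise
+ * IB_EVENT_GID_CHANGE, and MTU changes only update the forwarding device.
+ * Any change that invalidates existing QPs also moves them to the error
+ * state.
+ */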
+static void usnic_ib_handle_usdev_event(struct usnic_ib_dev *us_ibdev,
+					unsigned long event)
+{
+	struct net_device *netdev;
+	struct ib_event ib_event;
+
+	memset(&ib_event, 0, sizeof(ib_event));
+
+	mutex_lock(&us_ibdev->usdev_lock);
+	netdev = us_ibdev->netdev;
+	switch (event) {
+	case NETDEV_REBOOT:
+		usnic_info("PF Reset on %s\n", us_ibdev->ib_dev.name);
+		usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+		ib_event.event = IB_EVENT_PORT_ERR;
+		ib_event.device = &us_ibdev->ib_dev;
+		ib_event.element.port_num = 1;
+		ib_dispatch_event(&ib_event);
+		break;
+	case NETDEV_UP:
+	case NETDEV_DOWN:
+	case NETDEV_CHANGE:
+		if (!us_ibdev->ufdev->link_up &&
+				netif_carrier_ok(netdev)) {
+			usnic_fwd_carrier_up(us_ibdev->ufdev);
+			usnic_info("Link UP on %s\n", us_ibdev->ib_dev.name);
+			ib_event.event = IB_EVENT_PORT_ACTIVE;
+			ib_event.device = &us_ibdev->ib_dev;
+			ib_event.element.port_num = 1;
+			ib_dispatch_event(&ib_event);
+		} else if (us_ibdev->ufdev->link_up &&
+				!netif_carrier_ok(netdev)) {
+			usnic_fwd_carrier_down(us_ibdev->ufdev);
+			usnic_info("Link DOWN on %s\n", us_ibdev->ib_dev.name);
+			usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+			ib_event.event = IB_EVENT_PORT_ERR;
+			ib_event.device = &us_ibdev->ib_dev;
+			ib_event.element.port_num = 1;
+			ib_dispatch_event(&ib_event);
+		} else {
+			usnic_dbg("Ignoring %s on %s\n",
+					usnic_ib_netdev_event_to_string(event),
+					us_ibdev->ib_dev.name);
+		}
+		break;
+	case NETDEV_CHANGEADDR:
+		if (!memcmp(us_ibdev->ufdev->mac, netdev->dev_addr,
+				sizeof(us_ibdev->ufdev->mac))) {
+			usnic_dbg("Ignoring addr change on %s\n",
+					us_ibdev->ib_dev.name);
+		} else {
+			usnic_info(" %s old mac: %pM new mac: %pM\n",
+					us_ibdev->ib_dev.name,
+					us_ibdev->ufdev->mac,
+					netdev->dev_addr);
+			usnic_fwd_set_mac(us_ibdev->ufdev, netdev->dev_addr);
+			usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+			ib_event.event = IB_EVENT_GID_CHANGE;
+			ib_event.device = &us_ibdev->ib_dev;
+			ib_event.element.port_num = 1;
+			ib_dispatch_event(&ib_event);
+		}
+
+		break;
+	case NETDEV_CHANGEMTU:
+		if (us_ibdev->ufdev->mtu != netdev->mtu) {
+			usnic_info("MTU Change on %s old: %u new: %u\n",
+					us_ibdev->ib_dev.name,
+					us_ibdev->ufdev->mtu, netdev->mtu);
+			usnic_fwd_set_mtu(us_ibdev->ufdev, netdev->mtu);
+			usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+		} else {
+			usnic_dbg("Ignoring MTU change on %s\n",
+					us_ibdev->ib_dev.name);
+		}
+		break;
+	default:
+		usnic_dbg("Ignoring event %s on %s",
+				usnic_ib_netdev_event_to_string(event),
+				us_ibdev->ib_dev.name);
+	}
+	mutex_unlock(&us_ibdev->usdev_lock);
+}
+
+static int usnic_ib_netdevice_event(struct notifier_block *notifier,
+					unsigned long event, void *ptr)
+{
+	struct usnic_ib_dev *us_ibdev;
+
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+	mutex_lock(&usnic_ib_ibdev_list_lock);
+	list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+		if (us_ibdev->netdev == netdev) {
+			usnic_ib_handle_usdev_event(us_ibdev, event);
+			break;
+		}
+	}
+	mutex_unlock(&usnic_ib_ibdev_list_lock);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block usnic_ib_netdevice_notifier = {
+	.notifier_call = usnic_ib_netdevice_event
+};
+/* End of netdev section */
+
+/* Start of inet section */
+static int usnic_ib_handle_inet_event(struct usnic_ib_dev *us_ibdev,
+					unsigned long event, void *ptr)
+{
+	struct in_ifaddr *ifa = ptr;
+	struct ib_event ib_event;
+
+	mutex_lock(&us_ibdev->usdev_lock);
+
+	switch (event) {
+	case NETDEV_DOWN:
+		usnic_info("%s via ip notifiers",
+				usnic_ib_netdev_event_to_string(event));
+		usnic_fwd_del_ipaddr(us_ibdev->ufdev);
+		usnic_ib_qp_grp_modify_active_to_err(us_ibdev);
+		ib_event.event = IB_EVENT_GID_CHANGE;
+		ib_event.device = &us_ibdev->ib_dev;
+		ib_event.element.port_num = 1;
+		ib_dispatch_event(&ib_event);
+		break;
+	case NETDEV_UP:
+		usnic_fwd_add_ipaddr(us_ibdev->ufdev, ifa->ifa_address);
+		usnic_info("%s via ip notifiers: ip %pI4",
+				usnic_ib_netdev_event_to_string(event),
+				&us_ibdev->ufdev->inaddr);
+		ib_event.event = IB_EVENT_GID_CHANGE;
+		ib_event.device = &us_ibdev->ib_dev;
+		ib_event.element.port_num = 1;
+		ib_dispatch_event(&ib_event);
+		break;
+	default:
+		usnic_info("Ignoring event %s on %s",
+				usnic_ib_netdev_event_to_string(event),
+				us_ibdev->ib_dev.name);
+	}
+	mutex_unlock(&us_ibdev->usdev_lock);
+
+	return NOTIFY_DONE;
+}
+
+static int usnic_ib_inetaddr_event(struct notifier_block *notifier,
+					unsigned long event, void *ptr)
+{
+	struct usnic_ib_dev *us_ibdev;
+	struct in_ifaddr *ifa = ptr;
+	struct net_device *netdev = ifa->ifa_dev->dev;
+
+	mutex_lock(&usnic_ib_ibdev_list_lock);
+	list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+		if (us_ibdev->netdev == netdev) {
+			usnic_ib_handle_inet_event(us_ibdev, event, ptr);
+			break;
+		}
+	}
+	mutex_unlock(&usnic_ib_ibdev_list_lock);
+
+	return NOTIFY_DONE;
+}
+static struct notifier_block usnic_ib_inetaddr_notifier = {
+	.notifier_call = usnic_ib_inetaddr_event
+};
+/* End of inet section*/
+
+/* Start of PF discovery section */
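+/*
+ * Create and register the ib_device for a usNIC PF: wire up the verbs
+ * entry points, seed the forwarding device with the netdev's MTU, MAC,
+ * link state and primary IPv4 address, and derive the node GUID from the
+ * MAC/IP-based GID.
+ */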
+static void *usnic_ib_device_add(struct pci_dev *dev)
+{
+	struct usnic_ib_dev *us_ibdev;
+	union ib_gid gid;
+	struct in_ifaddr *in;
+	struct net_device *netdev;
+
+	usnic_dbg("\n");
+	netdev = pci_get_drvdata(dev);
+
+	us_ibdev = (struct usnic_ib_dev *)ib_alloc_device(sizeof(*us_ibdev));
+	if (IS_ERR_OR_NULL(us_ibdev)) {
+		usnic_err("Device %s context alloc failed\n",
+				netdev_name(pci_get_drvdata(dev)));
+		return ERR_PTR(us_ibdev ? PTR_ERR(us_ibdev) : -EFAULT);
+	}
+
+	us_ibdev->ufdev = usnic_fwd_dev_alloc(dev);
+	if (IS_ERR_OR_NULL(us_ibdev->ufdev)) {
+		usnic_err("Failed to alloc ufdev for %s with err %ld\n",
+				pci_name(dev), PTR_ERR(us_ibdev->ufdev));
+		goto err_dealloc;
+	}
+
+	mutex_init(&us_ibdev->usdev_lock);
+	INIT_LIST_HEAD(&us_ibdev->vf_dev_list);
+	INIT_LIST_HEAD(&us_ibdev->ctx_list);
+
+	us_ibdev->pdev = dev;
+	us_ibdev->netdev = pci_get_drvdata(dev);
+	us_ibdev->ib_dev.owner = THIS_MODULE;
+	us_ibdev->ib_dev.node_type = RDMA_NODE_USNIC_UDP;
+	us_ibdev->ib_dev.phys_port_cnt = USNIC_IB_PORT_CNT;
+	us_ibdev->ib_dev.num_comp_vectors = USNIC_IB_NUM_COMP_VECTORS;
+	us_ibdev->ib_dev.dma_device = &dev->dev;
+	us_ibdev->ib_dev.uverbs_abi_ver = USNIC_UVERBS_ABI_VERSION;
+	strlcpy(us_ibdev->ib_dev.name, "usnic_%d", IB_DEVICE_NAME_MAX);
+
+	us_ibdev->ib_dev.uverbs_cmd_mask =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+		(1ull << IB_USER_VERBS_CMD_REG_MR) |
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
+
+	us_ibdev->ib_dev.query_device = usnic_ib_query_device;
+	us_ibdev->ib_dev.query_port = usnic_ib_query_port;
+	us_ibdev->ib_dev.query_pkey = usnic_ib_query_pkey;
+	us_ibdev->ib_dev.query_gid = usnic_ib_query_gid;
+	us_ibdev->ib_dev.get_link_layer = usnic_ib_port_link_layer;
+	us_ibdev->ib_dev.alloc_pd = usnic_ib_alloc_pd;
+	us_ibdev->ib_dev.dealloc_pd = usnic_ib_dealloc_pd;
+	us_ibdev->ib_dev.create_qp = usnic_ib_create_qp;
+	us_ibdev->ib_dev.modify_qp = usnic_ib_modify_qp;
+	us_ibdev->ib_dev.query_qp = usnic_ib_query_qp;
+	us_ibdev->ib_dev.destroy_qp = usnic_ib_destroy_qp;
+	us_ibdev->ib_dev.create_cq = usnic_ib_create_cq;
+	us_ibdev->ib_dev.destroy_cq = usnic_ib_destroy_cq;
+	us_ibdev->ib_dev.reg_user_mr = usnic_ib_reg_mr;
+	us_ibdev->ib_dev.dereg_mr = usnic_ib_dereg_mr;
+	us_ibdev->ib_dev.alloc_ucontext = usnic_ib_alloc_ucontext;
+	us_ibdev->ib_dev.dealloc_ucontext = usnic_ib_dealloc_ucontext;
+	us_ibdev->ib_dev.mmap = usnic_ib_mmap;
+	us_ibdev->ib_dev.create_ah = usnic_ib_create_ah;
+	us_ibdev->ib_dev.destroy_ah = usnic_ib_destroy_ah;
+	us_ibdev->ib_dev.post_send = usnic_ib_post_send;
+	us_ibdev->ib_dev.post_recv = usnic_ib_post_recv;
+	us_ibdev->ib_dev.poll_cq = usnic_ib_poll_cq;
+	us_ibdev->ib_dev.req_notify_cq = usnic_ib_req_notify_cq;
+	us_ibdev->ib_dev.get_dma_mr = usnic_ib_get_dma_mr;
+
+	if (ib_register_device(&us_ibdev->ib_dev, NULL))
+		goto err_fwd_dealloc;
+
+	usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu);
+	usnic_fwd_set_mac(us_ibdev->ufdev, us_ibdev->netdev->dev_addr);
+	if (netif_carrier_ok(us_ibdev->netdev))
+		usnic_fwd_carrier_up(us_ibdev->ufdev);
+
+	in = ((struct in_device *)(netdev->ip_ptr))->ifa_list;
+	if (in != NULL)
+		usnic_fwd_add_ipaddr(us_ibdev->ufdev, in->ifa_address);
+
+	usnic_mac_ip_to_gid(us_ibdev->netdev->perm_addr,
+				us_ibdev->ufdev->inaddr, &gid.raw[0]);
+	memcpy(&us_ibdev->ib_dev.node_guid, &gid.global.interface_id,
+		sizeof(gid.global.interface_id));
+	kref_init(&us_ibdev->vf_cnt);
+
+	usnic_info("Added ibdev: %s netdev: %s with mac %pM Link: %u MTU: %u\n",
+			us_ibdev->ib_dev.name, netdev_name(us_ibdev->netdev),
+			us_ibdev->ufdev->mac, us_ibdev->ufdev->link_up,
+			us_ibdev->ufdev->mtu);
+	return us_ibdev;
+
+err_fwd_dealloc:
+	usnic_fwd_dev_free(us_ibdev->ufdev);
+err_dealloc:
+	usnic_err("failed -- deallocing device\n");
+	ib_dealloc_device(&us_ibdev->ib_dev);
+	return NULL;
+}
+
+static void usnic_ib_device_remove(struct usnic_ib_dev *us_ibdev)
+{
+	usnic_info("Unregistering %s\n", us_ibdev->ib_dev.name);
+	usnic_ib_sysfs_unregister_usdev(us_ibdev);
+	usnic_fwd_dev_free(us_ibdev->ufdev);
+	ib_unregister_device(&us_ibdev->ib_dev);
+	ib_dealloc_device(&us_ibdev->ib_dev);
+}
+
+static void usnic_ib_undiscover_pf(struct kref *kref)
+{
+	struct usnic_ib_dev *us_ibdev, *tmp;
+	struct pci_dev *dev;
+	bool found = false;
+
+	dev = container_of(kref, struct usnic_ib_dev, vf_cnt)->pdev;
+	mutex_lock(&usnic_ib_ibdev_list_lock);
+	list_for_each_entry_safe(us_ibdev, tmp,
+				&usnic_ib_ibdev_list, ib_dev_link) {
+		if (us_ibdev->pdev == dev) {
+			list_del(&us_ibdev->ib_dev_link);
+			usnic_ib_device_remove(us_ibdev);
+			found = true;
+			break;
+		}
+	}
+
+	WARN(!found, "Failed to remove PF %s\n", pci_name(dev));
+
+	mutex_unlock(&usnic_ib_ibdev_list_lock);
+}
+
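+/*
+ * Find (or create) the usnic_ib_dev for the physical function that owns
+ * this VF's vnic.  The PF's lifetime is reference counted via vf_cnt:
+ * each probed VF takes a reference, and the last kref_put() tears the
+ * ibdev down in usnic_ib_undiscover_pf().
+ */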
+static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic)
+{
+	struct usnic_ib_dev *us_ibdev;
+	struct pci_dev *parent_pci, *vf_pci;
+	int err;
+
+	vf_pci = usnic_vnic_get_pdev(vnic);
+	parent_pci = pci_physfn(vf_pci);
+
+	BUG_ON(!parent_pci);
+
+	mutex_lock(&usnic_ib_ibdev_list_lock);
+	list_for_each_entry(us_ibdev, &usnic_ib_ibdev_list, ib_dev_link) {
+		if (us_ibdev->pdev == parent_pci) {
+			kref_get(&us_ibdev->vf_cnt);
+			goto out;
+		}
+	}
+
+	us_ibdev = usnic_ib_device_add(parent_pci);
+	if (IS_ERR_OR_NULL(us_ibdev)) {
+		us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT);
+		goto out;
+	}
+
+	err = usnic_ib_sysfs_register_usdev(us_ibdev);
+	if (err) {
+		usnic_ib_device_remove(us_ibdev);
+		us_ibdev = ERR_PTR(err);
+		goto out;
+	}
+
+	list_add(&us_ibdev->ib_dev_link, &usnic_ib_ibdev_list);
+out:
+	mutex_unlock(&usnic_ib_ibdev_list_lock);
+	return us_ibdev;
+}
+/* End of PF discovery section */
+
+/* Start of PCI section */
+
+static DEFINE_PCI_DEVICE_TABLE(usnic_ib_pci_ids) = {
+	{PCI_DEVICE(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC)},
+	{0,}
+};
+
+static int usnic_ib_pci_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id)
+{
+	int err;
+	struct usnic_ib_dev *pf;
+	struct usnic_ib_vf *vf;
+	enum usnic_vnic_res_type res_type;
+
+	vf = kzalloc(sizeof(*vf), GFP_KERNEL);
+	if (!vf)
+		return -ENOMEM;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		usnic_err("Failed to enable %s with err %d\n",
+				pci_name(pdev), err);
+		goto out_clean_vf;
+	}
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		usnic_err("Failed to request region for %s with err %d\n",
+				pci_name(pdev), err);
+		goto out_disable_device;
+	}
+
+	pci_set_master(pdev);
+	pci_set_drvdata(pdev, vf);
+
+	vf->vnic = usnic_vnic_alloc(pdev);
+	if (IS_ERR_OR_NULL(vf->vnic)) {
+		err = vf->vnic ? PTR_ERR(vf->vnic) : -ENOMEM;
+		usnic_err("Failed to alloc vnic for %s with err %d\n",
+				pci_name(pdev), err);
+		goto out_release_regions;
+	}
+
+	pf = usnic_ib_discover_pf(vf->vnic);
+	if (IS_ERR_OR_NULL(pf)) {
+		usnic_err("Failed to discover pf of vnic %s with err%ld\n",
+				pci_name(pdev), PTR_ERR(pf));
+		err = pf ? PTR_ERR(pf) : -EFAULT;
+		goto out_clean_vnic;
+	}
+
+	vf->pf = pf;
+	spin_lock_init(&vf->lock);
+	mutex_lock(&pf->usdev_lock);
+	list_add_tail(&vf->link, &pf->vf_dev_list);
+	/*
+	 * Save max settings (will be same for each VF; easier to rewrite than
+	 * to say "if (!set) { set_values(); set = 1; }").
+	 */
+	for (res_type = USNIC_VNIC_RES_TYPE_EOL+1;
+			res_type < USNIC_VNIC_RES_TYPE_MAX;
+			res_type++) {
+		pf->vf_res_cnt[res_type] = usnic_vnic_res_cnt(vf->vnic,
+								res_type);
+	}
+
+	mutex_unlock(&pf->usdev_lock);
+
+	usnic_info("Registering usnic VF %s into PF %s\n", pci_name(pdev),
+			pf->ib_dev.name);
+	usnic_ib_log_vf(vf);
+	return 0;
+
+out_clean_vnic:
+	usnic_vnic_free(vf->vnic);
+out_release_regions:
+	pci_set_drvdata(pdev, NULL);
+	pci_clear_master(pdev);
+	pci_release_regions(pdev);
+out_disable_device:
+	pci_disable_device(pdev);
+out_clean_vf:
+	kfree(vf);
+	return err;
+}
+
+static void usnic_ib_pci_remove(struct pci_dev *pdev)
+{
+	struct usnic_ib_vf *vf = pci_get_drvdata(pdev);
+	struct usnic_ib_dev *pf = vf->pf;
+
+	mutex_lock(&pf->usdev_lock);
+	list_del(&vf->link);
+	mutex_unlock(&pf->usdev_lock);
+
+	kref_put(&pf->vf_cnt, usnic_ib_undiscover_pf);
+	usnic_vnic_free(vf->vnic);
+	pci_set_drvdata(pdev, NULL);
+	pci_clear_master(pdev);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+	kfree(vf);
+
+	usnic_info("Removed VF %s\n", pci_name(pdev));
+}
+
+/* PCI driver entry points */
+static struct pci_driver usnic_ib_pci_driver = {
+	.name = DRV_NAME,
+	.id_table = usnic_ib_pci_ids,
+	.probe = usnic_ib_pci_probe,
+	.remove = usnic_ib_pci_remove,
+};
+/* End of PCI section */
+
+/* Start of module section */
+static int __init usnic_ib_init(void)
+{
+	int err;
+
+	printk_once(KERN_INFO "%s", usnic_version);
+
+	err = usnic_uiom_init(DRV_NAME);
+	if (err) {
+		usnic_err("Unable to initialize umem with err %d\n", err);
+		return err;
+	}
+
+	err = pci_register_driver(&usnic_ib_pci_driver);
+	if (err) {
+		usnic_err("Unable to register with PCI\n");
+		goto out_umem_fini;
+	}
+
+	err = register_netdevice_notifier(&usnic_ib_netdevice_notifier);
+	if (err) {
+		usnic_err("Failed to register netdev notifier\n");
+		goto out_pci_unreg;
+	}
+
+	err = register_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+	if (err) {
+		usnic_err("Failed to register inet addr notifier\n");
+		goto out_unreg_netdev_notifier;
+	}
+
+	err = usnic_transport_init();
+	if (err) {
+		usnic_err("Failed to initialize transport\n");
+		goto out_unreg_inetaddr_notifier;
+	}
+
+	usnic_debugfs_init();
+
+	return 0;
+
+out_unreg_inetaddr_notifier:
+	unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+out_unreg_netdev_notifier:
+	unregister_netdevice_notifier(&usnic_ib_netdevice_notifier);
+out_pci_unreg:
+	pci_unregister_driver(&usnic_ib_pci_driver);
+out_umem_fini:
+	usnic_uiom_fini();
+
+	return err;
+}
+
+static void __exit usnic_ib_destroy(void)
+{
+	usnic_dbg("\n");
+	usnic_debugfs_exit();
+	usnic_transport_fini();
+	unregister_inetaddr_notifier(&usnic_ib_inetaddr_notifier);
+	unregister_netdevice_notifier(&usnic_ib_netdevice_notifier);
+	pci_unregister_driver(&usnic_ib_pci_driver);
+	usnic_uiom_fini();
+}
+
+MODULE_DESCRIPTION("Cisco VIC (usNIC) Verbs Driver");
+MODULE_AUTHOR("Upinder Malhi <umalhi@cisco.com>");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+module_param(usnic_log_lvl, uint, S_IRUGO | S_IWUSR);
+module_param(usnic_ib_share_vf, uint, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(usnic_log_lvl, " Off=0, Err=1, Info=2, Debug=3");
+MODULE_PARM_DESC(usnic_ib_share_vf, "Off=0, On=1 VF sharing amongst QPs");
+MODULE_DEVICE_TABLE(pci, usnic_ib_pci_ids);
+
+module_init(usnic_ib_init);
+module_exit(usnic_ib_destroy);
+/* End of module section */

+ 754 - 0
drivers/infiniband/hw/usnic/usnic_ib_qp_grp.c

@@ -0,0 +1,754 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/bug.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+
+#include "usnic_log.h"
+#include "usnic_vnic.h"
+#include "usnic_fwd.h"
+#include "usnic_uiom.h"
+#include "usnic_debugfs.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_ib_sysfs.h"
+#include "usnic_transport.h"
+
+#define DFLT_RQ_IDX	0
+
+const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state)
+{
+	switch (state) {
+	case IB_QPS_RESET:
+		return "Rst";
+	case IB_QPS_INIT:
+		return "Init";
+	case IB_QPS_RTR:
+		return "RTR";
+	case IB_QPS_RTS:
+		return "RTS";
+	case IB_QPS_SQD:
+		return "SQD";
+	case IB_QPS_SQE:
+		return "SQE";
+	case IB_QPS_ERR:
+		return "ERR";
+	default:
+		return "UNKNOWN STATE";
+	}
+}
+
+int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz)
+{
+	return scnprintf(buf, buf_sz, "|QPN\t|State\t|PID\t|VF Idx\t|Fil ID");
+}
+
+int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz)
+{
+	struct usnic_ib_qp_grp *qp_grp = obj;
+	struct usnic_ib_qp_grp_flow *default_flow;
+	if (obj) {
+		default_flow = list_first_entry(&qp_grp->flows_lst,
+					struct usnic_ib_qp_grp_flow, link);
+		return scnprintf(buf, buf_sz, "|%d\t|%s\t|%d\t|%hu\t|%d",
+					qp_grp->ibqp.qp_num,
+					usnic_ib_qp_grp_state_to_string(
+							qp_grp->state),
+					qp_grp->owner_pid,
+					usnic_vnic_get_index(qp_grp->vf->vnic),
+					default_flow->flow->flow_id);
+	} else {
+		return scnprintf(buf, buf_sz, "|N/A\t|N/A\t|N/A\t|N/A\t|N/A");
+	}
+}
+
+static struct usnic_vnic_res_chunk *
+get_qp_res_chunk(struct usnic_ib_qp_grp *qp_grp)
+{
+	lockdep_assert_held(&qp_grp->lock);
+	/*
+	 * The QP res chunk, used to derive qp indices,
+	 * is just the set of RQ indices.
+	 */
+	return usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+}
+
+static int enable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
+{
+	int status;
+	int i, vnic_idx;
+	struct usnic_vnic_res_chunk *res_chunk;
+	struct usnic_vnic_res *res;
+
+	lockdep_assert_held(&qp_grp->lock);
+
+	vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+
+	res_chunk = get_qp_res_chunk(qp_grp);
+	if (IS_ERR_OR_NULL(res_chunk)) {
+		usnic_err("Unable to get qp res with err %ld\n",
+				PTR_ERR(res_chunk));
+		return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+	}
+
+	for (i = 0; i < res_chunk->cnt; i++) {
+		res = res_chunk->res[i];
+		status = usnic_fwd_enable_qp(qp_grp->ufdev, vnic_idx,
+						res->vnic_idx);
+		if (status) {
+			usnic_err("Failed to enable qp %d of %s:%d with err %d\n",
+					res->vnic_idx, qp_grp->ufdev->name,
+					vnic_idx, status);
+			goto out_err;
+		}
+	}
+
+	return 0;
+
+out_err:
+	for (i--; i >= 0; i--) {
+		res = res_chunk->res[i];
+		usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx,
+					res->vnic_idx);
+	}
+
+	return status;
+}
+
+static int disable_qp_grp(struct usnic_ib_qp_grp *qp_grp)
+{
+	int i, vnic_idx;
+	struct usnic_vnic_res_chunk *res_chunk;
+	struct usnic_vnic_res *res;
+	int status = 0;
+
+	lockdep_assert_held(&qp_grp->lock);
+	vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+
+	res_chunk = get_qp_res_chunk(qp_grp);
+	if (IS_ERR_OR_NULL(res_chunk)) {
+		usnic_err("Unable to get qp res with err %ld\n",
+			PTR_ERR(res_chunk));
+		return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+	}
+
+	for (i = 0; i < res_chunk->cnt; i++) {
+		res = res_chunk->res[i];
+		status = usnic_fwd_disable_qp(qp_grp->ufdev, vnic_idx,
+						res->vnic_idx);
+		if (status) {
+			usnic_err("Failed to disable rq %d of %s:%d with err %d\n",
+					res->vnic_idx,
+					qp_grp->ufdev->name,
+					vnic_idx, status);
+		}
+	}
+
+	return status;
+}
+
+static int init_filter_action(struct usnic_ib_qp_grp *qp_grp,
+				struct usnic_filter_action *uaction)
+{
+	struct usnic_vnic_res_chunk *res_chunk;
+
+	res_chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+	if (IS_ERR_OR_NULL(res_chunk)) {
+		usnic_err("Unable to get %s with err %ld\n",
+			usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
+			PTR_ERR(res_chunk));
+		return res_chunk ? PTR_ERR(res_chunk) : -ENOMEM;
+	}
+
+	uaction->vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+	uaction->action.type = FILTER_ACTION_RQ_STEERING;
+	uaction->action.u.rq_idx = res_chunk->res[DFLT_RQ_IDX]->vnic_idx;
+
+	return 0;
+}
+
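+/*
+ * Build a flow for the custom RoCE transport: reserve the requested port,
+ * install a usNIC-ID filter keyed on that port, and steer matching packets
+ * to the QP group's default RQ.
+ */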
+static struct usnic_ib_qp_grp_flow*
+create_roce_custom_flow(struct usnic_ib_qp_grp *qp_grp,
+			struct usnic_transport_spec *trans_spec)
+{
+	uint16_t port_num;
+	int err;
+	struct filter filter;
+	struct usnic_filter_action uaction;
+	struct usnic_ib_qp_grp_flow *qp_flow;
+	struct usnic_fwd_flow *flow;
+	enum usnic_transport_type trans_type;
+
+	trans_type = trans_spec->trans_type;
+	port_num = trans_spec->usnic_roce.port_num;
+
+	/* Reserve Port */
+	port_num = usnic_transport_rsrv_port(trans_type, port_num);
+	if (port_num == 0)
+		return ERR_PTR(-EINVAL);
+
+	/* Create Flow */
+	usnic_fwd_init_usnic_filter(&filter, port_num);
+	err = init_filter_action(qp_grp, &uaction);
+	if (err)
+		goto out_unreserve_port;
+
+	flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
+	if (IS_ERR_OR_NULL(flow)) {
+		usnic_err("Failed to alloc flow with err %ld\n",
+				PTR_ERR(flow));
+		err = flow ? PTR_ERR(flow) : -EFAULT;
+		goto out_unreserve_port;
+	}
+
+	/* Create Flow Handle */
+	qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
+	if (IS_ERR_OR_NULL(qp_flow)) {
+		err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
+		goto out_dealloc_flow;
+	}
+	qp_flow->flow = flow;
+	qp_flow->trans_type = trans_type;
+	qp_flow->usnic_roce.port_num = port_num;
+	qp_flow->qp_grp = qp_grp;
+	return qp_flow;
+
+out_dealloc_flow:
+	usnic_fwd_dealloc_flow(flow);
+out_unreserve_port:
+	usnic_transport_unrsrv_port(trans_type, port_num);
+	return ERR_PTR(err);
+}
+
+static void release_roce_custom_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+	usnic_fwd_dealloc_flow(qp_flow->flow);
+	usnic_transport_unrsrv_port(qp_flow->trans_type,
+					qp_flow->usnic_roce.port_num);
+	kfree(qp_flow);
+}
+
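+/*
+ * Build a flow for the UDP transport: take the address and port from a
+ * user-supplied UDP socket fd, install an IPv4 5-tuple filter for them,
+ * and steer matching packets to the QP group's default RQ.  A reference
+ * to the socket is held for the lifetime of the flow.
+ */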
+static struct usnic_ib_qp_grp_flow*
+create_udp_flow(struct usnic_ib_qp_grp *qp_grp,
+		struct usnic_transport_spec *trans_spec)
+{
+	struct socket *sock;
+	int sock_fd;
+	int err;
+	struct filter filter;
+	struct usnic_filter_action uaction;
+	struct usnic_ib_qp_grp_flow *qp_flow;
+	struct usnic_fwd_flow *flow;
+	enum usnic_transport_type trans_type;
+	uint32_t addr;
+	uint16_t port_num;
+	int proto;
+
+	trans_type = trans_spec->trans_type;
+	sock_fd = trans_spec->udp.sock_fd;
+
+	/* Get and check socket */
+	sock = usnic_transport_get_socket(sock_fd);
+	if (IS_ERR_OR_NULL(sock))
+		return ERR_CAST(sock);
+
+	err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port_num);
+	if (err)
+		goto out_put_sock;
+
+	if (proto != IPPROTO_UDP) {
+		usnic_err("Protocol for fd %d is not UDP", sock_fd);
+		err = -EPERM;
+		goto out_put_sock;
+	}
+
+	/* Create flow */
+	usnic_fwd_init_udp_filter(&filter, addr, port_num);
+	err = init_filter_action(qp_grp, &uaction);
+	if (err)
+		goto out_put_sock;
+
+	flow = usnic_fwd_alloc_flow(qp_grp->ufdev, &filter, &uaction);
+	if (IS_ERR_OR_NULL(flow)) {
+		usnic_err("Failed to alloc flow with err %ld\n",
+				PTR_ERR(flow));
+		err = flow ? PTR_ERR(flow) : -EFAULT;
+		goto out_put_sock;
+	}
+
+	/* Create qp_flow */
+	qp_flow = kzalloc(sizeof(*qp_flow), GFP_ATOMIC);
+	if (IS_ERR_OR_NULL(qp_flow)) {
+		err = qp_flow ? PTR_ERR(qp_flow) : -ENOMEM;
+		goto out_dealloc_flow;
+	}
+	qp_flow->flow = flow;
+	qp_flow->trans_type = trans_type;
+	qp_flow->udp.sock = sock;
+	qp_flow->qp_grp = qp_grp;
+	return qp_flow;
+
+out_dealloc_flow:
+	usnic_fwd_dealloc_flow(flow);
+out_put_sock:
+	usnic_transport_put_socket(sock);
+	return ERR_PTR(err);
+}
+
+static void release_udp_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+	usnic_fwd_dealloc_flow(qp_flow->flow);
+	usnic_transport_put_socket(qp_flow->udp.sock);
+	kfree(qp_flow);
+}
+
+static struct usnic_ib_qp_grp_flow*
+create_and_add_flow(struct usnic_ib_qp_grp *qp_grp,
+			struct usnic_transport_spec *trans_spec)
+{
+	struct usnic_ib_qp_grp_flow *qp_flow;
+	enum usnic_transport_type trans_type;
+
+	trans_type = trans_spec->trans_type;
+	switch (trans_type) {
+	case USNIC_TRANSPORT_ROCE_CUSTOM:
+		qp_flow = create_roce_custom_flow(qp_grp, trans_spec);
+		break;
+	case USNIC_TRANSPORT_IPV4_UDP:
+		qp_flow = create_udp_flow(qp_grp, trans_spec);
+		break;
+	default:
+		usnic_err("Unsupported transport %u\n",
+				trans_spec->trans_type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!IS_ERR_OR_NULL(qp_flow)) {
+		list_add_tail(&qp_flow->link, &qp_grp->flows_lst);
+		usnic_debugfs_flow_add(qp_flow);
+	}
+
+	return qp_flow;
+}
+
+static void release_and_remove_flow(struct usnic_ib_qp_grp_flow *qp_flow)
+{
+	usnic_debugfs_flow_remove(qp_flow);
+	list_del(&qp_flow->link);
+
+	switch (qp_flow->trans_type) {
+	case USNIC_TRANSPORT_ROCE_CUSTOM:
+		release_roce_custom_flow(qp_flow);
+		break;
+	case USNIC_TRANSPORT_IPV4_UDP:
+		release_udp_flow(qp_flow);
+		break;
+	default:
+		WARN(1, "Unsupported transport %u\n",
+				qp_flow->trans_type);
+		break;
+	}
+}
+
+static void release_and_remove_all_flows(struct usnic_ib_qp_grp *qp_grp)
+{
+	struct usnic_ib_qp_grp_flow *qp_flow, *tmp;
+	list_for_each_entry_safe(qp_flow, tmp, &qp_grp->flows_lst, link)
+		release_and_remove_flow(qp_flow);
+}
+
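+/*
+ * Drive the QP group state machine.  @data optionally carries a
+ * struct usnic_transport_spec: moving into INIT installs the described
+ * flow, INIT->RTR enables the underlying vNIC queues, transitions back
+ * towards RESET tear down the queues and flows as needed, and any move
+ * to ERR also raises IB_EVENT_QP_FATAL towards the QP's event handler.
+ */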
+int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
+				enum ib_qp_state new_state,
+				void *data)
+{
+	int status = 0;
+	int vnic_idx;
+	struct ib_event ib_event;
+	enum ib_qp_state old_state;
+	struct usnic_transport_spec *trans_spec;
+	struct usnic_ib_qp_grp_flow *qp_flow;
+
+	old_state = qp_grp->state;
+	vnic_idx = usnic_vnic_get_index(qp_grp->vf->vnic);
+	trans_spec = (struct usnic_transport_spec *) data;
+
+	spin_lock(&qp_grp->lock);
+	switch (new_state) {
+	case IB_QPS_RESET:
+		switch (old_state) {
+		case IB_QPS_RESET:
+			/* NO-OP */
+			break;
+		case IB_QPS_INIT:
+			release_and_remove_all_flows(qp_grp);
+			status = 0;
+			break;
+		case IB_QPS_RTR:
+		case IB_QPS_RTS:
+		case IB_QPS_ERR:
+			status = disable_qp_grp(qp_grp);
+			release_and_remove_all_flows(qp_grp);
+			break;
+		default:
+			status = -EINVAL;
+		}
+		break;
+	case IB_QPS_INIT:
+		switch (old_state) {
+		case IB_QPS_RESET:
+			if (trans_spec) {
+				qp_flow = create_and_add_flow(qp_grp,
+								trans_spec);
+				if (IS_ERR_OR_NULL(qp_flow)) {
+					status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+					break;
+				}
+			} else {
+				/*
+				 * Optional to specify filters.
+				 */
+				status = 0;
+			}
+			break;
+		case IB_QPS_INIT:
+			if (trans_spec) {
+				qp_flow = create_and_add_flow(qp_grp,
+								trans_spec);
+				if (IS_ERR_OR_NULL(qp_flow)) {
+					status = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+					break;
+				}
+			} else {
+				/*
+				 * Doesn't make sense to go into INIT state
+				 * from INIT state w/o adding filters.
+				 */
+				status = -EINVAL;
+			}
+			break;
+		case IB_QPS_RTR:
+			status = disable_qp_grp(qp_grp);
+			break;
+		case IB_QPS_RTS:
+			status = disable_qp_grp(qp_grp);
+			break;
+		default:
+			status = -EINVAL;
+		}
+		break;
+	case IB_QPS_RTR:
+		switch (old_state) {
+		case IB_QPS_INIT:
+			status = enable_qp_grp(qp_grp);
+			break;
+		default:
+			status = -EINVAL;
+		}
+		break;
+	case IB_QPS_RTS:
+		switch (old_state) {
+		case IB_QPS_RTR:
+			/* NO-OP FOR NOW */
+			break;
+		default:
+			status = -EINVAL;
+		}
+		break;
+	case IB_QPS_ERR:
+		ib_event.device = &qp_grp->vf->pf->ib_dev;
+		ib_event.element.qp = &qp_grp->ibqp;
+		ib_event.event = IB_EVENT_QP_FATAL;
+
+		switch (old_state) {
+		case IB_QPS_RESET:
+			qp_grp->ibqp.event_handler(&ib_event,
+					qp_grp->ibqp.qp_context);
+			break;
+		case IB_QPS_INIT:
+			release_and_remove_all_flows(qp_grp);
+			qp_grp->ibqp.event_handler(&ib_event,
+					qp_grp->ibqp.qp_context);
+			break;
+		case IB_QPS_RTR:
+		case IB_QPS_RTS:
+			status = disable_qp_grp(qp_grp);
+			release_and_remove_all_flows(qp_grp);
+			qp_grp->ibqp.event_handler(&ib_event,
+					qp_grp->ibqp.qp_context);
+			break;
+		default:
+			status = -EINVAL;
+		}
+		break;
+	default:
+		status = -EINVAL;
+	}
+	spin_unlock(&qp_grp->lock);
+
+	if (!status) {
+		qp_grp->state = new_state;
+		usnic_info("Transitioned %u from %s to %s",
+				qp_grp->grp_id,
+				usnic_ib_qp_grp_state_to_string(old_state),
+				usnic_ib_qp_grp_state_to_string(new_state));
+	} else {
+		usnic_err("Failed to transition %u from %s to %s",
+				qp_grp->grp_id,
+				usnic_ib_qp_grp_state_to_string(old_state),
+				usnic_ib_qp_grp_state_to_string(new_state));
+	}
+
+	return status;
+}
+
+static struct usnic_vnic_res_chunk**
+alloc_res_chunk_list(struct usnic_vnic *vnic,
+			struct usnic_vnic_res_spec *res_spec, void *owner_obj)
+{
+	enum usnic_vnic_res_type res_type;
+	struct usnic_vnic_res_chunk **res_chunk_list;
+	int err, i, res_cnt, res_lst_sz;
+
+	for (res_lst_sz = 0;
+		res_spec->resources[res_lst_sz].type != USNIC_VNIC_RES_TYPE_EOL;
+		res_lst_sz++) {
+		/* Do Nothing */
+	}
+
+	res_chunk_list = kzalloc(sizeof(*res_chunk_list)*(res_lst_sz+1),
+					GFP_ATOMIC);
+	if (!res_chunk_list)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; res_spec->resources[i].type != USNIC_VNIC_RES_TYPE_EOL;
+		i++) {
+		res_type = res_spec->resources[i].type;
+		res_cnt = res_spec->resources[i].cnt;
+
+		res_chunk_list[i] = usnic_vnic_get_resources(vnic, res_type,
+					res_cnt, owner_obj);
+		if (IS_ERR_OR_NULL(res_chunk_list[i])) {
+			err = res_chunk_list[i] ?
+					PTR_ERR(res_chunk_list[i]) : -ENOMEM;
+			usnic_err("Failed to get %s from %s with err %d\n",
+				usnic_vnic_res_type_to_str(res_type),
+				usnic_vnic_pci_name(vnic),
+				err);
+			goto out_free_res;
+		}
+	}
+
+	return res_chunk_list;
+
+out_free_res:
+	for (i--; i >= 0; i--)
+		usnic_vnic_put_resources(res_chunk_list[i]);
+	kfree(res_chunk_list);
+	return ERR_PTR(err);
+}
+
+static void free_qp_grp_res(struct usnic_vnic_res_chunk **res_chunk_list)
+{
+	int i;
+	for (i = 0; res_chunk_list[i]; i++)
+		usnic_vnic_put_resources(res_chunk_list[i]);
+	kfree(res_chunk_list);
+}
+
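+/*
+ * Bind a QP group to a VF.  The first QP group on a VF attaches the VF's
+ * PCI device to the PD's IOMMU domain; subsequent groups just bump the
+ * reference count and must use the same PD.
+ */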
+static int qp_grp_and_vf_bind(struct usnic_ib_vf *vf,
+				struct usnic_ib_pd *pd,
+				struct usnic_ib_qp_grp *qp_grp)
+{
+	int err;
+	struct pci_dev *pdev;
+
+	lockdep_assert_held(&vf->lock);
+
+	pdev = usnic_vnic_get_pdev(vf->vnic);
+	if (vf->qp_grp_ref_cnt == 0) {
+		err = usnic_uiom_attach_dev_to_pd(pd->umem_pd, &pdev->dev);
+		if (err) {
+			usnic_err("Failed to attach %s to domain\n",
+					pci_name(pdev));
+			return err;
+		}
+		vf->pd = pd;
+	}
+	vf->qp_grp_ref_cnt++;
+
+	WARN_ON(vf->pd != pd);
+	qp_grp->vf = vf;
+
+	return 0;
+}
+
+static void qp_grp_and_vf_unbind(struct usnic_ib_qp_grp *qp_grp)
+{
+	struct pci_dev *pdev;
+	struct usnic_ib_pd *pd;
+
+	lockdep_assert_held(&qp_grp->vf->lock);
+
+	pd = qp_grp->vf->pd;
+	pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
+	if (--qp_grp->vf->qp_grp_ref_cnt == 0) {
+		qp_grp->vf->pd = NULL;
+		usnic_uiom_detach_dev_from_pd(pd->umem_pd, &pdev->dev);
+	}
+	qp_grp->vf = NULL;
+}
+
+static void log_spec(struct usnic_vnic_res_spec *res_spec)
+{
+	char buf[512];
+	usnic_vnic_spec_dump(buf, sizeof(buf), res_spec);
+	usnic_dbg("%s\n", buf);
+}
+
+static int qp_grp_id_from_flow(struct usnic_ib_qp_grp_flow *qp_flow,
+				uint32_t *id)
+{
+	enum usnic_transport_type trans_type = qp_flow->trans_type;
+	int err;
+
+	switch (trans_type) {
+	case USNIC_TRANSPORT_ROCE_CUSTOM:
+		*id = qp_flow->usnic_roce.port_num;
+		break;
+	case USNIC_TRANSPORT_IPV4_UDP:
+		err = usnic_transport_sock_get_addr(qp_flow->udp.sock,
+							NULL, NULL,
+							(uint16_t *) id);
+		if (err)
+			return err;
+		break;
+	default:
+		usnic_err("Unsupported transport %u\n", trans_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+struct usnic_ib_qp_grp *
+usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
+			struct usnic_ib_pd *pd,
+			struct usnic_vnic_res_spec *res_spec,
+			struct usnic_transport_spec *transport_spec)
+{
+	struct usnic_ib_qp_grp *qp_grp;
+	int err;
+	enum usnic_transport_type transport = transport_spec->trans_type;
+	struct usnic_ib_qp_grp_flow *qp_flow;
+
+	lockdep_assert_held(&vf->lock);
+
+	err = usnic_vnic_res_spec_satisfied(&min_transport_spec[transport],
+						res_spec);
+	if (err) {
+		usnic_err("Spec does not meet minimum req for transport %d\n",
+				transport);
+		log_spec(res_spec);
+		return ERR_PTR(err);
+	}
+
+	qp_grp = kzalloc(sizeof(*qp_grp), GFP_ATOMIC);
+	if (!qp_grp) {
+		usnic_err("Unable to alloc qp_grp - Out of memory\n");
+		return NULL;
+	}
+
+	qp_grp->res_chunk_list = alloc_res_chunk_list(vf->vnic, res_spec,
+							qp_grp);
+	if (IS_ERR_OR_NULL(qp_grp->res_chunk_list)) {
+		err = qp_grp->res_chunk_list ?
+				PTR_ERR(qp_grp->res_chunk_list) : -ENOMEM;
+		usnic_err("Unable to alloc res for %d with err %d\n",
+				qp_grp->grp_id, err);
+		goto out_free_qp_grp;
+	}
+
+	err = qp_grp_and_vf_bind(vf, pd, qp_grp);
+	if (err)
+		goto out_free_res;
+
+	INIT_LIST_HEAD(&qp_grp->flows_lst);
+	spin_lock_init(&qp_grp->lock);
+	qp_grp->ufdev = ufdev;
+	qp_grp->state = IB_QPS_RESET;
+	qp_grp->owner_pid = current->pid;
+
+	qp_flow = create_and_add_flow(qp_grp, transport_spec);
+	if (IS_ERR_OR_NULL(qp_flow)) {
+		usnic_err("Unable to create and add flow with err %ld\n",
+				PTR_ERR(qp_flow));
+		err = qp_flow ? PTR_ERR(qp_flow) : -EFAULT;
+		goto out_qp_grp_vf_unbind;
+	}
+
+	err = qp_grp_id_from_flow(qp_flow, &qp_grp->grp_id);
+	if (err)
+		goto out_release_flow;
+	qp_grp->ibqp.qp_num = qp_grp->grp_id;
+
+	usnic_ib_sysfs_qpn_add(qp_grp);
+
+	return qp_grp;
+
+out_release_flow:
+	release_and_remove_flow(qp_flow);
+out_qp_grp_vf_unbind:
+	qp_grp_and_vf_unbind(qp_grp);
+out_free_res:
+	free_qp_grp_res(qp_grp->res_chunk_list);
+out_free_qp_grp:
+	kfree(qp_grp);
+
+	return ERR_PTR(err);
+}
+
+void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
+{
+	WARN_ON(qp_grp->state != IB_QPS_RESET);
+	lockdep_assert_held(&qp_grp->vf->lock);
+
+	release_and_remove_all_flows(qp_grp);
+	usnic_ib_sysfs_qpn_remove(qp_grp);
+	qp_grp_and_vf_unbind(qp_grp);
+	free_qp_grp_res(qp_grp->res_chunk_list);
+	kfree(qp_grp);
+}
+
+struct usnic_vnic_res_chunk*
+usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp,
+				enum usnic_vnic_res_type res_type)
+{
+	int i;
+
+	for (i = 0; qp_grp->res_chunk_list[i]; i++) {
+		if (qp_grp->res_chunk_list[i]->type == res_type)
+			return qp_grp->res_chunk_list[i];
+	}
+
+	return ERR_PTR(-EINVAL);
+}

+ 117 - 0
drivers/infiniband/hw/usnic/usnic_ib_qp_grp.h

@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_QP_GRP_H_
+#define USNIC_IB_QP_GRP_H_
+
+#include <linux/debugfs.h>
+#include <rdma/ib_verbs.h>
+
+#include "usnic_ib.h"
+#include "usnic_abi.h"
+#include "usnic_fwd.h"
+#include "usnic_vnic.h"
+
+/*
+ * The qp group struct represents all the hw resources needed to present an ib_qp
+ */
+struct usnic_ib_qp_grp {
+	struct ib_qp				ibqp;
+	enum ib_qp_state			state;
+	int					grp_id;
+
+	struct usnic_fwd_dev			*ufdev;
+	struct usnic_ib_ucontext		*ctx;
+	struct list_head			flows_lst;
+
+	struct usnic_vnic_res_chunk		**res_chunk_list;
+
+	pid_t					owner_pid;
+	struct usnic_ib_vf			*vf;
+	struct list_head			link;
+
+	spinlock_t				lock;
+
+	struct kobject				kobj;
+};
+
+struct usnic_ib_qp_grp_flow {
+	struct usnic_fwd_flow		*flow;
+	enum usnic_transport_type	trans_type;
+	union {
+		struct {
+			uint16_t	port_num;
+		} usnic_roce;
+		struct {
+			struct socket	*sock;
+		} udp;
+	};
+	struct usnic_ib_qp_grp		*qp_grp;
+	struct list_head		link;
+
+	/* Debug FS */
+	struct dentry			*dbgfs_dentry;
+	char				dentry_name[32];
+};
+
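+/*
+ * Minimum vNIC resources required to back a QP group for each transport
+ * type: one WQ, one RQ and one CQ for both the custom RoCE and UDP
+ * transports, and nothing for the unknown transport.
+ */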
+static const struct
+usnic_vnic_res_spec min_transport_spec[USNIC_TRANSPORT_MAX] = {
+	{ /*USNIC_TRANSPORT_UNKNOWN*/
+		.resources = {
+			{.type = USNIC_VNIC_RES_TYPE_EOL,	.cnt = 0,},
+		},
+	},
+	{ /*USNIC_TRANSPORT_ROCE_CUSTOM*/
+		.resources = {
+			{.type = USNIC_VNIC_RES_TYPE_WQ,	.cnt = 1,},
+			{.type = USNIC_VNIC_RES_TYPE_RQ,	.cnt = 1,},
+			{.type = USNIC_VNIC_RES_TYPE_CQ,	.cnt = 1,},
+			{.type = USNIC_VNIC_RES_TYPE_EOL,	.cnt = 0,},
+		},
+	},
+	{ /*USNIC_TRANSPORT_IPV4_UDP*/
+		.resources = {
+			{.type = USNIC_VNIC_RES_TYPE_WQ,	.cnt = 1,},
+			{.type = USNIC_VNIC_RES_TYPE_RQ,	.cnt = 1,},
+			{.type = USNIC_VNIC_RES_TYPE_CQ,	.cnt = 1,},
+			{.type = USNIC_VNIC_RES_TYPE_EOL,	.cnt = 0,},
+		},
+	},
+};
+
+const char *usnic_ib_qp_grp_state_to_string(enum ib_qp_state state);
+int usnic_ib_qp_grp_dump_hdr(char *buf, int buf_sz);
+int usnic_ib_qp_grp_dump_rows(void *obj, char *buf, int buf_sz);
+struct usnic_ib_qp_grp *
+usnic_ib_qp_grp_create(struct usnic_fwd_dev *ufdev, struct usnic_ib_vf *vf,
+			struct usnic_ib_pd *pd,
+			struct usnic_vnic_res_spec *res_spec,
+			struct usnic_transport_spec *trans_spec);
+void usnic_ib_qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp);
+int usnic_ib_qp_grp_modify(struct usnic_ib_qp_grp *qp_grp,
+				enum ib_qp_state new_state,
+				void *data);
+struct usnic_vnic_res_chunk
+*usnic_ib_qp_grp_get_chunk(struct usnic_ib_qp_grp *qp_grp,
+				enum usnic_vnic_res_type type);
+static inline
+struct usnic_ib_qp_grp *to_uqp_grp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct usnic_ib_qp_grp, ibqp);
+}
+#endif /* USNIC_IB_QP_GRP_H_ */

+ 341 - 0
drivers/infiniband/hw/usnic/usnic_ib_sysfs.c

@@ -0,0 +1,341 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_common_util.h"
+#include "usnic_ib.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_vnic.h"
+#include "usnic_ib_verbs.h"
+#include "usnic_log.h"
+
+static ssize_t usnic_ib_show_fw_ver(struct device *device,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct usnic_ib_dev *us_ibdev =
+		container_of(device, struct usnic_ib_dev, ib_dev.dev);
+	struct ethtool_drvinfo info;
+
+	mutex_lock(&us_ibdev->usdev_lock);
+	us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
+	mutex_unlock(&us_ibdev->usdev_lock);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", info.fw_version);
+}
+
+static ssize_t usnic_ib_show_board(struct device *device,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct usnic_ib_dev *us_ibdev =
+		container_of(device, struct usnic_ib_dev, ib_dev.dev);
+	unsigned short subsystem_device_id;
+
+	mutex_lock(&us_ibdev->usdev_lock);
+	subsystem_device_id = us_ibdev->pdev->subsystem_device;
+	mutex_unlock(&us_ibdev->usdev_lock);
+
+	return scnprintf(buf, PAGE_SIZE, "%hu\n", subsystem_device_id);
+}
+
+/*
+ * Report the configuration for this PF
+ */
+static ssize_t
+usnic_ib_show_config(struct device *device, struct device_attribute *attr,
+			char *buf)
+{
+	struct usnic_ib_dev *us_ibdev;
+	char *ptr;
+	unsigned left;
+	unsigned n;
+	enum usnic_vnic_res_type res_type;
+
+	us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+	/* Buffer space limit is 1 page */
+	ptr = buf;
+	left = PAGE_SIZE;
+
+	mutex_lock(&us_ibdev->usdev_lock);
+	if (atomic_read(&us_ibdev->vf_cnt.refcount) > 0) {
+		char *busname;
+
+		/*
+		 * bus name seems to come with annoying prefix.
+		 * Remove it if it is predictable
+		 */
+		busname = us_ibdev->pdev->bus->name;
+		if (strncmp(busname, "PCI Bus ", 8) == 0)
+			busname += 8;
+
+		n = scnprintf(ptr, left,
+			"%s: %s:%d.%d, %s, %pM, %u VFs\n Per VF:",
+			us_ibdev->ib_dev.name,
+			busname,
+			PCI_SLOT(us_ibdev->pdev->devfn),
+			PCI_FUNC(us_ibdev->pdev->devfn),
+			netdev_name(us_ibdev->netdev),
+			us_ibdev->ufdev->mac,
+			atomic_read(&us_ibdev->vf_cnt.refcount));
+		UPDATE_PTR_LEFT(n, ptr, left);
+
+		for (res_type = USNIC_VNIC_RES_TYPE_EOL;
+				res_type < USNIC_VNIC_RES_TYPE_MAX;
+				res_type++) {
+			if (us_ibdev->vf_res_cnt[res_type] == 0)
+				continue;
+			n = scnprintf(ptr, left, " %d %s%s",
+				us_ibdev->vf_res_cnt[res_type],
+				usnic_vnic_res_type_to_str(res_type),
+				(res_type < (USNIC_VNIC_RES_TYPE_MAX - 1)) ?
+				 "," : "");
+			UPDATE_PTR_LEFT(n, ptr, left);
+		}
+		n = scnprintf(ptr, left, "\n");
+		UPDATE_PTR_LEFT(n, ptr, left);
+	} else {
+		n = scnprintf(ptr, left, "%s: no VFs\n",
+				us_ibdev->ib_dev.name);
+		UPDATE_PTR_LEFT(n, ptr, left);
+	}
+	mutex_unlock(&us_ibdev->usdev_lock);
+
+	return ptr - buf;
+}
+
+static ssize_t
+usnic_ib_show_iface(struct device *device, struct device_attribute *attr,
+			char *buf)
+{
+	struct usnic_ib_dev *us_ibdev;
+
+	us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n",
+			netdev_name(us_ibdev->netdev));
+}
+
+static ssize_t
+usnic_ib_show_max_vf(struct device *device, struct device_attribute *attr,
+			char *buf)
+{
+	struct usnic_ib_dev *us_ibdev;
+
+	us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%u\n",
+			atomic_read(&us_ibdev->vf_cnt.refcount));
+}
+
+static ssize_t
+usnic_ib_show_qp_per_vf(struct device *device, struct device_attribute *attr,
+			char *buf)
+{
+	struct usnic_ib_dev *us_ibdev;
+	int qp_per_vf;
+
+	us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+	qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
+			us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
+
+	return scnprintf(buf, PAGE_SIZE,
+				"%d\n", qp_per_vf);
+}
+
+static ssize_t
+usnic_ib_show_cq_per_vf(struct device *device, struct device_attribute *attr,
+			char *buf)
+{
+	struct usnic_ib_dev *us_ibdev;
+
+	us_ibdev = container_of(device, struct usnic_ib_dev, ib_dev.dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n",
+			us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ]);
+}
+
+static DEVICE_ATTR(fw_ver, S_IRUGO, usnic_ib_show_fw_ver, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, usnic_ib_show_board, NULL);
+static DEVICE_ATTR(config, S_IRUGO, usnic_ib_show_config, NULL);
+static DEVICE_ATTR(iface, S_IRUGO, usnic_ib_show_iface, NULL);
+static DEVICE_ATTR(max_vf, S_IRUGO, usnic_ib_show_max_vf, NULL);
+static DEVICE_ATTR(qp_per_vf, S_IRUGO, usnic_ib_show_qp_per_vf, NULL);
+static DEVICE_ATTR(cq_per_vf, S_IRUGO, usnic_ib_show_cq_per_vf, NULL);
+
+static struct device_attribute *usnic_class_attributes[] = {
+	&dev_attr_fw_ver,
+	&dev_attr_board_id,
+	&dev_attr_config,
+	&dev_attr_iface,
+	&dev_attr_max_vf,
+	&dev_attr_qp_per_vf,
+	&dev_attr_cq_per_vf,
+};
+
+struct qpn_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct usnic_ib_qp_grp *, char *buf);
+};
+
+/*
+ * Definitions for supporting QPN entries in sysfs
+ */
+static ssize_t
+usnic_ib_qpn_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	struct usnic_ib_qp_grp *qp_grp;
+	struct qpn_attribute *qpn_attr;
+
+	qp_grp = container_of(kobj, struct usnic_ib_qp_grp, kobj);
+	qpn_attr = container_of(attr, struct qpn_attribute, attr);
+
+	return qpn_attr->show(qp_grp, buf);
+}
+
+static const struct sysfs_ops usnic_ib_qpn_sysfs_ops = {
+	.show = usnic_ib_qpn_attr_show
+};
+
+#define QPN_ATTR_RO(NAME) \
+struct qpn_attribute qpn_attr_##NAME = __ATTR_RO(NAME)
+
+static ssize_t context_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
+{
+	return scnprintf(buf, PAGE_SIZE, "0x%p\n", qp_grp->ctx);
+}
+
+static ssize_t summary_show(struct usnic_ib_qp_grp *qp_grp, char *buf)
+{
+	int i, j, n;
+	int left;
+	char *ptr;
+	struct usnic_vnic_res_chunk *res_chunk;
+	struct usnic_vnic_res *vnic_res;
+
+	left = PAGE_SIZE;
+	ptr = buf;
+
+	n = scnprintf(ptr, left,
+			"QPN: %d State: (%s) PID: %u VF Idx: %hu ",
+			qp_grp->ibqp.qp_num,
+			usnic_ib_qp_grp_state_to_string(qp_grp->state),
+			qp_grp->owner_pid,
+			usnic_vnic_get_index(qp_grp->vf->vnic));
+	UPDATE_PTR_LEFT(n, ptr, left);
+
+	for (i = 0; qp_grp->res_chunk_list[i]; i++) {
+		res_chunk = qp_grp->res_chunk_list[i];
+		for (j = 0; j < res_chunk->cnt; j++) {
+			vnic_res = res_chunk->res[j];
+			n = scnprintf(ptr, left, "%s[%d] ",
+				usnic_vnic_res_type_to_str(vnic_res->type),
+				vnic_res->vnic_idx);
+			UPDATE_PTR_LEFT(n, ptr, left);
+		}
+	}
+
+	n = scnprintf(ptr, left, "\n");
+	UPDATE_PTR_LEFT(n, ptr, left);
+
+	return ptr - buf;
+}
+
+static QPN_ATTR_RO(context);
+static QPN_ATTR_RO(summary);
+
+static struct attribute *usnic_ib_qpn_default_attrs[] = {
+	&qpn_attr_context.attr,
+	&qpn_attr_summary.attr,
+	NULL
+};
+
+static struct kobj_type usnic_ib_qpn_type = {
+	.sysfs_ops = &usnic_ib_qpn_sysfs_ops,
+	.default_attrs = usnic_ib_qpn_default_attrs
+};
+
+int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev)
+{
+	int i;
+	int err;
+	for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
+		err = device_create_file(&us_ibdev->ib_dev.dev,
+						usnic_class_attributes[i]);
+		if (err) {
+			usnic_err("Failed to create device file %d for %s with err %d",
+				i, us_ibdev->ib_dev.name, err);
+			return -EINVAL;
+		}
+	}
+
+	/* create kernel object for looking at individual QPs */
+	kobject_get(&us_ibdev->ib_dev.dev.kobj);
+	us_ibdev->qpn_kobj = kobject_create_and_add("qpn",
+			&us_ibdev->ib_dev.dev.kobj);
+	if (us_ibdev->qpn_kobj == NULL) {
+		kobject_put(&us_ibdev->ib_dev.dev.kobj);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(usnic_class_attributes); ++i) {
+		device_remove_file(&us_ibdev->ib_dev.dev,
+					usnic_class_attributes[i]);
+	}
+
+	kobject_put(us_ibdev->qpn_kobj);
+}
+
+void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp)
+{
+	struct usnic_ib_dev *us_ibdev;
+	int err;
+
+	us_ibdev = qp_grp->vf->pf;
+
+	err = kobject_init_and_add(&qp_grp->kobj, &usnic_ib_qpn_type,
+			kobject_get(us_ibdev->qpn_kobj),
+			"%d", qp_grp->grp_id);
+	if (err) {
+		kobject_put(us_ibdev->qpn_kobj);
+		return;
+	}
+}
+
+void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp)
+{
+	struct usnic_ib_dev *us_ibdev;
+
+	us_ibdev = qp_grp->vf->pf;
+
+	kobject_put(&qp_grp->kobj);
+	kobject_put(us_ibdev->qpn_kobj);
+}

+ 29 - 0
drivers/infiniband/hw/usnic/usnic_ib_sysfs.h

@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_SYSFS_H_
+#define USNIC_IB_SYSFS_H_
+
+#include "usnic_ib.h"
+
+int usnic_ib_sysfs_register_usdev(struct usnic_ib_dev *us_ibdev);
+void usnic_ib_sysfs_unregister_usdev(struct usnic_ib_dev *us_ibdev);
+void usnic_ib_sysfs_qpn_add(struct usnic_ib_qp_grp *qp_grp);
+void usnic_ib_sysfs_qpn_remove(struct usnic_ib_qp_grp *qp_grp);
+
+#endif /* !USNIC_IB_SYSFS_H_ */

+ 765 - 0
drivers/infiniband/hw/usnic/usnic_ib_verbs.c

@@ -0,0 +1,765 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_addr.h>
+
+#include "usnic_abi.h"
+#include "usnic_ib.h"
+#include "usnic_common_util.h"
+#include "usnic_ib_qp_grp.h"
+#include "usnic_fwd.h"
+#include "usnic_log.h"
+#include "usnic_uiom.h"
+#include "usnic_transport.h"
+
+#define USNIC_DEFAULT_TRANSPORT USNIC_TRANSPORT_ROCE_CUSTOM
+
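+/* Crude conversion: only the first byte of the version string is stored. */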
+static void usnic_ib_fw_string_to_u64(char *fw_ver_str, u64 *fw_ver)
+{
+	*fw_ver = (u64) *fw_ver_str;
+}
+
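+/*
+ * Fill the create_qp uverbs response with everything userspace needs to
+ * drive the QP directly: the VF index, BAR0 address and length, the vNIC
+ * indices of the RQs, WQs and CQs backing the QP group, and the transport
+ * of the default flow.
+ */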
+static int usnic_ib_fill_create_qp_resp(struct usnic_ib_qp_grp *qp_grp,
+					struct ib_udata *udata)
+{
+	struct usnic_ib_dev *us_ibdev;
+	struct usnic_ib_create_qp_resp resp;
+	struct pci_dev *pdev;
+	struct vnic_dev_bar *bar;
+	struct usnic_vnic_res_chunk *chunk;
+	struct usnic_ib_qp_grp_flow *default_flow;
+	int i, err;
+
+	memset(&resp, 0, sizeof(resp));
+
+	us_ibdev = qp_grp->vf->pf;
+	pdev = usnic_vnic_get_pdev(qp_grp->vf->vnic);
+	if (!pdev) {
+		usnic_err("Failed to get pdev of qp_grp %d\n",
+				qp_grp->grp_id);
+		return -EFAULT;
+	}
+
+	bar = usnic_vnic_get_bar(qp_grp->vf->vnic, 0);
+	if (!bar) {
+		usnic_err("Failed to get bar0 of qp_grp %d vf %s",
+				qp_grp->grp_id, pci_name(pdev));
+		return -EFAULT;
+	}
+
+	resp.vfid = usnic_vnic_get_index(qp_grp->vf->vnic);
+	resp.bar_bus_addr = bar->bus_addr;
+	resp.bar_len = bar->len;
+
+	chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_RQ);
+	if (IS_ERR_OR_NULL(chunk)) {
+		usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+			usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_RQ),
+			qp_grp->grp_id,
+			PTR_ERR(chunk));
+		return chunk ? PTR_ERR(chunk) : -ENOMEM;
+	}
+
+	WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_RQ);
+	resp.rq_cnt = chunk->cnt;
+	for (i = 0; i < chunk->cnt; i++)
+		resp.rq_idx[i] = chunk->res[i]->vnic_idx;
+
+	chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_WQ);
+	if (IS_ERR_OR_NULL(chunk)) {
+		usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+			usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_WQ),
+			qp_grp->grp_id,
+			PTR_ERR(chunk));
+		return chunk ? PTR_ERR(chunk) : -ENOMEM;
+	}
+
+	WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_WQ);
+	resp.wq_cnt = chunk->cnt;
+	for (i = 0; i < chunk->cnt; i++)
+		resp.wq_idx[i] = chunk->res[i]->vnic_idx;
+
+	chunk = usnic_ib_qp_grp_get_chunk(qp_grp, USNIC_VNIC_RES_TYPE_CQ);
+	if (IS_ERR_OR_NULL(chunk)) {
+		usnic_err("Failed to get chunk %s for qp_grp %d with err %ld\n",
+			usnic_vnic_res_type_to_str(USNIC_VNIC_RES_TYPE_CQ),
+			qp_grp->grp_id,
+			PTR_ERR(chunk));
+		return chunk ? PTR_ERR(chunk) : -ENOMEM;
+	}
+
+	WARN_ON(chunk->type != USNIC_VNIC_RES_TYPE_CQ);
+	resp.cq_cnt = chunk->cnt;
+	for (i = 0; i < chunk->cnt; i++)
+		resp.cq_idx[i] = chunk->res[i]->vnic_idx;
+
+	default_flow = list_first_entry(&qp_grp->flows_lst,
+					struct usnic_ib_qp_grp_flow, link);
+	resp.transport = default_flow->trans_type;
+
+	err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+	if (err) {
+		usnic_err("Failed to copy udata for %s", us_ibdev->ib_dev.name);
+		return err;
+	}
+
+	return 0;
+}
+
+static struct usnic_ib_qp_grp*
+find_free_vf_and_create_qp_grp(struct usnic_ib_dev *us_ibdev,
+				struct usnic_ib_pd *pd,
+				struct usnic_transport_spec *trans_spec,
+				struct usnic_vnic_res_spec *res_spec)
+{
+	struct usnic_ib_vf *vf;
+	struct usnic_vnic *vnic;
+	struct usnic_ib_qp_grp *qp_grp;
+	struct device *dev, **dev_list;
+	int i, found = 0;
+
+	BUG_ON(!mutex_is_locked(&us_ibdev->usdev_lock));
+
+	if (list_empty(&us_ibdev->vf_dev_list)) {
+		usnic_info("No vfs to allocate\n");
+		return NULL;
+	}
+
+	if (usnic_ib_share_vf) {
+		/* Try to find resources on a used vf which is in pd */
+		dev_list = usnic_uiom_get_dev_list(pd->umem_pd);
+		for (i = 0; dev_list[i]; i++) {
+			dev = dev_list[i];
+			vf = pci_get_drvdata(to_pci_dev(dev));
+			spin_lock(&vf->lock);
+			vnic = vf->vnic;
+			if (!usnic_vnic_check_room(vnic, res_spec)) {
+				usnic_dbg("Found used vnic %s from %s\n",
+						us_ibdev->ib_dev.name,
+						pci_name(usnic_vnic_get_pdev(
+									vnic)));
+				found = 1;
+				break;
+			}
+			spin_unlock(&vf->lock);
+
+		}
+		usnic_uiom_free_dev_list(dev_list);
+	}
+
+	if (!found) {
+		/* Try to find resources on an unused vf */
+		list_for_each_entry(vf, &us_ibdev->vf_dev_list, link) {
+			spin_lock(&vf->lock);
+			vnic = vf->vnic;
+			if (vf->qp_grp_ref_cnt == 0 &&
+				usnic_vnic_check_room(vnic, res_spec) == 0) {
+				found = 1;
+				break;
+			}
+			spin_unlock(&vf->lock);
+		}
+	}
+
+	if (!found) {
+		usnic_info("No free qp grp found on %s\n",
+				us_ibdev->ib_dev.name);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	qp_grp = usnic_ib_qp_grp_create(us_ibdev->ufdev, vf, pd, res_spec,
+						trans_spec);
+	spin_unlock(&vf->lock);
+	if (IS_ERR_OR_NULL(qp_grp)) {
+		usnic_err("Failed to allocate qp_grp\n");
+		return ERR_PTR(qp_grp ? PTR_ERR(qp_grp) : -ENOMEM);
+	}
+
+	return qp_grp;
+}
+
+static void qp_grp_destroy(struct usnic_ib_qp_grp *qp_grp)
+{
+	struct usnic_ib_vf *vf = qp_grp->vf;
+
+	WARN_ON(qp_grp->state != IB_QPS_RESET);
+
+	spin_lock(&vf->lock);
+	usnic_ib_qp_grp_destroy(qp_grp);
+	spin_unlock(&vf->lock);
+}
+
+static void eth_speed_to_ib_speed(int speed, u8 *active_speed,
+					u8 *active_width)
+{
+	if (speed <= 10000) {
+		*active_width = IB_WIDTH_1X;
+		*active_speed = IB_SPEED_FDR10;
+	} else if (speed <= 20000) {
+		*active_width = IB_WIDTH_4X;
+		*active_speed = IB_SPEED_DDR;
+	} else if (speed <= 30000) {
+		*active_width = IB_WIDTH_4X;
+		*active_speed = IB_SPEED_QDR;
+	} else if (speed <= 40000) {
+		*active_width = IB_WIDTH_4X;
+		*active_speed = IB_SPEED_FDR10;
+	} else {
+		*active_width = IB_WIDTH_4X;
+		*active_speed = IB_SPEED_EDR;
+	}
+}
+
+static int create_qp_validate_user_data(struct usnic_ib_create_qp_cmd cmd)
+{
+	if (cmd.spec.trans_type <= USNIC_TRANSPORT_UNKNOWN ||
+			cmd.spec.trans_type >= USNIC_TRANSPORT_MAX)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* Start of ib callback functions */
+
+enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
+						u8 port_num)
+{
+	return IB_LINK_LAYER_ETHERNET;
+}
+
+int usnic_ib_query_device(struct ib_device *ibdev,
+				struct ib_device_attr *props)
+{
+	struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+	union ib_gid gid;
+	struct ethtool_drvinfo info;
+	struct ethtool_cmd cmd;
+	int qp_per_vf;
+
+	usnic_dbg("\n");
+	mutex_lock(&us_ibdev->usdev_lock);
+	us_ibdev->netdev->ethtool_ops->get_drvinfo(us_ibdev->netdev, &info);
+	us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd);
+	memset(props, 0, sizeof(*props));
+	usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
+			&gid.raw[0]);
+	memcpy(&props->sys_image_guid, &gid.global.interface_id,
+		sizeof(gid.global.interface_id));
+	usnic_ib_fw_string_to_u64(&info.fw_version[0], &props->fw_ver);
+	props->max_mr_size = USNIC_UIOM_MAX_MR_SIZE;
+	props->page_size_cap = USNIC_UIOM_PAGE_SIZE;
+	props->vendor_id = PCI_VENDOR_ID_CISCO;
+	props->vendor_part_id = PCI_DEVICE_ID_CISCO_VIC_USPACE_NIC;
+	props->hw_ver = us_ibdev->pdev->subsystem_device;
+	qp_per_vf = max(us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_WQ],
+			us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_RQ]);
+	props->max_qp = qp_per_vf *
+		atomic_read(&us_ibdev->vf_cnt.refcount);
+	props->device_cap_flags = IB_DEVICE_PORT_ACTIVE_EVENT |
+		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
+	props->max_cq = us_ibdev->vf_res_cnt[USNIC_VNIC_RES_TYPE_CQ] *
+		atomic_read(&us_ibdev->vf_cnt.refcount);
+	props->max_pd = USNIC_UIOM_MAX_PD_CNT;
+	props->max_mr = USNIC_UIOM_MAX_MR_CNT;
+	props->local_ca_ack_delay = 0;
+	props->max_pkeys = 0;
+	props->atomic_cap = IB_ATOMIC_NONE;
+	props->masked_atomic_cap = props->atomic_cap;
+	props->max_qp_rd_atom = 0;
+	props->max_qp_init_rd_atom = 0;
+	props->max_res_rd_atom = 0;
+	props->max_srq = 0;
+	props->max_srq_wr = 0;
+	props->max_srq_sge = 0;
+	props->max_fast_reg_page_list_len = 0;
+	props->max_mcast_grp = 0;
+	props->max_mcast_qp_attach = 0;
+	props->max_total_mcast_qp_attach = 0;
+	props->max_map_per_fmr = 0;
+	/* Owned by Userspace
+	 * max_qp_wr, max_sge, max_sge_rd, max_cqe */
+	mutex_unlock(&us_ibdev->usdev_lock);
+
+	return 0;
+}
+
+int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
+				struct ib_port_attr *props)
+{
+	struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+	struct ethtool_cmd cmd;
+
+	usnic_dbg("\n");
+
+	mutex_lock(&us_ibdev->usdev_lock);
+	us_ibdev->netdev->ethtool_ops->get_settings(us_ibdev->netdev, &cmd);
+	memset(props, 0, sizeof(*props));
+
+	props->lid = 0;
+	props->lmc = 1;
+	props->sm_lid = 0;
+	props->sm_sl = 0;
+
+	if (!us_ibdev->ufdev->link_up) {
+		props->state = IB_PORT_DOWN;
+		props->phys_state = 3;
+	} else if (!us_ibdev->ufdev->inaddr) {
+		props->state = IB_PORT_INIT;
+		props->phys_state = 4;
+	} else {
+		props->state = IB_PORT_ACTIVE;
+		props->phys_state = 5;
+	}
+
+	props->port_cap_flags = 0;
+	props->gid_tbl_len = 1;
+	props->pkey_tbl_len = 1;
+	props->bad_pkey_cntr = 0;
+	props->qkey_viol_cntr = 0;
+	eth_speed_to_ib_speed(cmd.speed, &props->active_speed,
+				&props->active_width);
+	props->max_mtu = IB_MTU_4096;
+	props->active_mtu = iboe_get_mtu(us_ibdev->ufdev->mtu);
+	/* Userspace will adjust for hdrs */
+	props->max_msg_sz = us_ibdev->ufdev->mtu;
+	props->max_vl_num = 1;
+	mutex_unlock(&us_ibdev->usdev_lock);
+
+	return 0;
+}
+
+int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+				int qp_attr_mask,
+				struct ib_qp_init_attr *qp_init_attr)
+{
+	struct usnic_ib_qp_grp *qp_grp;
+	struct usnic_ib_vf *vf;
+	int err;
+
+	usnic_dbg("\n");
+
+	memset(qp_attr, 0, sizeof(*qp_attr));
+	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
+
+	qp_grp = to_uqp_grp(qp);
+	vf = qp_grp->vf;
+	mutex_lock(&vf->pf->usdev_lock);
+	usnic_dbg("\n");
+	qp_attr->qp_state = qp_grp->state;
+	qp_attr->cur_qp_state = qp_grp->state;
+
+	switch (qp_grp->ibqp.qp_type) {
+	case IB_QPT_UD:
+		qp_attr->qkey = 0;
+		break;
+	default:
+		usnic_err("Unexpected qp_type %d\n", qp_grp->ibqp.qp_type);
+		err = -EINVAL;
+		goto err_out;
+	}
+
+	mutex_unlock(&vf->pf->usdev_lock);
+	return 0;
+
+err_out:
+	mutex_unlock(&vf->pf->usdev_lock);
+	return err;
+}
+
+int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+				union ib_gid *gid)
+{
+
+	struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+	usnic_dbg("\n");
+
+	if (index > 1)
+		return -EINVAL;
+
+	mutex_lock(&us_ibdev->usdev_lock);
+	memset(&(gid->raw[0]), 0, sizeof(gid->raw));
+	usnic_mac_ip_to_gid(us_ibdev->ufdev->mac, us_ibdev->ufdev->inaddr,
+			&gid->raw[0]);
+	mutex_unlock(&us_ibdev->usdev_lock);
+
+	return 0;
+}
+
+int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+				u16 *pkey)
+{
+	if (index > 1)
+		return -EINVAL;
+
+	*pkey = 0xffff;
+	return 0;
+}
+
+struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
+					struct ib_ucontext *context,
+					struct ib_udata *udata)
+{
+	struct usnic_ib_pd *pd;
+	void *umem_pd;
+
+	usnic_dbg("\n");
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	umem_pd = pd->umem_pd = usnic_uiom_alloc_pd();
+	if (IS_ERR_OR_NULL(umem_pd)) {
+		kfree(pd);
+		return ERR_PTR(umem_pd ? PTR_ERR(umem_pd) : -ENOMEM);
+	}
+
+	usnic_info("domain 0x%p allocated for context 0x%p and device %s\n",
+			pd, context, ibdev->name);
+	return &pd->ibpd;
+}
+
+int usnic_ib_dealloc_pd(struct ib_pd *pd)
+{
+	usnic_info("freeing domain 0x%p\n", pd);
+
+	usnic_uiom_dealloc_pd((to_upd(pd))->umem_pd);
+	kfree(pd);
+	return 0;
+}
+
+struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
+					struct ib_qp_init_attr *init_attr,
+					struct ib_udata *udata)
+{
+	int err;
+	struct usnic_ib_dev *us_ibdev;
+	struct usnic_ib_qp_grp *qp_grp;
+	struct usnic_ib_ucontext *ucontext;
+	int cq_cnt;
+	struct usnic_vnic_res_spec res_spec;
+	struct usnic_ib_create_qp_cmd cmd;
+	struct usnic_transport_spec trans_spec;
+
+	usnic_dbg("\n");
+
+	ucontext = to_uucontext(pd->uobject->context);
+	us_ibdev = to_usdev(pd->device);
+
+	err = ib_copy_from_udata(&cmd, udata, sizeof(cmd));
+	if (err) {
+		usnic_err("%s: cannot copy udata for create_qp\n",
+				us_ibdev->ib_dev.name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	err = create_qp_validate_user_data(cmd);
+	if (err) {
+		usnic_err("%s: Failed to validate user data\n",
+				us_ibdev->ib_dev.name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (init_attr->qp_type != IB_QPT_UD) {
+		usnic_err("%s asked to make a non-UD QP: %d\n",
+				us_ibdev->ib_dev.name, init_attr->qp_type);
+		return ERR_PTR(-EINVAL);
+	}
+
+	trans_spec = cmd.spec;
+	mutex_lock(&us_ibdev->usdev_lock);
+	cq_cnt = (init_attr->send_cq == init_attr->recv_cq) ? 1 : 2;
+	res_spec = min_transport_spec[trans_spec.trans_type];
+	usnic_vnic_res_spec_update(&res_spec, USNIC_VNIC_RES_TYPE_CQ, cq_cnt);
+	qp_grp = find_free_vf_and_create_qp_grp(us_ibdev, to_upd(pd),
+						&trans_spec,
+						&res_spec);
+	if (IS_ERR_OR_NULL(qp_grp)) {
+		err = qp_grp ? PTR_ERR(qp_grp) : -ENOMEM;
+		goto out_release_mutex;
+	}
+
+	err = usnic_ib_fill_create_qp_resp(qp_grp, udata);
+	if (err) {
+		err = -EBUSY;
+		goto out_release_qp_grp;
+	}
+
+	qp_grp->ctx = ucontext;
+	list_add_tail(&qp_grp->link, &ucontext->qp_grp_list);
+	usnic_ib_log_vf(qp_grp->vf);
+	mutex_unlock(&us_ibdev->usdev_lock);
+	return &qp_grp->ibqp;
+
+out_release_qp_grp:
+	qp_grp_destroy(qp_grp);
+out_release_mutex:
+	mutex_unlock(&us_ibdev->usdev_lock);
+	return ERR_PTR(err);
+}
+
+int usnic_ib_destroy_qp(struct ib_qp *qp)
+{
+	struct usnic_ib_qp_grp *qp_grp;
+	struct usnic_ib_vf *vf;
+
+	usnic_dbg("\n");
+
+	qp_grp = to_uqp_grp(qp);
+	vf = qp_grp->vf;
+	mutex_lock(&vf->pf->usdev_lock);
+	if (usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RESET, NULL)) {
+		usnic_err("Failed to move qp grp %u to reset\n",
+				qp_grp->grp_id);
+	}
+
+	list_del(&qp_grp->link);
+	qp_grp_destroy(qp_grp);
+	mutex_unlock(&vf->pf->usdev_lock);
+
+	return 0;
+}
+
+int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+				int attr_mask, struct ib_udata *udata)
+{
+	struct usnic_ib_qp_grp *qp_grp;
+	int status;
+	usnic_dbg("\n");
+
+	qp_grp = to_uqp_grp(ibqp);
+
+	/* TODO: Future Support All States */
+	mutex_lock(&qp_grp->vf->pf->usdev_lock);
+	if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_INIT) {
+		status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_INIT, NULL);
+	} else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTR) {
+		status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTR, NULL);
+	} else if ((attr_mask & IB_QP_STATE) && attr->qp_state == IB_QPS_RTS) {
+		status = usnic_ib_qp_grp_modify(qp_grp, IB_QPS_RTS, NULL);
+	} else {
+		usnic_err("Unexpected combination mask: %u state: %u\n",
+				attr_mask & IB_QP_STATE, attr->qp_state);
+		status = -EINVAL;
+	}
+
+	mutex_unlock(&qp_grp->vf->pf->usdev_lock);
+	return status;
+}
+
+struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries,
+					int vector, struct ib_ucontext *context,
+					struct ib_udata *udata)
+{
+	struct ib_cq *cq;
+
+	usnic_dbg("\n");
+	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
+	if (!cq)
+		return ERR_PTR(-EBUSY);
+
+	return cq;
+}
+
+int usnic_ib_destroy_cq(struct ib_cq *cq)
+{
+	usnic_dbg("\n");
+	kfree(cq);
+	return 0;
+}
+
+struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
+					u64 virt_addr, int access_flags,
+					struct ib_udata *udata)
+{
+	struct usnic_ib_mr *mr;
+	int err;
+
+	usnic_dbg("start 0x%llx va 0x%llx length 0x%llx\n", start,
+			virt_addr, length);
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (IS_ERR_OR_NULL(mr))
+		return ERR_PTR(mr ? PTR_ERR(mr) : -ENOMEM);
+
+	mr->umem = usnic_uiom_reg_get(to_upd(pd)->umem_pd, start, length,
+					access_flags, 0);
+	if (IS_ERR_OR_NULL(mr->umem)) {
+		err = mr->umem ? PTR_ERR(mr->umem) : -EFAULT;
+		goto err_free;
+	}
+
+	mr->ibmr.lkey = mr->ibmr.rkey = 0;
+	return &mr->ibmr;
+
+err_free:
+	kfree(mr);
+	return ERR_PTR(err);
+}
+
+int usnic_ib_dereg_mr(struct ib_mr *ibmr)
+{
+	struct usnic_ib_mr *mr = to_umr(ibmr);
+
+	usnic_dbg("va 0x%lx length 0x%zx\n", mr->umem->va, mr->umem->length);
+
+	usnic_uiom_reg_release(mr->umem, ibmr->pd->uobject->context->closing);
+	kfree(mr);
+	return 0;
+}
+
+struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
+							struct ib_udata *udata)
+{
+	struct usnic_ib_ucontext *context;
+	struct usnic_ib_dev *us_ibdev = to_usdev(ibdev);
+	usnic_dbg("\n");
+
+	context = kmalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return ERR_PTR(-ENOMEM);
+
+	INIT_LIST_HEAD(&context->qp_grp_list);
+	mutex_lock(&us_ibdev->usdev_lock);
+	list_add_tail(&context->link, &us_ibdev->ctx_list);
+	mutex_unlock(&us_ibdev->usdev_lock);
+
+	return &context->ibucontext;
+}
+
+int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+	struct usnic_ib_ucontext *context = to_uucontext(ibcontext);
+	struct usnic_ib_dev *us_ibdev = to_usdev(ibcontext->device);
+	usnic_dbg("\n");
+
+	mutex_lock(&us_ibdev->usdev_lock);
+	BUG_ON(!list_empty(&context->qp_grp_list));
+	list_del(&context->link);
+	mutex_unlock(&us_ibdev->usdev_lock);
+	kfree(context);
+	return 0;
+}
+
+int usnic_ib_mmap(struct ib_ucontext *context,
+				struct vm_area_struct *vma)
+{
+	struct usnic_ib_ucontext *uctx = to_ucontext(context);
+	struct usnic_ib_dev *us_ibdev;
+	struct usnic_ib_qp_grp *qp_grp;
+	struct usnic_ib_vf *vf;
+	struct vnic_dev_bar *bar;
+	dma_addr_t bus_addr;
+	unsigned int len;
+	unsigned int vfid;
+
+	usnic_dbg("\n");
+
+	us_ibdev = to_usdev(context->device);
+	vma->vm_flags |= VM_IO;
+	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	vfid = vma->vm_pgoff;
+	usnic_dbg("Page Offset %lu PAGE_SHIFT %u VFID %u\n",
+			vma->vm_pgoff, PAGE_SHIFT, vfid);
+
+	mutex_lock(&us_ibdev->usdev_lock);
+	list_for_each_entry(qp_grp, &uctx->qp_grp_list, link) {
+		vf = qp_grp->vf;
+		if (usnic_vnic_get_index(vf->vnic) == vfid) {
+			bar = usnic_vnic_get_bar(vf->vnic, 0);
+			if ((vma->vm_end - vma->vm_start) != bar->len) {
+				usnic_err("Bar0 Len %lu - Request map %lu\n",
+						bar->len,
+						vma->vm_end - vma->vm_start);
+				mutex_unlock(&us_ibdev->usdev_lock);
+				return -EINVAL;
+			}
+			bus_addr = bar->bus_addr;
+			len = bar->len;
+			usnic_dbg("bus: %pa vaddr: %p size: %ld\n",
+					&bus_addr, bar->vaddr, bar->len);
+			mutex_unlock(&us_ibdev->usdev_lock);
+
+			return remap_pfn_range(vma,
+						vma->vm_start,
+						bus_addr >> PAGE_SHIFT,
+						len, vma->vm_page_prot);
+		}
+	}
+
+	mutex_unlock(&us_ibdev->usdev_lock);
+	usnic_err("No VF %u found\n", vfid);
+	return -EINVAL;
+}
+
+/* In ib callbacks section -  Start of stub funcs */
+struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
+					struct ib_ah_attr *ah_attr)
+{
+	usnic_dbg("\n");
+	return ERR_PTR(-EPERM);
+}
+
+int usnic_ib_destroy_ah(struct ib_ah *ah)
+{
+	usnic_dbg("\n");
+	return -EINVAL;
+}
+
+int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+				struct ib_send_wr **bad_wr)
+{
+	usnic_dbg("\n");
+	return -EINVAL;
+}
+
+int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+				struct ib_recv_wr **bad_wr)
+{
+	usnic_dbg("\n");
+	return -EINVAL;
+}
+
+int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
+				struct ib_wc *wc)
+{
+	usnic_dbg("\n");
+	return -EINVAL;
+}
+
+int usnic_ib_req_notify_cq(struct ib_cq *cq,
+					enum ib_cq_notify_flags flags)
+{
+	usnic_dbg("\n");
+	return -EINVAL;
+}
+
+struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc)
+{
+	usnic_dbg("\n");
+	return ERR_PTR(-ENOMEM);
+}
+
+
+/* In ib callbacks section - End of stub funcs */
+/* End of ib callbacks section */
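
usnic_ib_mmap() above treats the mmap page offset as a VF index and requires the mapping length to equal the VF's BAR0 length exactly. A minimal userspace sketch of that contract follows, under the assumption that vfid and bar_len come from the create-QP response filled in by usnic_ib_fill_create_qp_resp() and that uverbs_fd is the open uverbs device fd; the helper name is hypothetical.

#include <sys/mman.h>
#include <unistd.h>

/* Map BAR0 of the VF that backs a QP group. */
static void *map_vf_bar0(int uverbs_fd, unsigned int vfid, size_t bar_len)
{
	/* The kernel reads the VF index back out of vma->vm_pgoff. */
	off_t offset = (off_t)vfid * sysconf(_SC_PAGESIZE);

	return mmap(NULL, bar_len, PROT_READ | PROT_WRITE, MAP_SHARED,
		    uverbs_fd, offset);
}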

+ 72 - 0
drivers/infiniband/hw/usnic/usnic_ib_verbs.h

@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_IB_VERBS_H_
+#define USNIC_IB_VERBS_H_
+
+#include "usnic_ib.h"
+
+enum rdma_link_layer usnic_ib_port_link_layer(struct ib_device *device,
+						u8 port_num);
+int usnic_ib_query_device(struct ib_device *ibdev,
+				struct ib_device_attr *props);
+int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
+				struct ib_port_attr *props);
+int usnic_ib_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+				int qp_attr_mask,
+				struct ib_qp_init_attr *qp_init_attr);
+int usnic_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
+				union ib_gid *gid);
+int usnic_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+				u16 *pkey);
+struct ib_pd *usnic_ib_alloc_pd(struct ib_device *ibdev,
+				struct ib_ucontext *context,
+				struct ib_udata *udata);
+int usnic_ib_dealloc_pd(struct ib_pd *pd);
+struct ib_qp *usnic_ib_create_qp(struct ib_pd *pd,
+					struct ib_qp_init_attr *init_attr,
+					struct ib_udata *udata);
+int usnic_ib_destroy_qp(struct ib_qp *qp);
+int usnic_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+				int attr_mask, struct ib_udata *udata);
+struct ib_cq *usnic_ib_create_cq(struct ib_device *ibdev, int entries,
+					int vector, struct ib_ucontext *context,
+					struct ib_udata *udata);
+int usnic_ib_destroy_cq(struct ib_cq *cq);
+struct ib_mr *usnic_ib_reg_mr(struct ib_pd *pd, u64 start, u64 length,
+				u64 virt_addr, int access_flags,
+				struct ib_udata *udata);
+int usnic_ib_dereg_mr(struct ib_mr *ibmr);
+struct ib_ucontext *usnic_ib_alloc_ucontext(struct ib_device *ibdev,
+						struct ib_udata *udata);
+int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
+int usnic_ib_mmap(struct ib_ucontext *context,
+			struct vm_area_struct *vma);
+struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd,
+					struct ib_ah_attr *ah_attr);
+int usnic_ib_destroy_ah(struct ib_ah *ah);
+int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+			struct ib_send_wr **bad_wr);
+int usnic_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+			struct ib_recv_wr **bad_wr);
+int usnic_ib_poll_cq(struct ib_cq *ibcq, int num_entries,
+			struct ib_wc *wc);
+int usnic_ib_req_notify_cq(struct ib_cq *cq,
+				enum ib_cq_notify_flags flags);
+struct ib_mr *usnic_ib_get_dma_mr(struct ib_pd *pd, int acc);
+#endif /* !USNIC_IB_VERBS_H */

+ 58 - 0
drivers/infiniband/hw/usnic/usnic_log.h

@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_LOG_H_
+#define USNIC_LOG_H_
+
+#include "usnic.h"
+
+extern unsigned int usnic_log_lvl;
+
+#define USNIC_LOG_LVL_NONE		(0)
+#define USNIC_LOG_LVL_ERR		(1)
+#define USNIC_LOG_LVL_INFO		(2)
+#define USNIC_LOG_LVL_DBG		(3)
+
+#define usnic_printk(lvl, args...) \
+	do { \
+		printk(lvl "%s:%s:%d: ", DRV_NAME, __func__, \
+				__LINE__); \
+		printk(args); \
+	} while (0)
+
+#define usnic_dbg(args...) \
+	do { \
+		if (unlikely(usnic_log_lvl >= USNIC_LOG_LVL_DBG)) { \
+			usnic_printk(KERN_INFO, args); \
+		} \
+	} while (0)
+
+#define usnic_info(args...) \
+	do { \
+		if (usnic_log_lvl >= USNIC_LOG_LVL_INFO) { \
+			usnic_printk(KERN_INFO, args); \
+		} \
+	} while (0)
+
+#define usnic_err(args...) \
+	do { \
+		if (usnic_log_lvl >= USNIC_LOG_LVL_ERR) { \
+			usnic_printk(KERN_ERR, args); \
+		} \
+	} while (0)
+#endif /* !USNIC_LOG_H_ */
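
All three wrappers expand to the same two-printk pattern and differ only in the level check, so callers simply pass a printf-style format. A short usage sketch, assuming usnic_log_lvl has been raised to USNIC_LOG_LVL_DBG (it is expected to be exposed as a module parameter elsewhere in the driver); the caller is hypothetical.

/* Hypothetical caller illustrating the three log levels. */
static void example_log_usage(int qp_grp_id, int err)
{
	usnic_err("failed to modify qp grp %d with err %d\n", qp_grp_id, err);
	usnic_info("qp grp %d is ready\n", qp_grp_id);
	usnic_dbg("tracing qp grp %d\n", qp_grp_id);
}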

+ 202 - 0
drivers/infiniband/hw/usnic/usnic_transport.c

@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/bitmap.h>
+#include <linux/file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <net/inet_sock.h>
+
+#include "usnic_transport.h"
+#include "usnic_log.h"
+
+/* ROCE */
+static unsigned long *roce_bitmap;
+static u16 roce_next_port = 1;
+#define ROCE_BITMAP_SZ ((1 << (8 /*CHAR_BIT*/ * sizeof(u16)))/8 /*CHAR BIT*/)
+static DEFINE_SPINLOCK(roce_bitmap_lock);
+
+const char *usnic_transport_to_str(enum usnic_transport_type type)
+{
+	switch (type) {
+	case USNIC_TRANSPORT_UNKNOWN:
+		return "Unknown";
+	case USNIC_TRANSPORT_ROCE_CUSTOM:
+		return "roce custom";
+	case USNIC_TRANSPORT_IPV4_UDP:
+		return "IPv4 UDP";
+	case USNIC_TRANSPORT_MAX:
+		return "Max?";
+	default:
+		return "Not known";
+	}
+}
+
+int usnic_transport_sock_to_str(char *buf, int buf_sz,
+					struct socket *sock)
+{
+	int err;
+	uint32_t addr;
+	uint16_t port;
+	int proto;
+
+	memset(buf, 0, buf_sz);
+	err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port);
+	if (err)
+		return 0;
+
+	return scnprintf(buf, buf_sz, "Proto:%u Addr:%pI4h Port:%hu",
+			proto, &addr, port);
+}
+
+/*
+ * reserve a port number.  if "0" specified, we will try to pick one
+ * starting at roce_next_port.  roce_next_port will take on the values
+ * 1..4096
+ */
+u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num)
+{
+	if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+		spin_lock(&roce_bitmap_lock);
+		if (!port_num) {
+			port_num = bitmap_find_next_zero_area(roce_bitmap,
+						ROCE_BITMAP_SZ,
+						roce_next_port /* start */,
+						1 /* nr */,
+						0 /* align */);
+			roce_next_port = (port_num & 4095) + 1;
+		} else if (test_bit(port_num, roce_bitmap)) {
+			usnic_err("Failed to allocate port for %s\n",
+					usnic_transport_to_str(type));
+			spin_unlock(&roce_bitmap_lock);
+			goto out_fail;
+		}
+		bitmap_set(roce_bitmap, port_num, 1);
+		spin_unlock(&roce_bitmap_lock);
+	} else {
+		usnic_err("Failed to allocate port - transport %s unsupported\n",
+				usnic_transport_to_str(type));
+		goto out_fail;
+	}
+
+	usnic_dbg("Allocating port %hu for %s\n", port_num,
+			usnic_transport_to_str(type));
+	return port_num;
+
+out_fail:
+	return 0;
+}
+
+void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num)
+{
+	if (type == USNIC_TRANSPORT_ROCE_CUSTOM) {
+		spin_lock(&roce_bitmap_lock);
+		if (!port_num) {
+			usnic_err("Unreserving invalid port num 0 for %s\n",
+					usnic_transport_to_str(type));
+			goto out_roce_custom;
+		}
+
+		if (!test_bit(port_num, roce_bitmap)) {
+			usnic_err("Unreserving invalid %hu for %s\n",
+					port_num,
+					usnic_transport_to_str(type));
+			goto out_roce_custom;
+		}
+		bitmap_clear(roce_bitmap, port_num, 1);
+		usnic_dbg("Freeing port %hu for %s\n", port_num,
+				usnic_transport_to_str(type));
+out_roce_custom:
+		spin_unlock(&roce_bitmap_lock);
+	} else {
+		usnic_err("Freeing invalid port %hu for %d\n", port_num, type);
+	}
+}
+
+struct socket *usnic_transport_get_socket(int sock_fd)
+{
+	struct socket *sock;
+	int err;
+	char buf[25];
+
+	/* sockfd_lookup will internally do a fget */
+	sock = sockfd_lookup(sock_fd, &err);
+	if (!sock) {
+		usnic_err("Unable to lookup socket for fd %d with err %d\n",
+				sock_fd, err);
+		return ERR_PTR(-ENOENT);
+	}
+
+	usnic_transport_sock_to_str(buf, sizeof(buf), sock);
+	usnic_dbg("Get sock %s\n", buf);
+
+	return sock;
+}
+
+void usnic_transport_put_socket(struct socket *sock)
+{
+	char buf[100];
+
+	usnic_transport_sock_to_str(buf, sizeof(buf), sock);
+	usnic_dbg("Put sock %s\n", buf);
+	sockfd_put(sock);
+}
+
+int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
+					uint32_t *addr, uint16_t *port)
+{
+	int len;
+	int err;
+	struct sockaddr_in sock_addr;
+
+	err = sock->ops->getname(sock,
+				(struct sockaddr *)&sock_addr,
+				&len, 0);
+	if (err)
+		return err;
+
+	if (sock_addr.sin_family != AF_INET)
+		return -EINVAL;
+
+	if (proto)
+		*proto = sock->sk->sk_protocol;
+	if (port)
+		*port = ntohs(((struct sockaddr_in *)&sock_addr)->sin_port);
+	if (addr)
+		*addr = ntohl(((struct sockaddr_in *)
+					&sock_addr)->sin_addr.s_addr);
+
+	return 0;
+}
+
+int usnic_transport_init(void)
+{
+	roce_bitmap = kzalloc(ROCE_BITMAP_SZ, GFP_KERNEL);
+	if (!roce_bitmap) {
+		usnic_err("Failed to allocate bit map");
+		return -ENOMEM;
+	}
+
+	/* Do not ever allocate bit 0, hence set it here */
+	bitmap_set(roce_bitmap, 0, 1);
+	return 0;
+}
+
+void usnic_transport_fini(void)
+{
+	kfree(roce_bitmap);
+}
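
A short sketch of the reservation helpers above with a hypothetical caller (the driver itself reserves one port per QP-group flow): passing 0 asks for any free port, a zero return means the reservation failed, and the caller must unreserve the port when it is done with it.

/* Hypothetical caller; error handling trimmed to the essentials. */
static int example_rsrv_unrsrv(void)
{
	u16 port;

	port = usnic_transport_rsrv_port(USNIC_TRANSPORT_ROCE_CUSTOM, 0);
	if (!port)
		return -EBUSY;		/* 0 indicates failure */

	/* ... use "port" in the forwarding filter for the flow ... */

	usnic_transport_unrsrv_port(USNIC_TRANSPORT_ROCE_CUSTOM, port);
	return 0;
}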

+ 51 - 0
drivers/infiniband/hw/usnic/usnic_transport.h

@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_TRANSPORT_H_
+#define USNIC_TRANSPORT_H_
+
+#include "usnic_abi.h"
+
+const char *usnic_transport_to_str(enum usnic_transport_type trans_type);
+/*
+ * Returns number of bytes written, excluding null terminator. If
+ * nothing was written, the function returns 0.
+ */
+int usnic_transport_sock_to_str(char *buf, int buf_sz,
+					struct socket *sock);
+/*
+ * Reserve a port. If "port_num" is set, then the function will try
+ * to reserve that particular port.
+ */
+u16 usnic_transport_rsrv_port(enum usnic_transport_type type, u16 port_num);
+void usnic_transport_unrsrv_port(enum usnic_transport_type type, u16 port_num);
+/*
+ * Do a fget on the socket referred to by sock_fd and return the socket.
+ * Socket will not be destroyed before usnic_transport_put_socket has
+ * been called.
+ */
+struct socket *usnic_transport_get_socket(int sock_fd);
+void usnic_transport_put_socket(struct socket *sock);
+/*
+ * Call usnic_transport_get_socket before calling *_sock_get_addr
+ */
+int usnic_transport_sock_get_addr(struct socket *sock, int *proto,
+					uint32_t *addr, uint16_t *port);
+int usnic_transport_init(void);
+void usnic_transport_fini(void);
+#endif /* !USNIC_TRANSPORT_H */
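
The socket helpers are meant to be used together: look the socket up from a userspace fd, query its bound address, and drop the reference when the QP flow is torn down. A hedged sketch, assuming sock_fd is a bound UDP socket fd handed down from userspace (as for USNIC_TRANSPORT_IPV4_UDP) and that the hypothetical caller also includes usnic_log.h.

/* Hypothetical caller of the socket helpers declared above. */
static int example_sock_usage(int sock_fd)
{
	struct socket *sock;
	uint32_t addr;
	uint16_t port;
	int proto, err;

	sock = usnic_transport_get_socket(sock_fd);
	if (IS_ERR(sock))
		return PTR_ERR(sock);

	err = usnic_transport_sock_get_addr(sock, &proto, &addr, &port);
	if (!err)
		usnic_dbg("proto %d addr 0x%x port %hu\n", proto, addr, port);

	usnic_transport_put_socket(sock);
	return err;
}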

+ 604 - 0
drivers/infiniband/hw/usnic/usnic_uiom.c

@@ -0,0 +1,604 @@
+/*
+ * Copyright (c) 2005 Topspin Communications.  All rights reserved.
+ * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2013 Cisco Systems.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/sched.h>
+#include <linux/hugetlb.h>
+#include <linux/dma-attrs.h>
+#include <linux/iommu.h>
+#include <linux/workqueue.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+
+#include "usnic_log.h"
+#include "usnic_uiom.h"
+#include "usnic_uiom_interval_tree.h"
+
+static struct workqueue_struct *usnic_uiom_wq;
+
+#define USNIC_UIOM_PAGE_CHUNK						\
+	((PAGE_SIZE - offsetof(struct usnic_uiom_chunk, page_list))	/\
+	((void *) &((struct usnic_uiom_chunk *) 0)->page_list[1] -	\
+	(void *) &((struct usnic_uiom_chunk *) 0)->page_list[0]))
+
+static void usnic_uiom_reg_account(struct work_struct *work)
+{
+	struct usnic_uiom_reg *umem = container_of(work,
+						struct usnic_uiom_reg, work);
+
+	down_write(&umem->mm->mmap_sem);
+	umem->mm->locked_vm -= umem->diff;
+	up_write(&umem->mm->mmap_sem);
+	mmput(umem->mm);
+	kfree(umem);
+}
+
+static int usnic_uiom_dma_fault(struct iommu_domain *domain,
+				struct device *dev,
+				unsigned long iova, int flags,
+				void *token)
+{
+	usnic_err("Device %s iommu fault domain 0x%pK va 0x%lx flags 0x%x\n",
+		dev_name(dev),
+		domain, iova, flags);
+	return -ENOSYS;
+}
+
+static void usnic_uiom_put_pages(struct list_head *chunk_list, int dirty)
+{
+	struct usnic_uiom_chunk *chunk, *tmp;
+	struct page *page;
+	struct scatterlist *sg;
+	int i;
+	dma_addr_t pa;
+
+	list_for_each_entry_safe(chunk, tmp, chunk_list, list) {
+		for_each_sg(chunk->page_list, sg, chunk->nents, i) {
+			page = sg_page(sg);
+			pa = sg_phys(sg);
+			if (dirty)
+				set_page_dirty_lock(page);
+			put_page(page);
+			usnic_dbg("pa: %pa\n", &pa);
+		}
+		kfree(chunk);
+	}
+}
+
+static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
+				int dmasync, struct list_head *chunk_list)
+{
+	struct page **page_list;
+	struct scatterlist *sg;
+	struct usnic_uiom_chunk *chunk;
+	unsigned long locked;
+	unsigned long lock_limit;
+	unsigned long cur_base;
+	unsigned long npages;
+	int ret;
+	int off;
+	int i;
+	int flags;
+	dma_addr_t pa;
+	DEFINE_DMA_ATTRS(attrs);
+
+	if (dmasync)
+		dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
+
+	if (!can_do_mlock())
+		return -EPERM;
+
+	INIT_LIST_HEAD(chunk_list);
+
+	page_list = (struct page **) __get_free_page(GFP_KERNEL);
+	if (!page_list)
+		return -ENOMEM;
+
+	npages = PAGE_ALIGN(size + (addr & ~PAGE_MASK)) >> PAGE_SHIFT;
+
+	down_write(&current->mm->mmap_sem);
+
+	locked = npages + current->mm->locked_vm;
+	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	flags = IOMMU_READ | IOMMU_CACHE;
+	flags |= (writable) ? IOMMU_WRITE : 0;
+	cur_base = addr & PAGE_MASK;
+	ret = 0;
+
+	while (npages) {
+		ret = get_user_pages(current, current->mm, cur_base,
+					min_t(unsigned long, npages,
+					PAGE_SIZE / sizeof(struct page *)),
+					1, !writable, page_list, NULL);
+
+		if (ret < 0)
+			goto out;
+
+		npages -= ret;
+		off = 0;
+
+		while (ret) {
+			chunk = kmalloc(sizeof(*chunk) +
+					sizeof(struct scatterlist) *
+					min_t(int, ret, USNIC_UIOM_PAGE_CHUNK),
+					GFP_KERNEL);
+			if (!chunk) {
+				ret = -ENOMEM;
+				goto out;
+			}
+
+			chunk->nents = min_t(int, ret, USNIC_UIOM_PAGE_CHUNK);
+			sg_init_table(chunk->page_list, chunk->nents);
+			for_each_sg(chunk->page_list, sg, chunk->nents, i) {
+				sg_set_page(sg, page_list[i + off],
+						PAGE_SIZE, 0);
+				pa = sg_phys(sg);
+				usnic_dbg("va: 0x%lx pa: %pa\n",
+						cur_base + i*PAGE_SIZE, &pa);
+			}
+			cur_base += chunk->nents * PAGE_SIZE;
+			ret -= chunk->nents;
+			off += chunk->nents;
+			list_add_tail(&chunk->list, chunk_list);
+		}
+
+		ret = 0;
+	}
+
+out:
+	if (ret < 0)
+		usnic_uiom_put_pages(chunk_list, 0);
+	else
+		current->mm->locked_vm = locked;
+
+	up_write(&current->mm->mmap_sem);
+	free_page((unsigned long) page_list);
+	return ret;
+}
+
+static void usnic_uiom_unmap_sorted_intervals(struct list_head *intervals,
+						struct usnic_uiom_pd *pd)
+{
+	struct usnic_uiom_interval_node *interval, *tmp;
+	long unsigned va, size;
+
+	list_for_each_entry_safe(interval, tmp, intervals, link) {
+		va = interval->start << PAGE_SHIFT;
+		size = ((interval->last - interval->start) + 1) << PAGE_SHIFT;
+		while (size > 0) {
+			/* Workaround for RH 970401 */
+			usnic_dbg("va 0x%lx size 0x%lx", va, PAGE_SIZE);
+			iommu_unmap(pd->domain, va, PAGE_SIZE);
+			va += PAGE_SIZE;
+			size -= PAGE_SIZE;
+		}
+	}
+}
+
+static void __usnic_uiom_reg_release(struct usnic_uiom_pd *pd,
+					struct usnic_uiom_reg *uiomr,
+					int dirty)
+{
+	int npages;
+	unsigned long vpn_start, vpn_last;
+	struct usnic_uiom_interval_node *interval, *tmp;
+	int writable = 0;
+	LIST_HEAD(rm_intervals);
+
+	npages = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+	vpn_start = (uiomr->va & PAGE_MASK) >> PAGE_SHIFT;
+	vpn_last = vpn_start + npages - 1;
+
+	spin_lock(&pd->lock);
+	usnic_uiom_remove_interval(&pd->rb_root, vpn_start,
+					vpn_last, &rm_intervals);
+	usnic_uiom_unmap_sorted_intervals(&rm_intervals, pd);
+
+	list_for_each_entry_safe(interval, tmp, &rm_intervals, link) {
+		if (interval->flags & IOMMU_WRITE)
+			writable = 1;
+		list_del(&interval->link);
+		kfree(interval);
+	}
+
+	usnic_uiom_put_pages(&uiomr->chunk_list, dirty & writable);
+	spin_unlock(&pd->lock);
+}
+
+static int usnic_uiom_map_sorted_intervals(struct list_head *intervals,
+						struct usnic_uiom_reg *uiomr)
+{
+	int i, err;
+	size_t size;
+	struct usnic_uiom_chunk *chunk;
+	struct usnic_uiom_interval_node *interval_node;
+	dma_addr_t pa;
+	dma_addr_t pa_start = 0;
+	dma_addr_t pa_end = 0;
+	long int va_start = -EINVAL;
+	struct usnic_uiom_pd *pd = uiomr->pd;
+	long int va = uiomr->va & PAGE_MASK;
+	int flags = IOMMU_READ | IOMMU_CACHE;
+
+	flags |= (uiomr->writable) ? IOMMU_WRITE : 0;
+	chunk = list_first_entry(&uiomr->chunk_list, struct usnic_uiom_chunk,
+									list);
+	list_for_each_entry(interval_node, intervals, link) {
+iter_chunk:
+		for (i = 0; i < chunk->nents; i++, va += PAGE_SIZE) {
+			pa = sg_phys(&chunk->page_list[i]);
+			if ((va >> PAGE_SHIFT) < interval_node->start)
+				continue;
+
+			if ((va >> PAGE_SHIFT) == interval_node->start) {
+				/* First page of the interval */
+				va_start = va;
+				pa_start = pa;
+				pa_end = pa;
+			}
+
+			WARN_ON(va_start == -EINVAL);
+
+			if ((pa_end + PAGE_SIZE != pa) &&
+					(pa != pa_start)) {
+				/* PAs are not contiguous */
+				size = pa_end - pa_start + PAGE_SIZE;
+				usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x",
+					va_start, &pa_start, size, flags);
+				err = iommu_map(pd->domain, va_start, pa_start,
+							size, flags);
+				if (err) {
+					usnic_err("Failed to map va 0x%lx pa 0x%pa size 0x%zx with err %d\n",
+						va_start, &pa_start, size, err);
+					goto err_out;
+				}
+				va_start = va;
+				pa_start = pa;
+				pa_end = pa;
+			}
+
+			if ((va >> PAGE_SHIFT) == interval_node->last) {
+				/* Last page of the interval */
+				size = pa - pa_start + PAGE_SIZE;
+				usnic_dbg("va 0x%lx pa %pa size 0x%zx flags 0x%x\n",
+					va_start, &pa_start, size, flags);
+				err = iommu_map(pd->domain, va_start, pa_start,
+						size, flags);
+				if (err) {
+					usnic_err("Failed to map va 0x%lx pa %pa size 0x%zx with err %d\n",
+						va_start, &pa_start, size, err);
+					goto err_out;
+				}
+				break;
+			}
+
+			if (pa != pa_start)
+				pa_end += PAGE_SIZE;
+		}
+
+		if (i == chunk->nents) {
+			/*
+			 * Hit last entry of the chunk,
+			 * hence advance to next chunk
+			 */
+			chunk = list_first_entry(&chunk->list,
+							struct usnic_uiom_chunk,
+							list);
+			goto iter_chunk;
+		}
+	}
+
+	return 0;
+
+err_out:
+	usnic_uiom_unmap_sorted_intervals(intervals, pd);
+	return err;
+}
+
+struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
+						unsigned long addr, size_t size,
+						int writable, int dmasync)
+{
+	struct usnic_uiom_reg *uiomr;
+	unsigned long va_base, vpn_start, vpn_last;
+	unsigned long npages;
+	int offset, err;
+	LIST_HEAD(sorted_diff_intervals);
+
+	/*
+	 * Intel IOMMU map throws an error if a translation entry is
+	 * changed from read to write.  This module may not unmap
+	 * and then remap the entry after fixing the permission
+	 * because that opens up a small window where hw DMA may page fault.
+	 * Hence, make all entries writable.
+	 */
+	writable = 1;
+
+	va_base = addr & PAGE_MASK;
+	offset = addr & ~PAGE_MASK;
+	npages = PAGE_ALIGN(size + offset) >> PAGE_SHIFT;
+	vpn_start = (addr & PAGE_MASK) >> PAGE_SHIFT;
+	vpn_last = vpn_start + npages - 1;
+
+	uiomr = kmalloc(sizeof(*uiomr), GFP_KERNEL);
+	if (!uiomr)
+		return ERR_PTR(-ENOMEM);
+
+	uiomr->va = va_base;
+	uiomr->offset = offset;
+	uiomr->length = size;
+	uiomr->writable = writable;
+	uiomr->pd = pd;
+
+	err = usnic_uiom_get_pages(addr, size, writable, dmasync,
+					&uiomr->chunk_list);
+	if (err) {
+		usnic_err("Failed get_pages vpn [0x%lx,0x%lx] err %d\n",
+				vpn_start, vpn_last, err);
+		goto out_free_uiomr;
+	}
+
+	spin_lock(&pd->lock);
+	err = usnic_uiom_get_intervals_diff(vpn_start, vpn_last,
+						(writable) ? IOMMU_WRITE : 0,
+						IOMMU_WRITE,
+						&pd->rb_root,
+						&sorted_diff_intervals);
+	if (err) {
+		usnic_err("Failed disjoint interval vpn [0x%lx,0x%lx] err %d\n",
+						vpn_start, vpn_last, err);
+		goto out_put_pages;
+	}
+
+	err = usnic_uiom_map_sorted_intervals(&sorted_diff_intervals, uiomr);
+	if (err) {
+		usnic_err("Failed map interval vpn [0x%lx,0x%lx] err %d\n",
+						vpn_start, vpn_last, err);
+		goto out_put_intervals;
+
+	}
+
+	err = usnic_uiom_insert_interval(&pd->rb_root, vpn_start, vpn_last,
+					(writable) ? IOMMU_WRITE : 0);
+	if (err) {
+		usnic_err("Failed insert interval vpn [0x%lx,0x%lx] err %d\n",
+						vpn_start, vpn_last, err);
+		goto out_unmap_intervals;
+	}
+
+	usnic_uiom_put_interval_set(&sorted_diff_intervals);
+	spin_unlock(&pd->lock);
+
+	return uiomr;
+
+out_unmap_intervals:
+	usnic_uiom_unmap_sorted_intervals(&sorted_diff_intervals, pd);
+out_put_intervals:
+	usnic_uiom_put_interval_set(&sorted_diff_intervals);
+out_put_pages:
+	usnic_uiom_put_pages(&uiomr->chunk_list, 0);
+	spin_unlock(&pd->lock);
+out_free_uiomr:
+	kfree(uiomr);
+	return ERR_PTR(err);
+}
+
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing)
+{
+	struct mm_struct *mm;
+	unsigned long diff;
+
+	__usnic_uiom_reg_release(uiomr->pd, uiomr, 1);
+
+	mm = get_task_mm(current);
+	if (!mm) {
+		kfree(uiomr);
+		return;
+	}
+
+	diff = PAGE_ALIGN(uiomr->length + uiomr->offset) >> PAGE_SHIFT;
+
+	/*
+	 * We may be called with the mm's mmap_sem already held.  This
+	 * can happen when a userspace munmap() is the call that drops
+	 * the last reference to our file and calls our release
+	 * method.  If there are memory regions to destroy, we'll end
+	 * up here and not be able to take the mmap_sem.  In that case
+	 * we defer the vm_locked accounting to the system workqueue.
+	 */
+	if (closing) {
+		if (!down_write_trylock(&mm->mmap_sem)) {
+			INIT_WORK(&uiomr->work, usnic_uiom_reg_account);
+			uiomr->mm = mm;
+			uiomr->diff = diff;
+
+			queue_work(usnic_uiom_wq, &uiomr->work);
+			return;
+		}
+	} else
+		down_write(&mm->mmap_sem);
+
+	current->mm->locked_vm -= diff;
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+	kfree(uiomr);
+}
+
+struct usnic_uiom_pd *usnic_uiom_alloc_pd(void)
+{
+	struct usnic_uiom_pd *pd;
+	void *domain;
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	pd->domain = domain = iommu_domain_alloc(&pci_bus_type);
+	if (IS_ERR_OR_NULL(domain)) {
+		usnic_err("Failed to allocate IOMMU domain with err %ld\n",
+				PTR_ERR(pd->domain));
+		kfree(pd);
+		return ERR_PTR(domain ? PTR_ERR(domain) : -ENOMEM);
+	}
+
+	iommu_set_fault_handler(pd->domain, usnic_uiom_dma_fault, NULL);
+
+	spin_lock_init(&pd->lock);
+	INIT_LIST_HEAD(&pd->devs);
+
+	return pd;
+}
+
+void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd)
+{
+	iommu_domain_free(pd->domain);
+	kfree(pd);
+}
+
+int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev)
+{
+	struct usnic_uiom_dev *uiom_dev;
+	int err;
+
+	uiom_dev = kzalloc(sizeof(*uiom_dev), GFP_ATOMIC);
+	if (!uiom_dev)
+		return -ENOMEM;
+	uiom_dev->dev = dev;
+
+	err = iommu_attach_device(pd->domain, dev);
+	if (err)
+		goto out_free_dev;
+
+	if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) {
+		usnic_err("IOMMU of %s does not support cache coherency\n",
+				dev_name(dev));
+		err = -EINVAL;
+		goto out_detach_device;
+	}
+
+	spin_lock(&pd->lock);
+	list_add_tail(&uiom_dev->link, &pd->devs);
+	pd->dev_cnt++;
+	spin_unlock(&pd->lock);
+
+	return 0;
+
+out_detach_device:
+	iommu_detach_device(pd->domain, dev);
+out_free_dev:
+	kfree(uiom_dev);
+	return err;
+}
+
+void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd, struct device *dev)
+{
+	struct usnic_uiom_dev *uiom_dev;
+	int found = 0;
+
+	spin_lock(&pd->lock);
+	list_for_each_entry(uiom_dev, &pd->devs, link) {
+		if (uiom_dev->dev == dev) {
+			found = 1;
+			break;
+		}
+	}
+
+	if (!found) {
+		usnic_err("Unable to free dev %s - not found\n",
+				dev_name(dev));
+		spin_unlock(&pd->lock);
+		return;
+	}
+
+	list_del(&uiom_dev->link);
+	pd->dev_cnt--;
+	spin_unlock(&pd->lock);
+
+	return iommu_detach_device(pd->domain, dev);
+}
+
+struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd)
+{
+	struct usnic_uiom_dev *uiom_dev;
+	struct device **devs;
+	int i = 0;
+
+	spin_lock(&pd->lock);
+	devs = kcalloc(pd->dev_cnt + 1, sizeof(*devs), GFP_ATOMIC);
+	if (!devs) {
+		devs = ERR_PTR(-ENOMEM);
+		goto out;
+	}
+
+	list_for_each_entry(uiom_dev, &pd->devs, link) {
+		devs[i++] = uiom_dev->dev;
+	}
+out:
+	spin_unlock(&pd->lock);
+	return devs;
+}
+
+void usnic_uiom_free_dev_list(struct device **devs)
+{
+	kfree(devs);
+}
+
+int usnic_uiom_init(char *drv_name)
+{
+	if (!iommu_present(&pci_bus_type)) {
+		usnic_err("IOMMU required but not present or enabled.  USNIC QPs will not function w/o enabling IOMMU\n");
+		return -EPERM;
+	}
+
+	usnic_uiom_wq = create_workqueue(drv_name);
+	if (!usnic_uiom_wq) {
+		usnic_err("Unable to alloc wq for drv %s\n", drv_name);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+void usnic_uiom_fini(void)
+{
+	flush_workqueue(usnic_uiom_wq);
+	destroy_workqueue(usnic_uiom_wq);
+}
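
Taken together, the functions above form a small pin-and-map lifecycle that the PD and MR verbs drive. A hedged sketch of that lifecycle, assuming dev is the VF's struct device and addr/size describe a userspace buffer; the writable argument is forced to 1 internally, as noted in usnic_uiom_reg_get(), and the caller shown here is hypothetical.

/* Hypothetical consumer of the uiom API declared in usnic_uiom.h. */
static int example_uiom_lifecycle(struct device *dev,
				  unsigned long addr, size_t size)
{
	struct usnic_uiom_pd *pd;
	struct usnic_uiom_reg *reg;
	int err;

	pd = usnic_uiom_alloc_pd();
	if (IS_ERR_OR_NULL(pd))
		return pd ? PTR_ERR(pd) : -ENOMEM;

	err = usnic_uiom_attach_dev_to_pd(pd, dev);
	if (err)
		goto out_dealloc_pd;

	/* Pin the user pages and install IOMMU mappings at the buffer's VA */
	reg = usnic_uiom_reg_get(pd, addr, size, 1 /* writable */, 0);
	if (IS_ERR_OR_NULL(reg)) {
		err = reg ? PTR_ERR(reg) : -EFAULT;
		goto out_detach_dev;
	}

	/* ... hand the mapped range to hardware, do I/O ... */

	usnic_uiom_reg_release(reg, 0 /* not in ucontext teardown */);
out_detach_dev:
	usnic_uiom_detach_dev_from_pd(pd, dev);
out_dealloc_pd:
	usnic_uiom_dealloc_pd(pd);
	return err;
}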

+ 80 - 0
drivers/infiniband/hw/usnic/usnic_uiom.h

@@ -0,0 +1,80 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_UIOM_H_
+#define USNIC_UIOM_H_
+
+#include <linux/list.h>
+#include <linux/scatterlist.h>
+
+#include "usnic_uiom_interval_tree.h"
+
+#define USNIC_UIOM_READ			(1)
+#define USNIC_UIOM_WRITE		(2)
+
+#define USNIC_UIOM_MAX_PD_CNT		(1000)
+#define USNIC_UIOM_MAX_MR_CNT		(1000000)
+#define USNIC_UIOM_MAX_MR_SIZE		(~0UL)
+#define USNIC_UIOM_PAGE_SIZE		(PAGE_SIZE)
+
+struct usnic_uiom_dev {
+	struct device			*dev;
+	struct list_head		link;
+};
+
+struct usnic_uiom_pd {
+	struct iommu_domain		*domain;
+	spinlock_t			lock;
+	struct rb_root			rb_root;
+	struct list_head		devs;
+	int				dev_cnt;
+};
+
+struct usnic_uiom_reg {
+	struct usnic_uiom_pd		*pd;
+	unsigned long			va;
+	size_t				length;
+	int				offset;
+	int				page_size;
+	int				writable;
+	struct list_head		chunk_list;
+	struct work_struct		work;
+	struct mm_struct		*mm;
+	unsigned long			diff;
+};
+
+struct usnic_uiom_chunk {
+	struct list_head		list;
+	int				nents;
+	struct scatterlist		page_list[0];
+};
+
+struct usnic_uiom_pd *usnic_uiom_alloc_pd(void);
+void usnic_uiom_dealloc_pd(struct usnic_uiom_pd *pd);
+int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev);
+void usnic_uiom_detach_dev_from_pd(struct usnic_uiom_pd *pd,
+					struct device *dev);
+struct device **usnic_uiom_get_dev_list(struct usnic_uiom_pd *pd);
+void usnic_uiom_free_dev_list(struct device **devs);
+struct usnic_uiom_reg *usnic_uiom_reg_get(struct usnic_uiom_pd *pd,
+						unsigned long addr, size_t size,
+						int access, int dmasync);
+void usnic_uiom_reg_release(struct usnic_uiom_reg *uiomr, int closing);
+int usnic_uiom_init(char *drv_name);
+void usnic_uiom_fini(void);
+#endif /* USNIC_UIOM_H_ */

+ 236 - 0
drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.c

@@ -0,0 +1,236 @@
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/list_sort.h>
+
+#include <linux/interval_tree_generic.h>
+#include "usnic_uiom_interval_tree.h"
+
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+
+#define MAKE_NODE(node, start, end, ref_cnt, flags, err, err_out)	\
+		do {							\
+			node = usnic_uiom_interval_node_alloc(start,	\
+					end, ref_cnt, flags);		\
+				if (!node) {				\
+					err = -ENOMEM;			\
+					goto err_out;			\
+				}					\
+		} while (0)
+
+#define MARK_FOR_ADD(node, list) (list_add_tail(&node->link, list))
+
+#define MAKE_NODE_AND_APPEND(node, start, end, ref_cnt, flags, err,	\
+				err_out, list)				\
+				do {					\
+					MAKE_NODE(node, start, end,	\
+						ref_cnt, flags, err,	\
+						err_out);		\
+					MARK_FOR_ADD(node, list);	\
+				} while (0)
+
+#define FLAGS_EQUAL(flags1, flags2, mask)				\
+			(((flags1) & (mask)) == ((flags2) & (mask)))
+
+static struct usnic_uiom_interval_node*
+usnic_uiom_interval_node_alloc(long int start, long int last, int ref_cnt,
+				int flags)
+{
+	struct usnic_uiom_interval_node *interval = kzalloc(sizeof(*interval),
+								GFP_ATOMIC);
+	if (!interval)
+		return NULL;
+
+	interval->start = start;
+	interval->last = last;
+	interval->flags = flags;
+	interval->ref_cnt = ref_cnt;
+
+	return interval;
+}
+
+static int interval_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct usnic_uiom_interval_node *node_a, *node_b;
+
+	node_a = list_entry(a, struct usnic_uiom_interval_node, link);
+	node_b = list_entry(b, struct usnic_uiom_interval_node, link);
+
+	/* Compare explicitly; casting the (long) difference to int could overflow */
+	if (node_a->start < node_b->start)
+		return -1;
+	else if (node_a->start > node_b->start)
+		return 1;
+
+	return 0;
+}
+
+static void
+find_intervals_intersection_sorted(struct rb_root *root, unsigned long start,
+					unsigned long last,
+					struct list_head *list)
+{
+	struct usnic_uiom_interval_node *node;
+
+	INIT_LIST_HEAD(list);
+
+	for (node = usnic_uiom_interval_tree_iter_first(root, start, last);
+		node;
+		node = usnic_uiom_interval_tree_iter_next(node, start, last))
+		list_add_tail(&node->link, list);
+
+	list_sort(NULL, list, interval_cmp);
+}
+
+int usnic_uiom_get_intervals_diff(unsigned long start, unsigned long last,
+					int flags, int flag_mask,
+					struct rb_root *root,
+					struct list_head *diff_set)
+{
+	struct usnic_uiom_interval_node *interval, *tmp;
+	int err = 0;
+	long int pivot = start;
+	LIST_HEAD(intersection_set);
+
+	INIT_LIST_HEAD(diff_set);
+
+	find_intervals_intersection_sorted(root, start, last,
+						&intersection_set);
+
+	list_for_each_entry(interval, &intersection_set, link) {
+		if (pivot < interval->start) {
+			MAKE_NODE_AND_APPEND(tmp, pivot, interval->start - 1,
+						1, flags, err, err_out,
+						diff_set);
+			pivot = interval->start;
+		}
+
+		/*
+		 * Invariant: Set [start, pivot] is either in diff_set or root,
+		 * but not in both.
+		 */
+
+		if (pivot > interval->last) {
+			continue;
+		} else if (pivot <= interval->last &&
+				FLAGS_EQUAL(interval->flags, flags,
+				flag_mask)) {
+			pivot = interval->last + 1;
+		}
+	}
+
+	if (pivot <= last)
+		MAKE_NODE_AND_APPEND(tmp, pivot, last, 1, flags, err, err_out,
+					diff_set);
+
+	return 0;
+
+err_out:
+	list_for_each_entry_safe(interval, tmp, diff_set, link) {
+		list_del(&interval->link);
+		kfree(interval);
+	}
+
+	return err;
+}
+
+void usnic_uiom_put_interval_set(struct list_head *intervals)
+{
+	struct usnic_uiom_interval_node *interval, *tmp;
+	list_for_each_entry_safe(interval, tmp, intervals, link)
+		kfree(interval);
+}
+
+int usnic_uiom_insert_interval(struct rb_root *root, unsigned long start,
+				unsigned long last, int flags)
+{
+	struct usnic_uiom_interval_node *interval, *tmp;
+	unsigned long istart, ilast;
+	int iref_cnt, iflags;
+	unsigned long lpivot = start;
+	int err = 0;
+	LIST_HEAD(to_add);
+	LIST_HEAD(intersection_set);
+
+	find_intervals_intersection_sorted(root, start, last,
+						&intersection_set);
+
+	list_for_each_entry(interval, &intersection_set, link) {
+		/*
+		 * Invariant - lpivot is the left edge of next interval to be
+		 * inserted
+		 */
+		istart = interval->start;
+		ilast = interval->last;
+		iref_cnt = interval->ref_cnt;
+		iflags = interval->flags;
+
+		if (istart < lpivot) {
+			MAKE_NODE_AND_APPEND(tmp, istart, lpivot - 1, iref_cnt,
+						iflags, err, err_out, &to_add);
+		} else if (istart > lpivot) {
+			MAKE_NODE_AND_APPEND(tmp, lpivot, istart - 1, 1, flags,
+						err, err_out, &to_add);
+			lpivot = istart;
+		} else {
+			lpivot = istart;
+		}
+
+		if (ilast > last) {
+			MAKE_NODE_AND_APPEND(tmp, lpivot, last, iref_cnt + 1,
+						iflags | flags, err, err_out,
+						&to_add);
+			MAKE_NODE_AND_APPEND(tmp, last + 1, ilast, iref_cnt,
+						iflags, err, err_out, &to_add);
+		} else {
+			MAKE_NODE_AND_APPEND(tmp, lpivot, ilast, iref_cnt + 1,
+						iflags | flags, err, err_out,
+						&to_add);
+		}
+
+		lpivot = ilast + 1;
+	}
+
+	if (lpivot <= last)
+		MAKE_NODE_AND_APPEND(tmp, lpivot, last, 1, flags, err, err_out,
+					&to_add);
+
+	list_for_each_entry_safe(interval, tmp, &intersection_set, link) {
+		usnic_uiom_interval_tree_remove(interval, root);
+		kfree(interval);
+	}
+
+	list_for_each_entry(interval, &to_add, link)
+		usnic_uiom_interval_tree_insert(interval, root);
+
+	return 0;
+
+err_out:
+	list_for_each_entry_safe(interval, tmp, &to_add, link)
+		kfree(interval);
+
+	return err;
+}
+
+void usnic_uiom_remove_interval(struct rb_root *root, unsigned long start,
+				unsigned long last, struct list_head *removed)
+{
+	struct usnic_uiom_interval_node *interval;
+
+	for (interval = usnic_uiom_interval_tree_iter_first(root, start, last);
+			interval;
+			interval = usnic_uiom_interval_tree_iter_next(interval,
+									start,
+									last)) {
+		if (--interval->ref_cnt == 0)
+			list_add_tail(&interval->link, removed);
+	}
+
+	list_for_each_entry(interval, removed, link)
+		usnic_uiom_interval_tree_remove(interval, root);
+}
+
+INTERVAL_TREE_DEFINE(struct usnic_uiom_interval_node, rb,
+			unsigned long, __subtree_last,
+			START, LAST, , usnic_uiom_interval_tree)
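
The insert/remove pair above implements reference-counted interval bookkeeping: inserting a range that overlaps an existing node splits the node, ORs in the new flags and bumps the ref count on the shared part, while remove only hands back nodes whose count has dropped to zero. A small worked example with hypothetical page numbers; the flags are opaque ints to the tree, and IOMMU_READ/IOMMU_WRITE are simply what usnic_uiom.c passes in.

/* Illustration only; the nodes left in "root" are not freed here. */
static void example_interval_usage(void)
{
	struct rb_root root = RB_ROOT;
	LIST_HEAD(removed);

	/* Pages 0-9 read-only, then 5-14 writable.  Afterwards the tree
	 * holds [0,4] ref 1, [5,9] ref 2 (flags ORed) and [10,14] ref 1. */
	usnic_uiom_insert_interval(&root, 0, 9, IOMMU_READ);
	usnic_uiom_insert_interval(&root, 5, 14, IOMMU_READ | IOMMU_WRITE);

	/* Dropping 0-9 only returns [0,4]; [5,9] still has a reference. */
	usnic_uiom_remove_interval(&root, 0, 9, &removed);
	usnic_uiom_put_interval_set(&removed);
}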

+ 73 - 0
drivers/infiniband/hw/usnic/usnic_uiom_interval_tree.h

@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_UIOM_INTERVAL_TREE_H_
+#define USNIC_UIOM_INTERVAL_TREE_H_
+
+#include <linux/rbtree.h>
+
+struct usnic_uiom_interval_node {
+	struct rb_node			rb;
+	struct list_head		link;
+	unsigned long			start;
+	unsigned long			last;
+	unsigned long			__subtree_last;
+	unsigned int			ref_cnt;
+	int				flags;
+};
+
+extern void
+usnic_uiom_interval_tree_insert(struct usnic_uiom_interval_node *node,
+					struct rb_root *root);
+extern void
+usnic_uiom_interval_tree_remove(struct usnic_uiom_interval_node *node,
+					struct rb_root *root);
+extern struct usnic_uiom_interval_node *
+usnic_uiom_interval_tree_iter_first(struct rb_root *root,
+					unsigned long start,
+					unsigned long last);
+extern struct usnic_uiom_interval_node *
+usnic_uiom_interval_tree_iter_next(struct usnic_uiom_interval_node *node,
+			unsigned long start, unsigned long last);
+/*
+ * Inserts {start...last} into {root}.  If there are overlaps,
+ * nodes will be broken up and merged
+ */
+int usnic_uiom_insert_interval(struct rb_root *root,
+				unsigned long start, unsigned long last,
+				int flags);
+/*
+ * Removes {start...last} from {root}.  The nodes removed are returned in
+ * 'removed.' The caller is responsible for freeing memory of nodes in
+ * 'removed.'
+ */
+void usnic_uiom_remove_interval(struct rb_root *root,
+				unsigned long start, unsigned long last,
+				struct list_head *removed);
+/*
+ * Returns {start...last} - {root} (relative complement of {start...last} in
+ * {root}) in diff_set sorted ascendingly
+ */
+int usnic_uiom_get_intervals_diff(unsigned long start,
+					unsigned long last, int flags,
+					int flag_mask,
+					struct rb_root *root,
+					struct list_head *diff_set);
+/* Call this to free diff_set returned by usnic_uiom_get_intervals_diff */
+void usnic_uiom_put_interval_set(struct list_head *intervals);
+#endif /* USNIC_UIOM_INTERVAL_TREE_H_ */
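A minimal usage sketch of the interval-tree API declared above (hypothetical caller; assumes <linux/slab.h> for kfree(); not part of the patch):

	/*
	 * Illustration only: track two overlapping pinned ranges, then
	 * release the first one.
	 */
	static int example_track_mappings(void)
	{
		struct rb_root root = RB_ROOT;
		struct usnic_uiom_interval_node *interval, *tmp;
		LIST_HEAD(removed);
		int err;

		/* Insert 0x100-0x1ff, then 0x180-0x2ff; going by the insertion
		 * logic above, the overlap (0x180-0x1ff) ends up as a node
		 * with ref_cnt == 2 and the rest with ref_cnt == 1. */
		err = usnic_uiom_insert_interval(&root, 0x100, 0x1ff, 0);
		if (err)
			return err;
		err = usnic_uiom_insert_interval(&root, 0x180, 0x2ff, 0);
		if (err)
			return err;

		/* Drop the first range; nodes whose ref_cnt reaches zero are
		 * moved to 'removed' and must be freed by the caller. */
		usnic_uiom_remove_interval(&root, 0x100, 0x1ff, &removed);
		list_for_each_entry_safe(interval, tmp, &removed, link)
			kfree(interval);

		return 0;
	}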

+ 467 - 0
drivers/infiniband/hw/usnic/usnic_vnic.c

@@ -0,0 +1,467 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "usnic_ib.h"
+#include "vnic_resource.h"
+#include "usnic_log.h"
+#include "usnic_vnic.h"
+
+struct usnic_vnic {
+	struct vnic_dev			*vdev;
+	struct vnic_dev_bar		bar[PCI_NUM_RESOURCES];
+	struct usnic_vnic_res_chunk	chunks[USNIC_VNIC_RES_TYPE_MAX];
+	spinlock_t			res_lock;
+};
+
+static enum vnic_res_type _to_vnic_res_type(enum usnic_vnic_res_type res_type)
+{
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+		vnic_res_type,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+		vnic_res_type,
+	static enum vnic_res_type usnic_vnic_type_2_vnic_type[] = {
+						USNIC_VNIC_RES_TYPES};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+	if (res_type >= USNIC_VNIC_RES_TYPE_MAX)
+		return RES_TYPE_MAX;
+
+	return usnic_vnic_type_2_vnic_type[res_type];
+}
+
+const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type)
+{
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+		desc,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+		desc,
+	static const char * const usnic_vnic_res_type_desc[] = {
+						USNIC_VNIC_RES_TYPES};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+	if (res_type >= USNIC_VNIC_RES_TYPE_MAX)
+		return "unknown";
+
+	return usnic_vnic_res_type_desc[res_type];
+
+}
+
+const char *usnic_vnic_pci_name(struct usnic_vnic *vnic)
+{
+	return pci_name(usnic_vnic_get_pdev(vnic));
+}
+
+int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf,
+			int buf_sz,
+			void *hdr_obj,
+			int (*printtitle)(void *, char*, int),
+			int (*printcols)(char *, int),
+			int (*printrow)(void *, char *, int))
+{
+	struct usnic_vnic_res_chunk *chunk;
+	struct usnic_vnic_res *res;
+	struct vnic_dev_bar *bar0;
+	int i, j, offset;
+
+	offset = 0;
+	bar0 = usnic_vnic_get_bar(vnic, 0);
+	offset += scnprintf(buf + offset, buf_sz - offset,
+			"VF:%hu BAR0 bus_addr=%pa vaddr=0x%p size=%ld ",
+			usnic_vnic_get_index(vnic),
+			&bar0->bus_addr,
+			bar0->vaddr, bar0->len);
+	if (printtitle)
+		offset += printtitle(hdr_obj, buf + offset, buf_sz - offset);
+	offset += scnprintf(buf + offset, buf_sz - offset, "\n");
+	offset += scnprintf(buf + offset, buf_sz - offset,
+			"|RES\t|CTRL_PIN\t\t|IN_USE\t");
+	if (printcols)
+		offset += printcols(buf + offset, buf_sz - offset);
+	offset += scnprintf(buf + offset, buf_sz - offset, "\n");
+
+	spin_lock(&vnic->res_lock);
+	for (i = 0; i < ARRAY_SIZE(vnic->chunks); i++) {
+		chunk = &vnic->chunks[i];
+		for (j = 0; j < chunk->cnt; j++) {
+			res = chunk->res[j];
+			offset += scnprintf(buf + offset, buf_sz - offset,
+					"|%s[%u]\t|0x%p\t|%u\t",
+					usnic_vnic_res_type_to_str(res->type),
+					res->vnic_idx, res->ctrl, !!res->owner);
+			if (printrow) {
+				offset += printrow(res->owner, buf + offset,
+							buf_sz - offset);
+			}
+			offset += scnprintf(buf + offset, buf_sz - offset,
+						"\n");
+		}
+	}
+	spin_unlock(&vnic->res_lock);
+	return offset;
+}
+
+void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
+				enum usnic_vnic_res_type trgt_type,
+				u16 cnt)
+{
+	int i;
+
+	for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+		if (spec->resources[i].type == trgt_type) {
+			spec->resources[i].cnt = cnt;
+			return;
+		}
+	}
+
+	WARN_ON(1);
+}
+
+int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
+					struct usnic_vnic_res_spec *res_spec)
+{
+	int found, i, j;
+
+	for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+		found = 0;
+
+		for (j = 0; j < USNIC_VNIC_RES_TYPE_MAX; j++) {
+			if (res_spec->resources[i].type !=
+				min_spec->resources[i].type)
+				continue;
+			found = 1;
+			if (min_spec->resources[i].cnt >
+					res_spec->resources[i].cnt)
+				return -EINVAL;
+			break;
+		}
+
+		if (!found)
+			return -EINVAL;
+	}
+	return 0;
+}
+
+int usnic_vnic_spec_dump(char *buf, int buf_sz,
+				struct usnic_vnic_res_spec *res_spec)
+{
+	enum usnic_vnic_res_type res_type;
+	int res_cnt;
+	int i;
+	int offset = 0;
+
+	for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+		res_type = res_spec->resources[i].type;
+		res_cnt = res_spec->resources[i].cnt;
+		offset += scnprintf(buf + offset, buf_sz - offset,
+				"Res: %s Cnt: %d ",
+				usnic_vnic_res_type_to_str(res_type),
+				res_cnt);
+	}
+
+	return offset;
+}
+
+int usnic_vnic_check_room(struct usnic_vnic *vnic,
+				struct usnic_vnic_res_spec *res_spec)
+{
+	int i;
+	enum usnic_vnic_res_type res_type;
+	int res_cnt;
+
+	for (i = 0; i < USNIC_VNIC_RES_TYPE_MAX; i++) {
+		res_type = res_spec->resources[i].type;
+		res_cnt = res_spec->resources[i].cnt;
+
+		if (res_type == USNIC_VNIC_RES_TYPE_EOL)
+			break;
+
+		if (res_cnt > usnic_vnic_res_free_cnt(vnic, res_type))
+			return -EBUSY;
+	}
+
+	return 0;
+}
+
+int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
+				enum usnic_vnic_res_type type)
+{
+	return vnic->chunks[type].cnt;
+}
+
+int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
+				enum usnic_vnic_res_type type)
+{
+	return vnic->chunks[type].free_cnt;
+}
+
+struct usnic_vnic_res_chunk *
+usnic_vnic_get_resources(struct usnic_vnic *vnic, enum usnic_vnic_res_type type,
+				int cnt, void *owner)
+{
+	struct usnic_vnic_res_chunk *src, *ret;
+	struct usnic_vnic_res *res;
+	int i;
+
+	if (usnic_vnic_res_free_cnt(vnic, type) < cnt || cnt < 1 || !owner)
+		return ERR_PTR(-EINVAL);
+
+	ret = kzalloc(sizeof(*ret), GFP_ATOMIC);
+	if (!ret) {
+		usnic_err("Failed to allocate chunk for %s - Out of memory\n",
+				usnic_vnic_pci_name(vnic));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret->res = kzalloc(sizeof(*(ret->res))*cnt, GFP_ATOMIC);
+	if (!ret->res) {
+		usnic_err("Failed to allocate resources for %s. Out of memory\n",
+				usnic_vnic_pci_name(vnic));
+		kfree(ret);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	spin_lock(&vnic->res_lock);
+	src = &vnic->chunks[type];
+	for (i = 0; i < src->cnt && ret->cnt < cnt; i++) {
+		res = src->res[i];
+		if (!res->owner) {
+			src->free_cnt--;
+			res->owner = owner;
+			ret->res[ret->cnt++] = res;
+		}
+	}
+
+	spin_unlock(&vnic->res_lock);
+	ret->type = type;
+	ret->vnic = vnic;
+	WARN_ON(ret->cnt != cnt);
+
+	return ret;
+}
+
+void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk)
+{
+
+	struct usnic_vnic_res *res;
+	int i;
+	struct usnic_vnic *vnic = chunk->vnic;
+
+	spin_lock(&vnic->res_lock);
+	while ((i = --chunk->cnt) >= 0) {
+		res = chunk->res[i];
+		chunk->res[i] = NULL;
+		res->owner = NULL;
+		vnic->chunks[res->type].free_cnt++;
+	}
+	spin_unlock(&vnic->res_lock);
+
+	kfree(chunk->res);
+	kfree(chunk);
+}
+
+u16 usnic_vnic_get_index(struct usnic_vnic *vnic)
+{
+	return usnic_vnic_get_pdev(vnic)->devfn - 1;
+}
+
+static int usnic_vnic_alloc_res_chunk(struct usnic_vnic *vnic,
+					enum usnic_vnic_res_type type,
+					struct usnic_vnic_res_chunk *chunk)
+{
+	int cnt, err, i;
+	struct usnic_vnic_res *res;
+
+	cnt = vnic_dev_get_res_count(vnic->vdev, _to_vnic_res_type(type));
+	if (cnt < 1)
+		return -EINVAL;
+
+	chunk->cnt = chunk->free_cnt = cnt;
+	chunk->res = kzalloc(sizeof(*(chunk->res))*cnt, GFP_KERNEL);
+	if (!chunk->res)
+		return -ENOMEM;
+
+	for (i = 0; i < cnt; i++) {
+		res = kzalloc(sizeof(*res), GFP_KERNEL);
+		if (!res) {
+			err = -ENOMEM;
+			goto fail;
+		}
+		res->type = type;
+		res->vnic_idx = i;
+		res->vnic = vnic;
+		res->ctrl = vnic_dev_get_res(vnic->vdev,
+						_to_vnic_res_type(type), i);
+		chunk->res[i] = res;
+	}
+
+	chunk->vnic = vnic;
+	return 0;
+fail:
+	for (i--; i >= 0; i--)
+		kfree(chunk->res[i]);
+	kfree(chunk->res);
+	return err;
+}
+
+static void usnic_vnic_free_res_chunk(struct usnic_vnic_res_chunk *chunk)
+{
+	int i;
+	for (i = 0; i < chunk->cnt; i++)
+		kfree(chunk->res[i]);
+	kfree(chunk->res);
+}
+
+static int usnic_vnic_discover_resources(struct pci_dev *pdev,
+						struct usnic_vnic *vnic)
+{
+	enum usnic_vnic_res_type res_type;
+	int i;
+	int err = 0;
+
+	for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+			continue;
+		vnic->bar[i].len = pci_resource_len(pdev, i);
+		vnic->bar[i].vaddr = pci_iomap(pdev, i, vnic->bar[i].len);
+		if (!vnic->bar[i].vaddr) {
+			usnic_err("Cannot memory-map BAR %d, aborting\n",
+					i);
+			err = -ENODEV;
+			goto out_clean_bar;
+		}
+		vnic->bar[i].bus_addr = pci_resource_start(pdev, i);
+	}
+
+	vnic->vdev = vnic_dev_register(NULL, pdev, pdev, vnic->bar,
+			ARRAY_SIZE(vnic->bar));
+	if (!vnic->vdev) {
+		usnic_err("Failed to register device %s\n",
+				pci_name(pdev));
+		err = -EINVAL;
+		goto out_clean_bar;
+	}
+
+	for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+			res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++) {
+		err = usnic_vnic_alloc_res_chunk(vnic, res_type,
+						&vnic->chunks[res_type]);
+		if (err) {
+			usnic_err("Failed to alloc res %s with err %d\n",
+					usnic_vnic_res_type_to_str(res_type),
+					err);
+			goto out_clean_chunks;
+		}
+	}
+
+	return 0;
+
+out_clean_chunks:
+	for (res_type--; res_type > USNIC_VNIC_RES_TYPE_EOL; res_type--)
+		usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
+	vnic_dev_unregister(vnic->vdev);
+out_clean_bar:
+	for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+			continue;
+		if (!vnic->bar[i].vaddr)
+			break;
+
+		iounmap(vnic->bar[i].vaddr);
+	}
+
+	return err;
+}
+
+struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic)
+{
+	return vnic_dev_get_pdev(vnic->vdev);
+}
+
+struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
+				int bar_num)
+{
+	return (bar_num < ARRAY_SIZE(vnic->bar)) ? &vnic->bar[bar_num] : NULL;
+}
+
+static void usnic_vnic_release_resources(struct usnic_vnic *vnic)
+{
+	int i;
+	struct pci_dev *pdev;
+	enum usnic_vnic_res_type res_type;
+
+	pdev = usnic_vnic_get_pdev(vnic);
+
+	for (res_type = USNIC_VNIC_RES_TYPE_EOL + 1;
+			res_type < USNIC_VNIC_RES_TYPE_MAX; res_type++)
+		usnic_vnic_free_res_chunk(&vnic->chunks[res_type]);
+
+	vnic_dev_unregister(vnic->vdev);
+
+	for (i = 0; i < ARRAY_SIZE(vnic->bar); i++) {
+		if (!(pci_resource_flags(pdev, i) & IORESOURCE_MEM))
+			continue;
+		iounmap(vnic->bar[i].vaddr);
+	}
+}
+
+struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev)
+{
+	struct usnic_vnic *vnic;
+	int err = 0;
+
+	if (!pci_is_enabled(pdev)) {
+		usnic_err("PCI dev %s is disabled\n", pci_name(pdev));
+		return ERR_PTR(-EINVAL);
+	}
+
+	vnic = kzalloc(sizeof(*vnic), GFP_KERNEL);
+	if (!vnic) {
+		usnic_err("Failed to alloc vnic for %s - out of memory\n",
+				pci_name(pdev));
+		return ERR_PTR(-ENOMEM);
+	}
+
+	spin_lock_init(&vnic->res_lock);
+
+	err = usnic_vnic_discover_resources(pdev, vnic);
+	if (err) {
+		usnic_err("Failed to discover %s resources with err %d\n",
+				pci_name(pdev), err);
+		goto out_free_vnic;
+	}
+
+	usnic_dbg("Allocated vnic for %s\n", usnic_vnic_pci_name(vnic));
+
+	return vnic;
+
+out_free_vnic:
+	kfree(vnic);
+
+	return ERR_PTR(err);
+}
+
+void usnic_vnic_free(struct usnic_vnic *vnic)
+{
+	usnic_vnic_release_resources(vnic);
+	kfree(vnic);
+}
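A hedged sketch of how a consumer might drive the vnic API defined above (the function name and owner pointer are hypothetical; error-pointer helpers come from <linux/err.h>; not part of the patch):

	/*
	 * Illustration only: claim two RQs from a VF, use them, give them back.
	 */
	static int example_use_vnic(struct pci_dev *pdev, void *owner)
	{
		struct usnic_vnic *vnic;
		struct usnic_vnic_res_chunk *rq_chunk;

		vnic = usnic_vnic_alloc(pdev);
		if (IS_ERR(vnic))
			return PTR_ERR(vnic);

		rq_chunk = usnic_vnic_get_resources(vnic, USNIC_VNIC_RES_TYPE_RQ,
							2, owner);
		if (IS_ERR(rq_chunk)) {
			usnic_vnic_free(vnic);
			return PTR_ERR(rq_chunk);
		}

		/* ... program rq_chunk->res[i]->ctrl registers here ... */

		usnic_vnic_put_resources(rq_chunk);
		usnic_vnic_free(vnic);
		return 0;
	}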

+ 103 - 0
drivers/infiniband/hw/usnic/usnic_vnic.h

@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2013, Cisco Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you may redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef USNIC_VNIC_H_
+#define USNIC_VNIC_H_
+
+#include <linux/pci.h>
+
+#include "vnic_dev.h"
+
+/*                      =USNIC_VNIC_RES_TYPE= =VNIC_RES=   =DESC= */
+#define USNIC_VNIC_RES_TYPES \
+	DEFINE_USNIC_VNIC_RES_AT(EOL, RES_TYPE_EOL, "EOL", 0) \
+	DEFINE_USNIC_VNIC_RES(WQ, RES_TYPE_WQ, "WQ") \
+	DEFINE_USNIC_VNIC_RES(RQ, RES_TYPE_RQ, "RQ") \
+	DEFINE_USNIC_VNIC_RES(CQ, RES_TYPE_CQ, "CQ") \
+	DEFINE_USNIC_VNIC_RES(INTR, RES_TYPE_INTR_CTRL, "INT") \
+	DEFINE_USNIC_VNIC_RES(MAX, RES_TYPE_MAX, "MAX")\
+
+#define DEFINE_USNIC_VNIC_RES_AT(usnic_vnic_res_t, vnic_res_type, desc, val) \
+	USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t = val,
+#define DEFINE_USNIC_VNIC_RES(usnic_vnic_res_t, vnic_res_type, desc) \
+	USNIC_VNIC_RES_TYPE_##usnic_vnic_res_t,
+enum usnic_vnic_res_type {
+	USNIC_VNIC_RES_TYPES
+};
+#undef DEFINE_USNIC_VNIC_RES
+#undef DEFINE_USNIC_VNIC_RES_AT
+
+struct usnic_vnic_res {
+	enum usnic_vnic_res_type	type;
+	unsigned int			vnic_idx;
+	struct usnic_vnic		*vnic;
+	void __iomem			*ctrl;
+	void				*owner;
+};
+
+struct usnic_vnic_res_chunk {
+	enum usnic_vnic_res_type	type;
+	int				cnt;
+	int				free_cnt;
+	struct usnic_vnic_res		**res;
+	struct usnic_vnic		*vnic;
+};
+
+struct usnic_vnic_res_desc {
+	enum usnic_vnic_res_type	type;
+	uint16_t			cnt;
+};
+
+struct usnic_vnic_res_spec {
+	struct usnic_vnic_res_desc resources[USNIC_VNIC_RES_TYPE_MAX];
+};
+
+const char *usnic_vnic_res_type_to_str(enum usnic_vnic_res_type res_type);
+const char *usnic_vnic_pci_name(struct usnic_vnic *vnic);
+int usnic_vnic_dump(struct usnic_vnic *vnic, char *buf, int buf_sz,
+			void *hdr_obj,
+			int (*printtitle)(void *, char*, int),
+			int (*printcols)(char *, int),
+			int (*printrow)(void *, char *, int));
+void usnic_vnic_res_spec_update(struct usnic_vnic_res_spec *spec,
+				enum usnic_vnic_res_type trgt_type,
+				u16 cnt);
+int usnic_vnic_res_spec_satisfied(const struct usnic_vnic_res_spec *min_spec,
+					struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_spec_dump(char *buf, int buf_sz,
+				struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_check_room(struct usnic_vnic *vnic,
+				struct usnic_vnic_res_spec *res_spec);
+int usnic_vnic_res_cnt(struct usnic_vnic *vnic,
+				enum usnic_vnic_res_type type);
+int usnic_vnic_res_free_cnt(struct usnic_vnic *vnic,
+				enum usnic_vnic_res_type type);
+struct usnic_vnic_res_chunk *
+usnic_vnic_get_resources(struct usnic_vnic *vnic,
+				enum usnic_vnic_res_type type,
+				int cnt,
+				void *owner);
+void usnic_vnic_put_resources(struct usnic_vnic_res_chunk *chunk);
+struct pci_dev *usnic_vnic_get_pdev(struct usnic_vnic *vnic);
+struct vnic_dev_bar *usnic_vnic_get_bar(struct usnic_vnic *vnic,
+				int bar_num);
+struct usnic_vnic *usnic_vnic_alloc(struct pci_dev *pdev);
+void usnic_vnic_free(struct usnic_vnic *vnic);
+u16 usnic_vnic_get_index(struct usnic_vnic *vnic);
+
+#endif /*!USNIC_VNIC_H_*/
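For reference, the USNIC_VNIC_RES_TYPES X-macro above is expanded with different DEFINE_USNIC_VNIC_RES*() definitions here and again in usnic_vnic.c; the enum definition above comes out roughly as follows (illustration only):

	/*
	 *	enum usnic_vnic_res_type {
	 *		USNIC_VNIC_RES_TYPE_EOL  = 0,
	 *		USNIC_VNIC_RES_TYPE_WQ,
	 *		USNIC_VNIC_RES_TYPE_RQ,
	 *		USNIC_VNIC_RES_TYPE_CQ,
	 *		USNIC_VNIC_RES_TYPE_INTR,
	 *		USNIC_VNIC_RES_TYPE_MAX,
	 *	};
	 *
	 * usnic_vnic.c re-expands the same list to build the matching
	 * vnic_res_type and description lookup tables, so the three
	 * views stay in sync from a single definition.
	 */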

+ 2 - 2
drivers/infiniband/ulp/ipoib/ipoib_main.c

@@ -104,6 +104,8 @@ int ipoib_open(struct net_device *dev)
 
 	ipoib_dbg(priv, "bringing up interface\n");
 
+	netif_carrier_off(dev);
+
 	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
 	if (ipoib_pkey_dev_delay_open(dev))
@@ -1366,8 +1368,6 @@ void ipoib_setup(struct net_device *dev)
 
 	memcpy(dev->broadcast, ipv4_bcast_addr, INFINIBAND_ALEN);
 
-	netif_carrier_off(dev);
-
 	priv->dev = dev;
 
 	spin_lock_init(&priv->lock);

+ 3 - 0
drivers/infiniband/ulp/ipoib/ipoib_verbs.c

@@ -192,6 +192,9 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
 	if (priv->hca_caps & IB_DEVICE_BLOCK_MULTICAST_LOOPBACK)
 		init_attr.create_flags |= IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK;
 
+	if (priv->hca_caps & IB_DEVICE_MANAGED_FLOW_STEERING)
+		init_attr.create_flags |= IB_QP_CREATE_NETIF_QP;
+
 	if (dev->features & NETIF_F_SG)
 		init_attr.cap.max_send_sge = MAX_SKB_FRAGS + 1;
 

+ 1 - 0
drivers/infiniband/ulp/srp/ib_srp.c

@@ -660,6 +660,7 @@ static void srp_remove_target(struct srp_target_port *target)
 	srp_rport_get(target->rport);
 	srp_remove_host(target->scsi_host);
 	scsi_remove_host(target->scsi_host);
+	srp_stop_rport_timers(target->rport);
 	srp_disconnect_target(target);
 	ib_destroy_cm_id(target->cm_id);
 	srp_free_target_ib(target);

+ 9 - 0
drivers/net/ethernet/mellanox/mlx4/cmd.c

@@ -1371,6 +1371,15 @@ static struct mlx4_cmd_info cmd_info[] = {
 		.verify = NULL,
 		.wrapper = mlx4_QP_FLOW_STEERING_DETACH_wrapper
 	},
+	{
+		.opcode = MLX4_FLOW_STEERING_IB_UC_QP_RANGE,
+		.has_inbox = false,
+		.has_outbox = false,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper
+	},
 };
 
 static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave,

+ 10 - 0
drivers/net/ethernet/mellanox/mlx4/fw.c

@@ -513,6 +513,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_MAX_XRC_OFFSET		0x67
 #define QUERY_DEV_CAP_MAX_COUNTERS_OFFSET	0x68
 #define QUERY_DEV_CAP_EXT_2_FLAGS_OFFSET	0x70
+#define QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET	0x74
 #define QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET	0x76
 #define QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET	0x77
 #define QUERY_DEV_CAP_RDMARC_ENTRY_SZ_OFFSET	0x80
@@ -603,6 +604,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	if (field & 0x80)
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_FS_EN;
 	dev_cap->fs_log_max_ucast_qp_range_size = field & 0x1f;
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
+	if (field & 0x80)
+		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_DMFS_IPOIB;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_MAX_QP_OFFSET);
 	dev_cap->fs_max_num_qp_per_entry = field;
 	MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET);
@@ -860,6 +864,12 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		MLX4_PUT(outbox->buf, field,
 			 QUERY_DEV_CAP_FLOW_STEERING_RANGE_EN_OFFSET);
 	}
+
+	/* turn off ipoib managed steering for guests */
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
+	field &= ~0x80;
+	MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_FLOW_STEERING_IPOIB_OFFSET);
+
 	return 0;
 }
 

+ 17 - 0
drivers/net/ethernet/mellanox/mlx4/mcg.c

@@ -895,6 +895,23 @@ int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id)
 }
 EXPORT_SYMBOL_GPL(mlx4_flow_detach);
 
+int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn,
+				      u32 max_range_qpn)
+{
+	int err;
+	u64 in_param;
+
+	in_param = ((u64) min_range_qpn) << 32;
+	in_param |= ((u64) max_range_qpn) & 0xFFFFFFFF;
+
+	err = mlx4_cmd(dev, in_param, 0, 0,
+			MLX4_FLOW_STEERING_IB_UC_QP_RANGE,
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_FLOW_STEERING_IB_UC_QP_RANGE);
+
 int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 			  int block_mcast_loopback, enum mlx4_protocol prot,
 			  enum mlx4_steer_type steer)
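The new command above carries the unicast QP range in the 64-bit immediate parameter rather than a mailbox; a small sketch of the packing (illustration only, helper name hypothetical):

	/*
	 *	static inline u64 example_pack_qpn_range(u32 min_qpn, u32 max_qpn)
	 *	{
	 *		return ((u64)min_qpn << 32) | max_qpn;
	 *	}
	 *
	 * e.g. min_qpn = 0x400, max_qpn = 0x7ff gives
	 * in_param == 0x00000400000007ff, so the firmware reads the minimum
	 * QPN from bits 63..32 and the maximum QPN from bits 31..0.
	 */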

+ 5 - 0
drivers/net/ethernet/mellanox/mlx4/mlx4.h

@@ -1236,6 +1236,11 @@ int mlx4_QP_FLOW_STEERING_DETACH_wrapper(struct mlx4_dev *dev, int slave,
 					 struct mlx4_cmd_mailbox *inbox,
 					 struct mlx4_cmd_mailbox *outbox,
 					 struct mlx4_cmd_info *cmd);
+int mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper(struct mlx4_dev *dev, int slave,
+					      struct mlx4_vhcr *vhcr,
+					      struct mlx4_cmd_mailbox *inbox,
+					      struct mlx4_cmd_mailbox *outbox,
+					      struct mlx4_cmd_info *cmd);
 
 int mlx4_get_mgm_entry_size(struct mlx4_dev *dev);
 int mlx4_get_qp_per_mgm(struct mlx4_dev *dev);

+ 20 - 0
drivers/net/ethernet/mellanox/mlx4/port.c

@@ -123,6 +123,26 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
 	return err;
 }
 
+int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx)
+{
+	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+	struct mlx4_mac_table *table = &info->mac_table;
+	int i;
+
+	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+		if (!table->refs[i])
+			continue;
+
+		if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
+			*idx = i;
+			return 0;
+		}
+	}
+
+	return -ENOENT;
+}
+EXPORT_SYMBOL_GPL(mlx4_find_cached_mac);
+
 int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 {
 	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];

+ 10 - 0
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c

@@ -3844,6 +3844,16 @@ int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
 	return err;
 }
 
+int mlx4_FLOW_STEERING_IB_UC_QP_RANGE_wrapper(struct mlx4_dev *dev, int slave,
+					      struct mlx4_vhcr *vhcr,
+					      struct mlx4_cmd_mailbox *inbox,
+					      struct mlx4_cmd_mailbox *outbox,
+					      struct mlx4_cmd_info *cmd)
+{
+	return -EPERM;
+}
+
+
 static void detach_qp(struct mlx4_dev *dev, int slave, struct res_qp *rqp)
 {
 	struct res_gid *rgid;

+ 15 - 2
drivers/net/ethernet/mellanox/mlx5/core/cq.c

@@ -201,10 +201,23 @@ EXPORT_SYMBOL(mlx5_core_query_cq);
 
 
 int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-			int type, struct mlx5_cq_modify_params *params)
+			struct mlx5_modify_cq_mbox_in *in, int in_sz)
 {
-	return -ENOSYS;
+	struct mlx5_modify_cq_mbox_out out;
+	int err;
+
+	memset(&out, 0, sizeof(out));
+	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ);
+	err = mlx5_cmd_exec(dev, in, in_sz, &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		return mlx5_cmd_status_to_err(&out.hdr);
+
+	return 0;
 }
+EXPORT_SYMBOL(mlx5_core_modify_cq);
 
 int mlx5_init_cq_table(struct mlx5_core_dev *dev)
 {

+ 33 - 6
drivers/net/ethernet/mellanox/mlx5/core/debugfs.c

@@ -275,7 +275,7 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
 }
 
 static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
-			 int index)
+			 int index, int *is_str)
 {
 	struct mlx5_query_qp_mbox_out *out;
 	struct mlx5_qp_context *ctx;
@@ -293,19 +293,40 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
 		goto out;
 	}
 
+	*is_str = 0;
 	ctx = &out->ctx;
 	switch (index) {
 	case QP_PID:
 		param = qp->pid;
 		break;
 	case QP_STATE:
-		param = be32_to_cpu(ctx->flags) >> 28;
+		param = (u64)mlx5_qp_state_str(be32_to_cpu(ctx->flags) >> 28);
+		*is_str = 1;
 		break;
 	case QP_XPORT:
-		param = (be32_to_cpu(ctx->flags) >> 16) & 0xff;
+		param = (u64)mlx5_qp_type_str((be32_to_cpu(ctx->flags) >> 16) & 0xff);
+		*is_str = 1;
 		break;
 	case QP_MTU:
-		param = ctx->mtu_msgmax >> 5;
+		switch (ctx->mtu_msgmax >> 5) {
+		case IB_MTU_256:
+			param = 256;
+			break;
+		case IB_MTU_512:
+			param = 512;
+			break;
+		case IB_MTU_1024:
+			param = 1024;
+			break;
+		case IB_MTU_2048:
+			param = 2048;
+			break;
+		case IB_MTU_4096:
+			param = 4096;
+			break;
+		default:
+			param = 0;
+		}
 		break;
 	case QP_N_RECV:
 		param = 1 << ((ctx->rq_size_stride >> 3) & 0xf);
@@ -414,6 +435,7 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
 	struct mlx5_field_desc *desc;
 	struct mlx5_rsc_debug *d;
 	char tbuf[18];
+	int is_str = 0;
 	u64 field;
 	int ret;
 
@@ -424,7 +446,7 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
 	d = (void *)(desc - desc->i) - sizeof(*d);
 	switch (d->type) {
 	case MLX5_DBG_RSC_QP:
-		field = qp_read_field(d->dev, d->object, desc->i);
+		field = qp_read_field(d->dev, d->object, desc->i, &is_str);
 		break;
 
 	case MLX5_DBG_RSC_EQ:
@@ -440,7 +462,12 @@ static ssize_t dbg_read(struct file *filp, char __user *buf, size_t count,
 		return -EINVAL;
 	}
 
-	ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
+
+	if (is_str)
+		ret = snprintf(tbuf, sizeof(tbuf), "%s\n", (const char *)field);
+	else
+		ret = snprintf(tbuf, sizeof(tbuf), "0x%llx\n", field);
+
 	if (ret > 0) {
 		if (copy_to_user(buf, tbuf, ret))
 			return -EFAULT;

+ 8 - 2
drivers/net/ethernet/mellanox/mlx5/core/main.c

@@ -460,7 +460,10 @@ disable_msix:
 
 err_stop_poll:
 	mlx5_stop_health_poll(dev);
-	mlx5_cmd_teardown_hca(dev);
+	if (mlx5_cmd_teardown_hca(dev)) {
+		dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
+		return err;
+	}
 
 err_pagealloc_stop:
 	mlx5_pagealloc_stop(dev);
@@ -503,7 +506,10 @@ void mlx5_dev_cleanup(struct mlx5_core_dev *dev)
 	mlx5_eq_cleanup(dev);
 	mlx5_disable_msix(dev);
 	mlx5_stop_health_poll(dev);
-	mlx5_cmd_teardown_hca(dev);
+	if (mlx5_cmd_teardown_hca(dev)) {
+		dev_err(&dev->pdev->dev, "tear_down_hca failed, skip cleanup\n");
+		return;
+	}
 	mlx5_pagealloc_stop(dev);
 	mlx5_reclaim_startup_pages(dev);
 	mlx5_core_disable_hca(dev);

+ 6 - 7
drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c

@@ -99,7 +99,7 @@ enum {
 
 enum {
 	MLX5_MAX_RECLAIM_TIME_MILI	= 5000,
-	MLX5_NUM_4K_IN_PAGE		= PAGE_SIZE / 4096,
+	MLX5_NUM_4K_IN_PAGE		= PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
 };
 
 static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
@@ -192,10 +192,8 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
 	struct fw_page *fp;
 	unsigned n;
 
-	if (list_empty(&dev->priv.free_list)) {
+	if (list_empty(&dev->priv.free_list))
 		return -ENOMEM;
-		mlx5_core_warn(dev, "\n");
-	}
 
 	fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
 	n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
@@ -208,7 +206,7 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
 	if (!fp->free_count)
 		list_del(&fp->list);
 
-	*addr = fp->addr + n * 4096;
+	*addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE;
 
 	return 0;
 }
@@ -224,14 +222,15 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr)
 		return;
 	}
 
-	n = (addr & ~PAGE_MASK) % 4096;
+	n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
 	fwp->free_count++;
 	set_bit(n, &fwp->bitmask);
 	if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
 		rb_erase(&fwp->rb_node, &dev->priv.page_root);
 		if (fwp->free_count != 1)
 			list_del(&fwp->list);
-		dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+		dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE,
+			       DMA_BIDIRECTIONAL);
 		__free_page(fwp->page);
 		kfree(fwp);
 	} else if (fwp->free_count == 1) {
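A short worked example of why the free_4k() index computation above switches from a modulo to a shift (illustration only, assuming a 64 KB kernel page size as on some PowerPC configurations):

	/*
	 * With PAGE_SIZE = 0x10000 and MLX5_ADAPTER_PAGE_SIZE = 0x1000, a 4K
	 * chunk handed to firmware at offset 0x3000 inside the kernel page
	 * must map to bit 3 of the tracking bitmask:
	 *
	 *	old:  n = (addr & ~PAGE_MASK) % 4096;			-> 0 (wrong)
	 *	new:  n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;	-> 3
	 *
	 * (addr & ~PAGE_MASK) is the offset inside the kernel page, 0x3000
	 * here; any 4K-aligned offset modulo 4096 is always 0, so the old
	 * code could only ever touch bit 0.
	 */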

+ 1 - 1
drivers/net/ethernet/mellanox/mlx5/core/port.c

@@ -57,7 +57,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in,
 	in->arg = cpu_to_be32(arg);
 	in->register_id = cpu_to_be16(reg_num);
 	err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out,
-			    sizeof(out) + size_out);
+			    sizeof(*out) + size_out);
 	if (err)
 		goto ex2;
 

+ 3 - 2
drivers/net/ethernet/mellanox/mlx5/core/qp.c

@@ -74,7 +74,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 	struct mlx5_destroy_qp_mbox_out dout;
 	int err;
 
-	memset(&dout, 0, sizeof(dout));
+	memset(&out, 0, sizeof(out));
 	in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP);
 
 	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
@@ -84,7 +84,8 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 	}
 
 	if (out.hdr.status) {
-		pr_warn("current num of QPs 0x%x\n", atomic_read(&dev->num_qps));
+		mlx5_core_warn(dev, "current num of QPs 0x%x\n",
+			       atomic_read(&dev->num_qps));
 		return mlx5_cmd_status_to_err(&out.hdr);
 	}
 

+ 57 - 38
drivers/scsi/scsi_transport_srp.c

@@ -64,10 +64,14 @@ static inline struct Scsi_Host *rport_to_shost(struct srp_rport *r)
 
 /**
  * srp_tmo_valid() - check timeout combination validity
+ * @reconnect_delay: Reconnect delay in seconds.
+ * @fast_io_fail_tmo: Fast I/O fail timeout in seconds.
+ * @dev_loss_tmo: Device loss timeout in seconds.
  *
  * The combination of the timeout parameters must be such that SCSI commands
  * are finished in a reasonable time. Hence do not allow the fast I/O fail
- * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT. Furthermore, these
+ * timeout to exceed SCSI_DEVICE_BLOCK_MAX_TIMEOUT nor allow dev_loss_tmo to
+ * exceed that limit if failing I/O fast has been disabled. Furthermore, these
  * parameters must be such that multipath can detect failed paths timely.
  * Hence do not allow all three parameters to be disabled simultaneously.
  */
@@ -79,6 +83,9 @@ int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo, int dev_loss_tmo)
 		return -EINVAL;
 	if (fast_io_fail_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
 		return -EINVAL;
+	if (fast_io_fail_tmo < 0 &&
+	    dev_loss_tmo > SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
+		return -EINVAL;
 	if (dev_loss_tmo >= LONG_MAX / HZ)
 		return -EINVAL;
 	if (fast_io_fail_tmo >= 0 && dev_loss_tmo >= 0 &&
@@ -368,6 +375,7 @@ invalid:
 
 /**
  * srp_reconnect_work() - reconnect and schedule a new attempt if necessary
+ * @work: Work structure used for scheduling this operation.
  */
 static void srp_reconnect_work(struct work_struct *work)
 {
@@ -408,6 +416,7 @@ static void __rport_fail_io_fast(struct srp_rport *rport)
 
 /**
  * rport_fast_io_fail_timedout() - fast I/O failure timeout handler
+ * @work: Work structure used for scheduling this operation.
  */
 static void rport_fast_io_fail_timedout(struct work_struct *work)
 {
@@ -426,6 +435,7 @@ static void rport_fast_io_fail_timedout(struct work_struct *work)
 
 /**
  * rport_dev_loss_timedout() - device loss timeout handler
+ * @work: Work structure used for scheduling this operation.
  */
 static void rport_dev_loss_timedout(struct work_struct *work)
 {
@@ -452,42 +462,35 @@ static void __srp_start_tl_fail_timers(struct srp_rport *rport)
 
 	lockdep_assert_held(&rport->mutex);
 
-	if (!rport->deleted) {
-		delay = rport->reconnect_delay;
-		fast_io_fail_tmo = rport->fast_io_fail_tmo;
-		dev_loss_tmo = rport->dev_loss_tmo;
-		pr_debug("%s current state: %d\n",
-			 dev_name(&shost->shost_gendev), rport->state);
+	delay = rport->reconnect_delay;
+	fast_io_fail_tmo = rport->fast_io_fail_tmo;
+	dev_loss_tmo = rport->dev_loss_tmo;
+	pr_debug("%s current state: %d\n", dev_name(&shost->shost_gendev),
+		 rport->state);
 
-		if (delay > 0)
-			queue_delayed_work(system_long_wq,
-					   &rport->reconnect_work,
-					   1UL * delay * HZ);
-		if (fast_io_fail_tmo >= 0 &&
-		    srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
-			pr_debug("%s new state: %d\n",
-				 dev_name(&shost->shost_gendev),
-				 rport->state);
-			scsi_target_block(&shost->shost_gendev);
+	if (rport->state == SRP_RPORT_LOST)
+		return;
+	if (delay > 0)
+		queue_delayed_work(system_long_wq, &rport->reconnect_work,
+				   1UL * delay * HZ);
+	if (srp_rport_set_state(rport, SRP_RPORT_BLOCKED) == 0) {
+		pr_debug("%s new state: %d\n", dev_name(&shost->shost_gendev),
+			 rport->state);
+		scsi_target_block(&shost->shost_gendev);
+		if (fast_io_fail_tmo >= 0)
 			queue_delayed_work(system_long_wq,
 					   &rport->fast_io_fail_work,
 					   1UL * fast_io_fail_tmo * HZ);
-		}
 		if (dev_loss_tmo >= 0)
 			queue_delayed_work(system_long_wq,
 					   &rport->dev_loss_work,
 					   1UL * dev_loss_tmo * HZ);
-	} else {
-		pr_debug("%s has already been deleted\n",
-			 dev_name(&shost->shost_gendev));
-		srp_rport_set_state(rport, SRP_RPORT_FAIL_FAST);
-		scsi_target_unblock(&shost->shost_gendev,
-				    SDEV_TRANSPORT_OFFLINE);
 	}
 }
 
 /**
  * srp_start_tl_fail_timers() - start the transport layer failure timers
+ * @rport: SRP target port.
  *
  * Start the transport layer fast I/O failure and device loss timers. Do not
  * modify a timer that was already started.
@@ -502,6 +505,7 @@ EXPORT_SYMBOL(srp_start_tl_fail_timers);
 
 /**
  * scsi_request_fn_active() - number of kernel threads inside scsi_request_fn()
+ * @shost: SCSI host for which to count the number of scsi_request_fn() callers.
 */
 static int scsi_request_fn_active(struct Scsi_Host *shost)
 {
@@ -522,6 +526,7 @@ static int scsi_request_fn_active(struct Scsi_Host *shost)
 
 /**
  * srp_reconnect_rport() - reconnect to an SRP target port
+ * @rport: SRP target port.
 *
 * Blocks SCSI command queueing before invoking reconnect() such that
 * queuecommand() won't be invoked concurrently with reconnect() from outside
@@ -556,7 +561,7 @@ int srp_reconnect_rport(struct srp_rport *rport)
 	scsi_target_block(&shost->shost_gendev);
 	while (scsi_request_fn_active(shost))
 		msleep(20);
-	res = i->f->reconnect(rport);
+	res = rport->state != SRP_RPORT_LOST ? i->f->reconnect(rport) : -ENODEV;
 	pr_debug("%s (state %d): transport.reconnect() returned %d\n",
 		 dev_name(&shost->shost_gendev), rport->state, res);
 	if (res == 0) {
@@ -578,9 +583,9 @@ int srp_reconnect_rport(struct srp_rport *rport)
 		spin_unlock_irq(shost->host_lock);
 	} else if (rport->state == SRP_RPORT_RUNNING) {
 		/*
-		 * srp_reconnect_rport() was invoked with fast_io_fail
-		 * off. Mark the port as failed and start the TL failure
-		 * timers if these had not yet been started.
+		 * srp_reconnect_rport() has been invoked with fast_io_fail
+		 * and dev_loss off. Mark the port as failed and start the TL
+		 * failure timers if these had not yet been started.
		 */
 		__rport_fail_io_fast(rport);
 		scsi_target_unblock(&shost->shost_gendev,
@@ -599,6 +604,7 @@ EXPORT_SYMBOL(srp_reconnect_rport);
 
 /**
  * srp_timed_out() - SRP transport intercept of the SCSI timeout EH
+ * @scmd: SCSI command.
 *
 * If a timeout occurs while an rport is in the blocked state, ask the SCSI
 * EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core
@@ -622,10 +628,6 @@ static void srp_rport_release(struct device *dev)
 {
 	struct srp_rport *rport = dev_to_rport(dev);
 
-	cancel_delayed_work_sync(&rport->reconnect_work);
-	cancel_delayed_work_sync(&rport->fast_io_fail_work);
-	cancel_delayed_work_sync(&rport->dev_loss_work);
-
 	put_device(dev->parent);
 	kfree(rport);
 }
@@ -674,6 +676,7 @@ static int srp_host_match(struct attribute_container *cont, struct device *dev)
 
 /**
  * srp_rport_get() - increment rport reference count
+ * @rport: SRP target port.
 */
 void srp_rport_get(struct srp_rport *rport)
 {
@@ -683,6 +686,7 @@ EXPORT_SYMBOL(srp_rport_get);
 
 /**
  * srp_rport_put() - decrement rport reference count
+ * @rport: SRP target port.
 */
 void srp_rport_put(struct srp_rport *rport)
 {
@@ -780,12 +784,6 @@ void srp_rport_del(struct srp_rport *rport)
 	device_del(dev);
 	transport_destroy_device(dev);
 
-	mutex_lock(&rport->mutex);
-	if (rport->state == SRP_RPORT_BLOCKED)
-		__rport_fail_io_fast(rport);
-	rport->deleted = true;
-	mutex_unlock(&rport->mutex);
-
 	put_device(dev);
 }
 EXPORT_SYMBOL_GPL(srp_rport_del);
@@ -810,6 +808,27 @@ void srp_remove_host(struct Scsi_Host *shost)
 }
 EXPORT_SYMBOL_GPL(srp_remove_host);
 
+/**
+ * srp_stop_rport_timers - stop the transport layer recovery timers
+ *
+ * Must be called after srp_remove_host() and scsi_remove_host(). The caller
+ * must hold a reference on the rport (rport->dev) and on the SCSI host
+ * (rport->dev.parent).
+ */
+void srp_stop_rport_timers(struct srp_rport *rport)
+{
+	mutex_lock(&rport->mutex);
+	if (rport->state == SRP_RPORT_BLOCKED)
+		__rport_fail_io_fast(rport);
+	srp_rport_set_state(rport, SRP_RPORT_LOST);
+	mutex_unlock(&rport->mutex);
+
+	cancel_delayed_work_sync(&rport->reconnect_work);
+	cancel_delayed_work_sync(&rport->fast_io_fail_work);
+	cancel_delayed_work_sync(&rport->dev_loss_work);
+}
+EXPORT_SYMBOL_GPL(srp_stop_rport_timers);
+
 static int srp_tsk_mgmt_response(struct Scsi_Host *shost, u64 nexus, u64 tm_id,
 				 int result)
 {
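Reading the srp_tmo_valid() hunks above together, the timeout rules can be summarized as below (illustration only; the function's remaining checks are not fully shown in this diff, so this is not exhaustive). Writing T for SCSI_DEVICE_BLOCK_MAX_TIMEOUT:

	/*
	 *	fast_io_fail_tmo > T                        -> -EINVAL
	 *	fast_io_fail_tmo disabled (< 0) and
	 *	dev_loss_tmo > T                            -> -EINVAL (new with
	 *	                                               this patch: a blocked
	 *	                                               rport may not outlive
	 *	                                               the SCSI blocking limit
	 *	                                               when fast I/O fail is off)
	 *	dev_loss_tmo >= LONG_MAX / HZ               -> -EINVAL
	 *	otherwise                                   -> subject to the
	 *	                                               remaining checks
	 */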

+ 1 - 0
include/linux/mlx4/cmd.h

@@ -157,6 +157,7 @@ enum {
 	/* register/delete flow steering network rules */
 	MLX4_QP_FLOW_STEERING_ATTACH = 0x65,
 	MLX4_QP_FLOW_STEERING_DETACH = 0x66,
+	MLX4_FLOW_STEERING_IB_UC_QP_RANGE = 0x64,
 };
 
 enum {

+ 11 - 4
include/linux/mlx4/cq.h

@@ -34,6 +34,7 @@
 #define MLX4_CQ_H
 
 #include <linux/types.h>
+#include <uapi/linux/if_ether.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/doorbell.h>
@@ -43,10 +44,15 @@ struct mlx4_cqe {
 	__be32			immed_rss_invalid;
 	__be32			g_mlpath_rqpn;
 	__be16			sl_vid;
-	__be16			rlid;
-	__be16			status;
-	u8			ipv6_ext_mask;
-	u8			badfcs_enc;
+	union {
+		struct {
+			__be16	rlid;
+			__be16  status;
+			u8      ipv6_ext_mask;
+			u8      badfcs_enc;
+		};
+		u8  smac[ETH_ALEN];
+	};
 	__be32			byte_cnt;
 	__be16			wqe_index;
 	__be16			checksum;
@@ -83,6 +89,7 @@ struct mlx4_ts_cqe {
 enum {
 	MLX4_CQE_VLAN_PRESENT_MASK	= 1 << 29,
 	MLX4_CQE_QPN_MASK		= 0xffffff,
+	MLX4_CQE_VID_MASK		= 0xfff,
 };
 
 enum {

+ 6 - 1
include/linux/mlx4/device.h

@@ -160,7 +160,8 @@ enum {
 	MLX4_DEV_CAP_FLAG2_TS			= 1LL <<  5,
 	MLX4_DEV_CAP_FLAG2_VLAN_CONTROL		= 1LL <<  6,
 	MLX4_DEV_CAP_FLAG2_FSM			= 1LL <<  7,
-	MLX4_DEV_CAP_FLAG2_UPDATE_QP		= 1LL <<  8
+	MLX4_DEV_CAP_FLAG2_UPDATE_QP		= 1LL <<  8,
+	MLX4_DEV_CAP_FLAG2_DMFS_IPOIB		= 1LL <<  9
 };
 
 enum {
@@ -1095,6 +1096,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
 int mlx4_SET_PORT_PRIO2TC(struct mlx4_dev *dev, u8 port, u8 *prio2tc);
 int mlx4_SET_PORT_SCHEDULER(struct mlx4_dev *dev, u8 port, u8 *tc_tx_bw,
 		u8 *pg, u16 *ratelimit);
+int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx);
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
 void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan);
@@ -1144,6 +1146,9 @@ int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int
 void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid);
 __be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave);
 
+int mlx4_FLOW_STEERING_IB_UC_QP_RANGE(struct mlx4_dev *dev, u32 min_range_qpn,
+				      u32 max_range_qpn);
+
 cycle_t mlx4_read_clock(struct mlx4_dev *dev);
 
 #endif /* MLX4_DEVICE_H */

+ 13 - 5
include/linux/mlx5/cq.h

@@ -79,15 +79,23 @@ enum {
 	MLX5_CQE_RESP_SEND	= 2,
 	MLX5_CQE_RESP_SEND_IMM	= 3,
 	MLX5_CQE_RESP_SEND_INV	= 4,
-	MLX5_CQE_RESIZE_CQ	= 0xff, /* TBD */
+	MLX5_CQE_RESIZE_CQ	= 5,
 	MLX5_CQE_REQ_ERR	= 13,
 	MLX5_CQE_RESP_ERR	= 14,
+	MLX5_CQE_INVALID	= 15,
 };
 
 enum {
-	MLX5_CQ_MODIFY_RESEIZE = 0,
-	MLX5_CQ_MODIFY_MODER = 1,
-	MLX5_CQ_MODIFY_MAPPING = 2,
+	MLX5_CQ_MODIFY_PERIOD	= 1 << 0,
+	MLX5_CQ_MODIFY_COUNT	= 1 << 1,
+	MLX5_CQ_MODIFY_OVERRUN	= 1 << 2,
+};
+
+enum {
+	MLX5_CQ_OPMOD_RESIZE		= 1,
+	MLX5_MODIFY_CQ_MASK_LOG_SIZE	= 1 << 0,
+	MLX5_MODIFY_CQ_MASK_PG_OFFSET	= 1 << 1,
+	MLX5_MODIFY_CQ_MASK_PG_SIZE	= 1 << 2,
 };
 
 struct mlx5_cq_modify_params {
@@ -158,7 +166,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
 int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 		       struct mlx5_query_cq_mbox_out *out);
 int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-			int type, struct mlx5_cq_modify_params *params);
+			struct mlx5_modify_cq_mbox_in *in, int in_sz);
 int mlx5_debug_cq_add(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
 void mlx5_debug_cq_remove(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
 

+ 25 - 6
include/linux/mlx5/device.h

@@ -104,9 +104,10 @@ enum {
 };
 
 enum {
-	MLX5_BF_REGS_PER_PAGE	= 4,
-	MLX5_MAX_UAR_PAGES	= 1 << 8,
-	MLX5_MAX_UUARS		= MLX5_MAX_UAR_PAGES * MLX5_BF_REGS_PER_PAGE,
+	MLX5_BF_REGS_PER_PAGE		= 4,
+	MLX5_MAX_UAR_PAGES		= 1 << 8,
+	MLX5_NON_FP_BF_REGS_PER_PAGE	= 2,
+	MLX5_MAX_UUARS	= MLX5_MAX_UAR_PAGES * MLX5_NON_FP_BF_REGS_PER_PAGE,
 };
 
 enum {
@@ -176,6 +177,8 @@ enum {
 	MLX5_DEV_CAP_FLAG_APM		= 1LL << 17,
 	MLX5_DEV_CAP_FLAG_ATOMIC	= 1LL << 18,
 	MLX5_DEV_CAP_FLAG_ON_DMND_PG	= 1LL << 24,
+	MLX5_DEV_CAP_FLAG_CQ_MODER	= 1LL << 29,
+	MLX5_DEV_CAP_FLAG_RESIZE_CQ	= 1LL << 30,
 	MLX5_DEV_CAP_FLAG_RESIZE_SRQ	= 1LL << 32,
 	MLX5_DEV_CAP_FLAG_REMOTE_FENCE	= 1LL << 38,
 	MLX5_DEV_CAP_FLAG_TLP_HINTS	= 1LL << 39,
@@ -231,7 +234,8 @@ enum {
 };
 
 enum {
-	MLX5_ADAPTER_PAGE_SHIFT		= 12
+	MLX5_ADAPTER_PAGE_SHIFT		= 12,
+	MLX5_ADAPTER_PAGE_SIZE		= 1 << MLX5_ADAPTER_PAGE_SHIFT,
 };
 
 enum {
@@ -697,6 +701,20 @@ struct mlx5_query_cq_mbox_out {
 	__be64			pas[0];
 };
 
+struct mlx5_modify_cq_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	__be32			cqn;
+	__be32			field_select;
+	struct mlx5_cq_context	ctx;
+	u8			rsvd[192];
+	__be64			pas[0];
+};
+
+struct mlx5_modify_cq_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
 struct mlx5_enable_hca_mbox_in {
 	struct mlx5_inbox_hdr	hdr;
 	u8			rsvd[8];
@@ -831,8 +849,8 @@ struct mlx5_create_mkey_mbox_in {
 	struct mlx5_mkey_seg	seg;
 	u8			rsvd1[16];
 	__be32			xlat_oct_act_size;
-	__be32			bsf_coto_act_size;
-	u8			rsvd2[168];
+	__be32			rsvd2;
+	u8			rsvd3[168];
 	__be64			pas[0];
 };
 
@@ -871,6 +889,7 @@ struct mlx5_modify_mkey_mbox_in {
 
 struct mlx5_modify_mkey_mbox_out {
 	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd[8];
 };
 
 struct mlx5_dump_mkey_mbox_in {

+ 45 - 0
include/linux/mlx5/qp.h

@@ -464,4 +464,49 @@ void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
 int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
 void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
 
+static inline const char *mlx5_qp_type_str(int type)
+{
+	switch (type) {
+	case MLX5_QP_ST_RC: return "RC";
+	case MLX5_QP_ST_UC: return "C";
+	case MLX5_QP_ST_UD: return "UD";
+	case MLX5_QP_ST_XRC: return "XRC";
+	case MLX5_QP_ST_MLX: return "MLX";
+	case MLX5_QP_ST_QP0: return "QP0";
+	case MLX5_QP_ST_QP1: return "QP1";
+	case MLX5_QP_ST_RAW_ETHERTYPE: return "RAW_ETHERTYPE";
+	case MLX5_QP_ST_RAW_IPV6: return "RAW_IPV6";
+	case MLX5_QP_ST_SNIFFER: return "SNIFFER";
+	case MLX5_QP_ST_SYNC_UMR: return "SYNC_UMR";
+	case MLX5_QP_ST_PTP_1588: return "PTP_1588";
+	case MLX5_QP_ST_REG_UMR: return "REG_UMR";
+	default: return "Invalid transport type";
+	}
+}
+
+static inline const char *mlx5_qp_state_str(int state)
+{
+	switch (state) {
+	case MLX5_QP_STATE_RST:
+	return "RST";
+	case MLX5_QP_STATE_INIT:
+	return "INIT";
+	case MLX5_QP_STATE_RTR:
+	return "RTR";
+	case MLX5_QP_STATE_RTS:
+	return "RTS";
+	case MLX5_QP_STATE_SQER:
+	return "SQER";
+	case MLX5_QP_STATE_SQD:
+	return "SQD";
+	case MLX5_QP_STATE_ERR:
+	return "ERR";
+	case MLX5_QP_STATE_SQ_DRAINING:
+	return "SQ_DRAINING";
+	case MLX5_QP_STATE_SUSPENDED:
+	return "SUSPENDED";
+	default: return "Invalid QP state";
+	}
+}
+
 #endif /* MLX5_QP_H */

+ 49 - 20
include/rdma/ib_addr.h

@@ -38,10 +38,15 @@
 #include <linux/in6.h>
 #include <linux/if_arp.h>
 #include <linux/netdevice.h>
+#include <linux/inetdevice.h>
 #include <linux/socket.h>
 #include <linux/if_vlan.h>
+#include <net/ipv6.h>
+#include <net/if_inet6.h>
+#include <net/ip.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_pack.h>
+#include <net/ipv6.h>
 
 struct rdma_addr_client {
 	atomic_t refcount;
@@ -72,7 +77,8 @@ struct rdma_dev_addr {
  * rdma_translate_ip - Translate a local IP address to an RDMA hardware
  *   address.
 */
-int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr);
+int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr,
+		      u16 *vlan_id);
 
 /**
  * rdma_resolve_ip - Resolve source and destination IP addresses to
@@ -104,6 +110,10 @@ int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
 
 int rdma_addr_size(struct sockaddr *addr);
 
+int rdma_addr_find_smac_by_sgid(union ib_gid *sgid, u8 *smac, u16 *vlan_id);
+int rdma_addr_find_dmac_by_grh(union ib_gid *sgid, union ib_gid *dgid, u8 *smac,
+			       u16 *vlan_id);
+
 static inline u16 ib_addr_get_pkey(struct rdma_dev_addr *dev_addr)
 {
 	return ((u16)dev_addr->broadcast[8] << 8) | (u16)dev_addr->broadcast[9];
@@ -126,41 +136,60 @@ static inline int rdma_addr_gid_offset(struct rdma_dev_addr *dev_addr)
 	return dev_addr->dev_type == ARPHRD_INFINIBAND ? 4 : 0;
 }
 
-static inline void iboe_mac_vlan_to_ll(union ib_gid *gid, u8 *mac, u16 vid)
+static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev)
 {
-	memset(gid->raw, 0, 16);
-	*((__be32 *) gid->raw) = cpu_to_be32(0xfe800000);
-	if (vid < 0x1000) {
-		gid->raw[12] = vid & 0xff;
-		gid->raw[11] = vid >> 8;
-	} else {
-		gid->raw[12] = 0xfe;
-		gid->raw[11] = 0xff;
+	return dev->priv_flags & IFF_802_1Q_VLAN ?
+		vlan_dev_vlan_id(dev) : 0xffff;
+}
+
+static inline int rdma_ip2gid(struct sockaddr *addr, union ib_gid *gid)
+{
+	switch (addr->sa_family) {
+	case AF_INET:
+		ipv6_addr_set_v4mapped(((struct sockaddr_in *)
+					addr)->sin_addr.s_addr,
+				       (struct in6_addr *)gid);
+		break;
+	case AF_INET6:
+		memcpy(gid->raw, &((struct sockaddr_in6 *)addr)->sin6_addr, 16);
+		break;
+	default:
+		return -EINVAL;
 	}
-	memcpy(gid->raw + 13, mac + 3, 3);
-	memcpy(gid->raw + 8, mac, 3);
-	gid->raw[8] ^= 2;
+	return 0;
 }
 
-static inline u16 rdma_vlan_dev_vlan_id(const struct net_device *dev)
+/* Important - sockaddr should be a union of sockaddr_in and sockaddr_in6 */
+static inline int rdma_gid2ip(struct sockaddr *out, union ib_gid *gid)
 {
-	return dev->priv_flags & IFF_802_1Q_VLAN ?
-		vlan_dev_vlan_id(dev) : 0xffff;
+	if (ipv6_addr_v4mapped((struct in6_addr *)gid)) {
+		struct sockaddr_in *out_in = (struct sockaddr_in *)out;
+		memset(out_in, 0, sizeof(*out_in));
+		out_in->sin_family = AF_INET;
+		memcpy(&out_in->sin_addr.s_addr, gid->raw + 12, 4);
+	} else {
+		struct sockaddr_in6 *out_in = (struct sockaddr_in6 *)out;
+		memset(out_in, 0, sizeof(*out_in));
+		out_in->sin6_family = AF_INET6;
+		memcpy(&out_in->sin6_addr.s6_addr, gid->raw, 16);
+	}
+	return 0;
 }
 
 static inline void iboe_addr_get_sgid(struct rdma_dev_addr *dev_addr,
 				      union ib_gid *gid)
 {
 	struct net_device *dev;
-	u16 vid = 0xffff;
+	struct in_device *ip4;
 
 	dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
 	if (dev) {
-		vid = rdma_vlan_dev_vlan_id(dev);
+		ip4 = (struct in_device *)dev->ip_ptr;
+		if (ip4 && ip4->ifa_list && ip4->ifa_list->ifa_address)
+			ipv6_addr_set_v4mapped(ip4->ifa_list->ifa_address,
					       (struct in6_addr *)gid);
 		dev_put(dev);
 	}
-
-	iboe_mac_vlan_to_ll(gid, dev_addr->src_dev_addr, vid);
 }
 
 static inline void rdma_addr_get_sgid(struct rdma_dev_addr *dev_addr, union ib_gid *gid)

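With IP-based GID addressing, a RoCE GID is simply the port's IP address: an IPv6 address is used verbatim and an IPv4 address becomes a v4-mapped IPv6 address (::ffff:a.b.c.d), which is what rdma_ip2gid()/rdma_gid2ip() above implement. A standalone user-space sketch of the same round trip (it mirrors the mapping but deliberately avoids the kernel headers):

#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>

int main(void)
{
	unsigned char gid[16];
	struct in_addr v4;
	char buf[INET_ADDRSTRLEN];

	inet_pton(AF_INET, "192.0.2.1", &v4);

	/* ip2gid for AF_INET: build ::ffff:192.0.2.1 */
	memset(gid, 0, sizeof(gid));
	gid[10] = 0xff;
	gid[11] = 0xff;
	memcpy(gid + 12, &v4.s_addr, 4);

	/* gid2ip: a v4-mapped GID carries the IPv4 address in bytes 12..15 */
	memcpy(&v4.s_addr, gid + 12, 4);
	printf("GID maps back to %s\n", inet_ntop(AF_INET, &v4, buf, sizeof(buf)));
	return 0;
}
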
+ 1 - 0
include/rdma/ib_cm.h

@@ -601,4 +601,5 @@ struct ib_cm_sidr_rep_param {
 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
 			struct ib_cm_sidr_rep_param *param);

+int ib_update_cm_av(struct ib_cm_id *id, const u8 *smac, const u8 *alt_smac);
 #endif /* IB_CM_H */

+ 1 - 0
include/rdma/ib_pack.h

@@ -34,6 +34,7 @@
 #define IB_PACK_H

 #include <rdma/ib_verbs.h>
+#include <uapi/linux/if_ether.h>

 enum {
 	IB_LRH_BYTES  = 8,

+ 3 - 0
include/rdma/ib_sa.h

@@ -154,6 +154,9 @@ struct ib_sa_path_rec {
 	u8           packet_life_time_selector;
 	u8           packet_life_time;
 	u8           preference;
+	u8           smac[ETH_ALEN];
+	u8           dmac[ETH_ALEN];
+	u16	     vlan_id;
 };

 #define IB_SA_MCMEMBER_REC_MGID				IB_SA_COMP_MASK( 0)

+ 38 - 4
include/rdma/ib_verbs.h

@@ -48,6 +48,7 @@
 #include <linux/rwsem.h>
 #include <linux/scatterlist.h>
 #include <linux/workqueue.h>
+#include <uapi/linux/if_ether.h>

 #include <linux/atomic.h>
 #include <asm/uaccess.h>
@@ -69,12 +70,14 @@ enum rdma_node_type {
 	RDMA_NODE_IB_ROUTER,
 	RDMA_NODE_RNIC,
 	RDMA_NODE_USNIC,
+	RDMA_NODE_USNIC_UDP,
 };

 enum rdma_transport_type {
 	RDMA_TRANSPORT_IB,
 	RDMA_TRANSPORT_IWARP,
-	RDMA_TRANSPORT_USNIC
+	RDMA_TRANSPORT_USNIC,
+	RDMA_TRANSPORT_USNIC_UDP
 };

 enum rdma_transport_type
@@ -472,6 +475,8 @@ struct ib_ah_attr {
 	u8			static_rate;
 	u8			ah_flags;
 	u8			port_num;
+	u8			dmac[ETH_ALEN];
+	u16			vlan_id;
 };

 enum ib_wc_status {
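
With dmac and vlan_id in struct ib_ah_attr, the resolved Ethernet destination no longer has to be encoded inside the GID itself. A hedged sketch of how a RoCE consumer might fill the structure (kernel context; resolved_dmac and resolved_vlan_id are placeholders for the output of neighbour/VLAN resolution, e.g. rdma_addr_find_dmac_by_grh()):

struct ib_ah_attr ah_attr;

memset(&ah_attr, 0, sizeof(ah_attr));
ah_attr.port_num       = 1;
ah_attr.ah_flags       = IB_AH_GRH;	/* RoCE always carries a GRH */
ah_attr.grh.sgid_index = 0;
ah_attr.grh.hop_limit  = 1;
/* ah_attr.grh.dgid would be the peer's IP-based GID */
memcpy(ah_attr.dmac, resolved_dmac, ETH_ALEN);
ah_attr.vlan_id = resolved_vlan_id;	/* 0xffff when untagged */
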
@@ -524,6 +529,8 @@ enum ib_wc_flags {
 	IB_WC_WITH_IMM		= (1<<1),
 	IB_WC_WITH_INVALIDATE	= (1<<2),
 	IB_WC_IP_CSUM_OK	= (1<<3),
+	IB_WC_WITH_SMAC		= (1<<4),
+	IB_WC_WITH_VLAN		= (1<<5),
 };

 struct ib_wc {
@@ -544,6 +551,8 @@ struct ib_wc {
 	u8			sl;
 	u8			dlid_path_bits;
 	u8			port_num;	/* valid only for DR SMPs on switches */
+	u8			smac[ETH_ALEN];
+	u16			vlan_id;
 };

 enum ib_cq_notify_flags {
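
A RoCE device can now report the source MAC and VLAN of an incoming packet in the work completion; the two new IB_WC_WITH_* bits indicate whether those fields are valid. A hedged polling sketch (kernel context; cq is assumed to be an existing completion queue):

struct ib_wc wc;

while (ib_poll_cq(cq, 1, &wc) > 0) {
	if (wc.wc_flags & IB_WC_WITH_SMAC)
		pr_debug("peer smac %pM\n", wc.smac);
	if (wc.wc_flags & IB_WC_WITH_VLAN)
		pr_debug("peer vlan %u\n", wc.vlan_id);
}
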
@@ -633,6 +642,7 @@ enum ib_qp_type {
 enum ib_qp_create_flags {
 	IB_QP_CREATE_IPOIB_UD_LSO		= 1 << 0,
 	IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK	= 1 << 1,
+	IB_QP_CREATE_NETIF_QP			= 1 << 5,
 	/* reserve bits 26-31 for low level drivers' internal use */
 	IB_QP_CREATE_RESERVED_START		= 1 << 26,
 	IB_QP_CREATE_RESERVED_END		= 1 << 31,
@@ -721,7 +731,11 @@ enum ib_qp_attr_mask {
 	IB_QP_MAX_DEST_RD_ATOMIC	= (1<<17),
 	IB_QP_PATH_MIG_STATE		= (1<<18),
 	IB_QP_CAP			= (1<<19),
-	IB_QP_DEST_QPN			= (1<<20)
+	IB_QP_DEST_QPN			= (1<<20),
+	IB_QP_SMAC			= (1<<21),
+	IB_QP_ALT_SMAC			= (1<<22),
+	IB_QP_VID			= (1<<23),
+	IB_QP_ALT_VID			= (1<<24),
 };

 enum ib_qp_state {
@@ -771,6 +785,10 @@ struct ib_qp_attr {
 	u8			rnr_retry;
 	u8			alt_port_num;
 	u8			alt_timeout;
+	u8			smac[ETH_ALEN];
+	u8			alt_smac[ETH_ALEN];
+	u16			vlan_id;
+	u16			alt_vlan_id;
 };

 enum ib_wr_opcode {
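
The IB_QP_SMAC/IB_QP_ALT_SMAC/IB_QP_VID/IB_QP_ALT_VID bits and the matching ib_qp_attr fields are filled in by the core (ib_cm/rdma_cm) rather than by ULPs; an Ethernet-capable low-level driver consumes them in its modify_qp path. A hedged sketch of the driver side (kernel context; the qp_ctx_* names are placeholders for the driver's own QP context fields):

/* Inside a driver's modify_qp implementation (sketch only). */
if (attr_mask & IB_QP_SMAC)
	memcpy(qp_ctx_smac, attr->smac, ETH_ALEN);
if (attr_mask & IB_QP_ALT_SMAC)
	memcpy(qp_ctx_alt_smac, attr->alt_smac, ETH_ALEN);
if (attr_mask & IB_QP_VID)
	qp_ctx_vlan = attr->vlan_id;
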
@@ -1099,13 +1117,14 @@ enum ib_flow_attr_type {
 enum ib_flow_spec_type {
 	/* L2 headers*/
 	IB_FLOW_SPEC_ETH	= 0x20,
+	IB_FLOW_SPEC_IB		= 0x22,
 	/* L3 header*/
 	IB_FLOW_SPEC_IPV4	= 0x30,
 	/* L4 headers*/
 	IB_FLOW_SPEC_TCP	= 0x40,
 	IB_FLOW_SPEC_UDP	= 0x41
 };
-
+#define IB_FLOW_SPEC_LAYER_MASK	0xF0
 #define IB_FLOW_SPEC_SUPPORT_LAYERS 4

 /* Flow steering rule priority is set according to it's domain.
@@ -1133,6 +1152,18 @@ struct ib_flow_spec_eth {
 	struct ib_flow_eth_filter mask;
 };

+struct ib_flow_ib_filter {
+	__be16 dlid;
+	__u8   sl;
+};
+
+struct ib_flow_spec_ib {
+	enum ib_flow_spec_type	 type;
+	u16			 size;
+	struct ib_flow_ib_filter val;
+	struct ib_flow_ib_filter mask;
+};
+
 struct ib_flow_ipv4_filter {
 	__be32	src_ip;
 	__be32	dst_ip;
@@ -1163,6 +1194,7 @@ union ib_flow_spec {
 		u16			size;
 	};
 	struct ib_flow_spec_eth		eth;
+	struct ib_flow_spec_ib		ib;
 	struct ib_flow_spec_ipv4        ipv4;
 	struct ib_flow_spec_tcp_udp	tcp_udp;
 };
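
IB_FLOW_SPEC_IB extends flow steering to match on InfiniBand L2 fields (DLID and SL) for UD traffic, next to the existing ETH/IPV4/TCP/UDP specs, and IB_FLOW_SPEC_LAYER_MASK extracts the layer from a spec type. A hedged sketch of building such a rule (kernel context; the spec-follows-attr layout and the size values below follow the existing flow steering convention, so treat them as illustrative rather than definitive):

struct {
	struct ib_flow_attr    attr;
	struct ib_flow_spec_ib ib;
} flow = {
	.attr = {
		.type         = IB_FLOW_ATTR_NORMAL,
		.size         = sizeof(flow),
		.num_of_specs = 1,
		.port         = 1,
	},
	.ib = {
		.type      = IB_FLOW_SPEC_IB,
		.size      = sizeof(struct ib_flow_spec_ib),
		.val.dlid  = cpu_to_be16(0x1234),
		.mask.dlid = cpu_to_be16(0xffff),	/* match the DLID exactly */
	},
};
struct ib_flow *fl = ib_create_flow(qp, &flow.attr, IB_FLOW_DOMAIN_USER);
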
@@ -1488,6 +1520,7 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
  * @next_state: Next QP state
  * @type: QP type
  * @mask: Mask of supplied QP attributes
+ * @ll : link layer of port
  *
  * This function is a helper function that a low-level driver's
  * modify_qp method can use to validate the consumer's input.  It
@@ -1496,7 +1529,8 @@ static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len
  * and that the attribute mask supplied is allowed for the transition.
  */
 int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
-		       enum ib_qp_type type, enum ib_qp_attr_mask mask);
+		       enum ib_qp_type type, enum ib_qp_attr_mask mask,
+		       enum rdma_link_layer ll);

 int ib_register_event_handler  (struct ib_event_handler *event_handler);
 int ib_unregister_event_handler(struct ib_event_handler *event_handler);
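
Since drivers must now tell ib_modify_qp_is_ok() the port's link layer, the validation table can require the new Ethernet attributes only on RoCE ports. A hedged driver-side sketch (kernel context; cur_state, new_state and attr_mask are assumed to have been computed earlier in the driver's modify_qp path):

enum rdma_link_layer ll;

ll = rdma_port_get_link_layer(ibqp->device, qp_attr->port_num);
if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
			attr_mask, ll))
	return -EINVAL;
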

+ 30 - 6
include/scsi/scsi_transport_srp.h

@@ -19,7 +19,7 @@ struct srp_rport_identifiers {
  * @SRP_RPORT_BLOCKED:   Transport layer not operational; fast I/O fail timer
  *                       is running and I/O has been blocked.
  * @SRP_RPORT_FAIL_FAST: Fast I/O fail timer has expired; fail I/O fast.
- * @SRP_RPORT_LOST:      Device loss timer has expired; port is being removed.
+ * @SRP_RPORT_LOST:      Port is being removed.
  */
 enum srp_rport_state {
 	SRP_RPORT_RUNNING,
@@ -29,10 +29,26 @@ enum srp_rport_state {
 };

 /**
- * struct srp_rport
- * @lld_data: LLD private data.
- * @mutex:    Protects against concurrent rport reconnect / fast_io_fail /
- *   dev_loss_tmo activity.
+ * struct srp_rport - SRP initiator or target port
+ *
+ * Fields that are relevant for SRP initiator and SRP target drivers:
+ * @dev:               Device associated with this rport.
+ * @port_id:           16-byte port identifier.
+ * @roles:             Role of this port - initiator or target.
+ *
+ * Fields that are only relevant for SRP initiator drivers:
+ * @lld_data:          LLD private data.
+ * @mutex:             Protects against concurrent rport reconnect /
+ *                     fast_io_fail / dev_loss_tmo activity.
+ * @state:             rport state.
+ * @deleted:           Whether or not srp_rport_del() has already been invoked.
+ * @reconnect_delay:   Reconnect delay in seconds.
+ * @failed_reconnects: Number of failed reconnect attempts.
+ * @reconnect_work:    Work structure used for scheduling reconnect attempts.
+ * @fast_io_fail_tmo:  Fast I/O fail timeout in seconds.
+ * @dev_loss_tmo:      Device loss timeout in seconds.
+ * @fast_io_fail_work: Work structure used for scheduling fast I/O fail work.
+ * @dev_loss_work:     Work structure used for scheduling device loss work.
  */
 struct srp_rport {
 	/* for initiator and target drivers */
@@ -48,7 +64,6 @@ struct srp_rport {

 	struct mutex		mutex;
 	enum srp_rport_state	state;
-	bool			deleted;
 	int			reconnect_delay;
 	int			failed_reconnects;
 	struct delayed_work	reconnect_work;
@@ -60,6 +75,8 @@ struct srp_rport {

 /**
  * struct srp_function_template
+ *
+ * Fields that are only relevant for SRP initiator drivers:
  * @has_rport_state: Whether or not to create the state, fast_io_fail_tmo and
  *     dev_loss_tmo sysfs attribute for an rport.
  * @reset_timer_if_blocked: Whether or srp_timed_out() should reset the command
@@ -71,6 +88,11 @@ struct srp_rport {
  *     srp_reconnect_rport().
  * @terminate_rport_io: Callback function for terminating all outstanding I/O
  *     requests for an rport.
+ * @rport_delete: Callback function that deletes an rport.
+ *
+ * Fields that are only relevant for SRP target drivers:
+ * @tsk_mgmt_response: Callback function for sending a task management response.
+ * @it_nexus_response: Callback function for processing an IT nexus response.
  */
 struct srp_function_template {
 	/* for initiator drivers */
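
With the kernel-doc above now split by role, an SRP initiator driver only needs to care about the initiator-side callbacks. A hedged sketch of what such a driver might wire up, using only members named in the comment above (kernel context; the my_srp_* handlers are placeholders, and the exact member types should be checked against the header):

static struct srp_function_template my_srp_transport_functions = {
	/* for initiator drivers */
	.has_rport_state	= true,
	.reset_timer_if_blocked	= true,
	.reconnect		= my_srp_rport_reconnect,	/* int  (*)(struct srp_rport *) */
	.terminate_rport_io	= my_srp_terminate_io,		/* void (*)(struct srp_rport *) */
	.rport_delete		= my_srp_rport_delete,		/* void (*)(struct srp_rport *) */
};
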
@@ -101,9 +123,11 @@ extern int srp_tmo_valid(int reconnect_delay, int fast_io_fail_tmo,
 extern int srp_reconnect_rport(struct srp_rport *rport);
 extern void srp_start_tl_fail_timers(struct srp_rport *rport);
 extern void srp_remove_host(struct Scsi_Host *);
+extern void srp_stop_rport_timers(struct srp_rport *rport);

 /**
  * srp_chkready() - evaluate the transport layer state before I/O
+ * @rport: SRP target port pointer.
  *
  * Returns a SCSI result code that can be returned by the LLD queuecommand()
  * implementation. The role of this function is similar to that of