
Merge branches 'i40iw', 'sriov' and 'hfi1' into k.o/for-4.6

Doug Ledford, 9 years ago
commit 520a07bff6
100 changed files with 5624 additions and 6980 deletions
  1. 2 1
      Documentation/infiniband/sysfs.txt
  2. 6 0
      MAINTAINERS
  3. 2 0
      drivers/infiniband/Kconfig
  4. 1 0
      drivers/infiniband/Makefile
  5. 7 8
      drivers/infiniband/core/cache.c
  6. 15 7
      drivers/infiniband/core/cma.c
  7. 29 15
      drivers/infiniband/core/device.c
  8. 15 22
      drivers/infiniband/core/fmr_pool.c
  9. 164 26
      drivers/infiniband/core/iwcm.c
  10. 6 6
      drivers/infiniband/core/iwpm_msg.c
  11. 7 7
      drivers/infiniband/core/iwpm_util.c
  12. 1 1
      drivers/infiniband/core/iwpm_util.h
  13. 6 8
      drivers/infiniband/core/packer.c
  14. 11 7
      drivers/infiniband/core/sa_query.c
  15. 4 4
      drivers/infiniband/core/ucm.c
  16. 3 3
      drivers/infiniband/core/ucma.c
  17. 11 12
      drivers/infiniband/core/ud_header.c
  18. 25 17
      drivers/infiniband/core/uverbs_cmd.c
  19. 39 41
      drivers/infiniband/core/uverbs_main.c
  20. 206 0
      drivers/infiniband/core/verbs.c
  21. 8 8
      drivers/infiniband/hw/cxgb3/iwch_cm.c
  22. 2 1
      drivers/infiniband/hw/cxgb3/iwch_provider.c
  23. 80 194
      drivers/infiniband/hw/cxgb4/cm.c
  24. 8 1
      drivers/infiniband/hw/cxgb4/cq.c
  25. 22 50
      drivers/infiniband/hw/cxgb4/device.c
  26. 6 43
      drivers/infiniband/hw/cxgb4/iw_cxgb4.h
  27. 8 4
      drivers/infiniband/hw/cxgb4/mem.c
  28. 4 1
      drivers/infiniband/hw/cxgb4/provider.c
  29. 62 45
      drivers/infiniband/hw/cxgb4/qp.c
  30. 3 3
      drivers/infiniband/hw/mlx4/alias_GUID.c
  31. 69 3
      drivers/infiniband/hw/mlx4/main.c
  32. 2 1
      drivers/infiniband/hw/mlx4/mlx4_ib.h
  33. 3 1
      drivers/infiniband/hw/mlx4/mr.c
  34. 1 1
      drivers/infiniband/hw/mlx5/Makefile
  35. 90 14
      drivers/infiniband/hw/mlx5/cq.c
  36. 548 0
      drivers/infiniband/hw/mlx5/gsi.c
  37. 194 0
      drivers/infiniband/hw/mlx5/ib_virt.c
  38. 155 11
      drivers/infiniband/hw/mlx5/mad.c
  39. 116 15
      drivers/infiniband/hw/mlx5/main.c
  40. 99 17
      drivers/infiniband/hw/mlx5/mlx5_ib.h
  41. 471 130
      drivers/infiniband/hw/mlx5/mr.c
  42. 5 5
      drivers/infiniband/hw/mlx5/odp.c
  43. 242 29
      drivers/infiniband/hw/mlx5/qp.c
  44. 22 19
      drivers/infiniband/hw/mlx5/srq.c
  45. 7 0
      drivers/infiniband/hw/mlx5/user.h
  46. 0 1
      drivers/infiniband/hw/nes/Kconfig
  47. 0 25
      drivers/infiniband/hw/nes/nes.c
  48. 86 275
      drivers/infiniband/hw/nes/nes_cm.c
  49. 2 9
      drivers/infiniband/hw/nes/nes_cm.h
  50. 1 43
      drivers/infiniband/hw/nes/nes_hw.c
  51. 0 7
      drivers/infiniband/hw/nes/nes_hw.h
  52. 0 7
      drivers/infiniband/hw/nes/nes_nic.c
  53. 4 1
      drivers/infiniband/hw/nes/nes_verbs.c
  54. 8 0
      drivers/infiniband/hw/ocrdma/ocrdma.h
  55. 64 13
      drivers/infiniband/hw/ocrdma/ocrdma_ah.c
  56. 3 2
      drivers/infiniband/hw/ocrdma/ocrdma_ah.h
  57. 27 6
      drivers/infiniband/hw/ocrdma/ocrdma_hw.c
  58. 4 0
      drivers/infiniband/hw/ocrdma/ocrdma_main.c
  59. 13 3
      drivers/infiniband/hw/ocrdma/ocrdma_sli.h
  60. 2 2
      drivers/infiniband/hw/ocrdma/ocrdma_stats.c
  61. 26 12
      drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
  62. 1 1
      drivers/infiniband/hw/qib/Kconfig
  63. 5 5
      drivers/infiniband/hw/qib/Makefile
  64. 21 12
      drivers/infiniband/hw/qib/qib.h
  65. 0 3
      drivers/infiniband/hw/qib/qib_common.h
  66. 0 545
      drivers/infiniband/hw/qib/qib_cq.c
  67. 39 32
      drivers/infiniband/hw/qib/qib_driver.c
  68. 4 4
      drivers/infiniband/hw/qib/qib_iba6120.c
  69. 2 4
      drivers/infiniband/hw/qib/qib_iba7322.c
  70. 18 7
      drivers/infiniband/hw/qib/qib_init.c
  71. 1 1
      drivers/infiniband/hw/qib/qib_intr.c
  72. 16 170
      drivers/infiniband/hw/qib/qib_keys.c
  73. 149 189
      drivers/infiniband/hw/qib/qib_mad.c
  74. 0 174
      drivers/infiniband/hw/qib/qib_mmap.c
  75. 0 490
      drivers/infiniband/hw/qib/qib_mr.c
  76. 191 987
      drivers/infiniband/hw/qib/qib_qp.c
  77. 198 211
      drivers/infiniband/hw/qib/qib_rc.c
  78. 107 84
      drivers/infiniband/hw/qib/qib_ruc.c
  79. 23 18
      drivers/infiniband/hw/qib/qib_sdma.c
  80. 0 380
      drivers/infiniband/hw/qib/qib_srq.c
  81. 69 16
      drivers/infiniband/hw/qib/qib_sysfs.c
  82. 38 41
      drivers/infiniband/hw/qib/qib_uc.c
  83. 74 68
      drivers/infiniband/hw/qib/qib_ud.c
  84. 327 896
      drivers/infiniband/hw/qib/qib_verbs.c
  85. 64 748
      drivers/infiniband/hw/qib/qib_verbs.h
  86. 0 363
      drivers/infiniband/hw/qib/qib_verbs_mcast.c
  87. 1 0
      drivers/infiniband/sw/Makefile
  88. 6 0
      drivers/infiniband/sw/rdmavt/Kconfig
  89. 13 0
      drivers/infiniband/sw/rdmavt/Makefile
  90. 196 0
      drivers/infiniband/sw/rdmavt/ah.c
  91. 59 0
      drivers/infiniband/sw/rdmavt/ah.h
  92. 159 166
      drivers/infiniband/sw/rdmavt/cq.c
  93. 64 0
      drivers/infiniband/sw/rdmavt/cq.h
  94. 184 0
      drivers/infiniband/sw/rdmavt/dma.c
  95. 53 0
      drivers/infiniband/sw/rdmavt/dma.h
  96. 171 0
      drivers/infiniband/sw/rdmavt/mad.c
  97. 60 0
      drivers/infiniband/sw/rdmavt/mad.h
  98. 147 115
      drivers/infiniband/sw/rdmavt/mcast.c
  99. 58 0
      drivers/infiniband/sw/rdmavt/mcast.h
  100. 79 63
      drivers/infiniband/sw/rdmavt/mmap.c

+ 2 - 1
Documentation/infiniband/sysfs.txt

@@ -78,9 +78,10 @@ HFI1
    chip_reset - diagnostic (root only)
    boardversion - board version
    ports/1/
-          CMgtA/
+          CCMgtA/
                cc_settings_bin - CCA tables used by PSM2
                cc_table_bin
+               cc_prescan - enable prescaning for faster BECN response
           sc2v/ - 32 files (0 - 31) used to translate sl->vl
           sl2sc/ - 32 files (0 - 31) used to translate sl->sc
           vl2mtu/ - 16 (0 - 15) files used to determine MTU for vl

+ 6 - 0
MAINTAINERS

@@ -9085,6 +9085,12 @@ L:	rds-devel@oss.oracle.com (moderated for non-subscribers)
 S:	Supported
 F:	net/rds/
 
+RDMAVT - RDMA verbs software
+M:	Dennis Dalessandro <dennis.dalessandro@intel.com>
+L:	linux-rdma@vger.kernel.org
+S:	Supported
+F:	drivers/infiniband/sw/rdmavt
+
 READ-COPY UPDATE (RCU)
 M:	"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
 M:	Josh Triplett <josh@joshtriplett.org>

+ 2 - 0
drivers/infiniband/Kconfig

@@ -83,4 +83,6 @@ source "drivers/infiniband/ulp/srpt/Kconfig"
 source "drivers/infiniband/ulp/iser/Kconfig"
 source "drivers/infiniband/ulp/isert/Kconfig"
 
+source "drivers/infiniband/sw/rdmavt/Kconfig"
+
 endif # INFINIBAND

+ 1 - 0
drivers/infiniband/Makefile

@@ -1,3 +1,4 @@
 obj-$(CONFIG_INFINIBAND)		+= core/
 obj-$(CONFIG_INFINIBAND)		+= hw/
 obj-$(CONFIG_INFINIBAND)		+= ulp/
+obj-$(CONFIG_INFINIBAND)		+= sw/

+ 7 - 8
drivers/infiniband/core/cache.c

@@ -1043,8 +1043,8 @@ static void ib_cache_update(struct ib_device *device,
 
 	ret = ib_query_port(device, port, tprops);
 	if (ret) {
-		printk(KERN_WARNING "ib_query_port failed (%d) for %s\n",
-		       ret, device->name);
+		pr_warn("ib_query_port failed (%d) for %s\n",
+			ret, device->name);
 		goto err;
 	}
 
@@ -1067,8 +1067,8 @@ static void ib_cache_update(struct ib_device *device,
 	for (i = 0; i < pkey_cache->table_len; ++i) {
 		ret = ib_query_pkey(device, port, i, pkey_cache->table + i);
 		if (ret) {
-			printk(KERN_WARNING "ib_query_pkey failed (%d) for %s (index %d)\n",
-			       ret, device->name, i);
+			pr_warn("ib_query_pkey failed (%d) for %s (index %d)\n",
+				ret, device->name, i);
 			goto err;
 		}
 	}
@@ -1078,8 +1078,8 @@ static void ib_cache_update(struct ib_device *device,
 			ret = ib_query_gid(device, port, i,
 					   gid_cache->table + i, NULL);
 			if (ret) {
-				printk(KERN_WARNING "ib_query_gid failed (%d) for %s (index %d)\n",
-				       ret, device->name, i);
+				pr_warn("ib_query_gid failed (%d) for %s (index %d)\n",
+					ret, device->name, i);
 				goto err;
 			}
 		}
@@ -1161,8 +1161,7 @@ int ib_cache_setup_one(struct ib_device *device)
 					  GFP_KERNEL);
 	if (!device->cache.pkey_cache ||
 	    !device->cache.lmc_cache) {
-		printk(KERN_WARNING "Couldn't allocate cache "
-		       "for %s\n", device->name);
+		pr_warn("Couldn't allocate cache for %s\n", device->name);
 		return -ENOMEM;
 	}
 

+ 15 - 7
drivers/infiniband/core/cma.c

@@ -1206,6 +1206,10 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event,
 		req->has_gid	= true;
 		req->service_id	= req_param->primary_path->service_id;
 		req->pkey	= be16_to_cpu(req_param->primary_path->pkey);
+		if (req->pkey != req_param->bth_pkey)
+			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and primary path P_Key (0x%x)\n"
+					    "RDMA CMA: in the future this may cause the request to be dropped\n",
+					    req_param->bth_pkey, req->pkey);
 		break;
 	case IB_CM_SIDR_REQ_RECEIVED:
 		req->device	= sidr_param->listen_id->device;
@@ -1213,6 +1217,10 @@ static int cma_save_req_info(const struct ib_cm_event *ib_event,
 		req->has_gid	= false;
 		req->service_id	= sidr_param->service_id;
 		req->pkey	= sidr_param->pkey;
+		if (req->pkey != sidr_param->bth_pkey)
+			pr_warn_ratelimited("RDMA CMA: got different BTH P_Key (0x%x) and SIDR request payload P_Key (0x%x)\n"
+					    "RDMA CMA: in the future this may cause the request to be dropped\n",
+					    sidr_param->bth_pkey, req->pkey);
 		break;
 	default:
 		return -EINVAL;
@@ -1713,7 +1721,7 @@ static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
 		event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
 		break;
 	default:
-		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
+		pr_err("RDMA CMA: unexpected IB CM event: %d\n",
 		       ib_event->event);
 		goto out;
 	}
@@ -2186,8 +2194,8 @@ static void cma_listen_on_dev(struct rdma_id_private *id_priv,
 
 	ret = rdma_listen(id, id_priv->backlog);
 	if (ret)
-		printk(KERN_WARNING "RDMA CMA: cma_listen_on_dev, error %d, "
-		       "listening on device %s\n", ret, cma_dev->device->name);
+		pr_warn("RDMA CMA: cma_listen_on_dev, error %d, listening on device %s\n",
+			ret, cma_dev->device->name);
 }
 
 static void cma_listen_on_all(struct rdma_id_private *id_priv)
@@ -3239,7 +3247,7 @@ static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
 		event.status = 0;
 		break;
 	default:
-		printk(KERN_ERR "RDMA CMA: unexpected IB CM event: %d\n",
+		pr_err("RDMA CMA: unexpected IB CM event: %d\n",
 		       ib_event->event);
 		goto out;
 	}
@@ -4003,8 +4011,8 @@ static int cma_netdev_change(struct net_device *ndev, struct rdma_id_private *id
 	if ((dev_addr->bound_dev_if == ndev->ifindex) &&
 	    (net_eq(dev_net(ndev), dev_addr->net)) &&
 	    memcmp(dev_addr->src_dev_addr, ndev->dev_addr, ndev->addr_len)) {
-		printk(KERN_INFO "RDMA CM addr change for ndev %s used by id %p\n",
-		       ndev->name, &id_priv->id);
+		pr_info("RDMA CM addr change for ndev %s used by id %p\n",
+			ndev->name, &id_priv->id);
 		work = kzalloc(sizeof *work, GFP_KERNEL);
 		if (!work)
 			return -ENOMEM;
@@ -4287,7 +4295,7 @@ static int __init cma_init(void)
 		goto err;
 
 	if (ibnl_add_client(RDMA_NL_RDMA_CM, RDMA_NL_RDMA_CM_NUM_OPS, cma_cb_table))
-		printk(KERN_WARNING "RDMA CMA: failed to add netlink callback\n");
+		pr_warn("RDMA CMA: failed to add netlink callback\n");
 	cma_configfs_init();
 
 	return 0;

+ 29 - 15
drivers/infiniband/core/device.c

@@ -115,8 +115,8 @@ static int ib_device_check_mandatory(struct ib_device *device)
 
 	for (i = 0; i < ARRAY_SIZE(mandatory_table); ++i) {
 		if (!*(void **) ((void *) device + mandatory_table[i].offset)) {
-			printk(KERN_WARNING "Device %s is missing mandatory function %s\n",
-			       device->name, mandatory_table[i].name);
+			pr_warn("Device %s is missing mandatory function %s\n",
+				device->name, mandatory_table[i].name);
 			return -EINVAL;
 		}
 	}
@@ -255,8 +255,8 @@ static int add_client_context(struct ib_device *device, struct ib_client *client
 
 	context = kmalloc(sizeof *context, GFP_KERNEL);
 	if (!context) {
-		printk(KERN_WARNING "Couldn't allocate client context for %s/%s\n",
-		       device->name, client->name);
+		pr_warn("Couldn't allocate client context for %s/%s\n",
+			device->name, client->name);
 		return -ENOMEM;
 	}
 
@@ -343,28 +343,29 @@ int ib_register_device(struct ib_device *device,
 
 	ret = read_port_immutable(device);
 	if (ret) {
-		printk(KERN_WARNING "Couldn't create per port immutable data %s\n",
-		       device->name);
+		pr_warn("Couldn't create per port immutable data %s\n",
+			device->name);
 		goto out;
 	}
 
 	ret = ib_cache_setup_one(device);
 	if (ret) {
-		printk(KERN_WARNING "Couldn't set up InfiniBand P_Key/GID cache\n");
+		pr_warn("Couldn't set up InfiniBand P_Key/GID cache\n");
 		goto out;
 	}
 
 	memset(&device->attrs, 0, sizeof(device->attrs));
 	ret = device->query_device(device, &device->attrs, &uhw);
 	if (ret) {
-		printk(KERN_WARNING "Couldn't query the device attributes\n");
+		pr_warn("Couldn't query the device attributes\n");
+		ib_cache_cleanup_one(device);
 		goto out;
 	}
 
 	ret = ib_device_register_sysfs(device, port_callback);
 	if (ret) {
-		printk(KERN_WARNING "Couldn't register device %s with driver model\n",
-		       device->name);
+		pr_warn("Couldn't register device %s with driver model\n",
+			device->name);
 		ib_cache_cleanup_one(device);
 		goto out;
 	}
@@ -565,8 +566,8 @@ void ib_set_client_data(struct ib_device *device, struct ib_client *client,
 			goto out;
 		}
 
-	printk(KERN_WARNING "No client context found for %s/%s\n",
-	       device->name, client->name);
+	pr_warn("No client context found for %s/%s\n",
+		device->name, client->name);
 
 out:
 	spin_unlock_irqrestore(&device->client_data_lock, flags);
@@ -649,10 +650,23 @@ int ib_query_port(struct ib_device *device,
 		  u8 port_num,
 		  struct ib_port_attr *port_attr)
 {
+	union ib_gid gid;
+	int err;
+
 	if (port_num < rdma_start_port(device) || port_num > rdma_end_port(device))
 		return -EINVAL;
 
-	return device->query_port(device, port_num, port_attr);
+	memset(port_attr, 0, sizeof(*port_attr));
+	err = device->query_port(device, port_num, port_attr);
+	if (err || port_attr->subnet_prefix)
+		return err;
+
+	err = ib_query_gid(device, port_num, 0, &gid, NULL);
+	if (err)
+		return err;
+
+	port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix);
+	return 0;
 }
 EXPORT_SYMBOL(ib_query_port);
 
@@ -959,13 +973,13 @@ static int __init ib_core_init(void)
 
 	ret = class_register(&ib_class);
 	if (ret) {
-		printk(KERN_WARNING "Couldn't create InfiniBand device class\n");
+		pr_warn("Couldn't create InfiniBand device class\n");
 		goto err_comp;
 	}
 
 	ret = ibnl_init();
 	if (ret) {
-		printk(KERN_WARNING "Couldn't init IB netlink interface\n");
+		pr_warn("Couldn't init IB netlink interface\n");
 		goto err_sysfs;
 	}
 

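The ib_query_port() change above makes the core fill in port_attr->subnet_prefix from GID index 0 whenever a driver leaves it at zero. As a purely illustrative aside (user-space C with sample values and hypothetical names, not code from this commit), the byte layout that fallback relies on looks like this: the first eight bytes of a 16-byte GID carry the subnet prefix in network byte order, so a single be64 conversion recovers it, which is what the added be64_to_cpu() call does.

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for union ib_gid: 16 raw bytes, first 8 = subnet prefix (big endian). */
union demo_gid {
	uint8_t raw[16];
	struct {
		uint64_t subnet_prefix;
		uint64_t interface_id;
	} global;
};

int main(void)
{
	union demo_gid gid;
	static const uint8_t sample[16] = {
		0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,	/* link-local prefix */
		0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77	/* made-up interface id */
	};

	memcpy(gid.raw, sample, sizeof(gid.raw));

	/* Same conversion as port_attr->subnet_prefix = be64_to_cpu(gid.global.subnet_prefix). */
	printf("subnet_prefix = 0x%016llx\n",
	       (unsigned long long)be64toh(gid.global.subnet_prefix));
	return 0;
}
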
+ 15 - 22
drivers/infiniband/core/fmr_pool.c

@@ -150,8 +150,8 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
 
 #ifdef DEBUG
 		if (fmr->ref_count !=0) {
-			printk(KERN_WARNING PFX "Unmapping FMR 0x%08x with ref count %d\n",
-			       fmr, fmr->ref_count);
+			pr_warn(PFX "Unmapping FMR 0x%08x with ref count %d\n",
+				fmr, fmr->ref_count);
 		}
 #endif
 	}
@@ -167,7 +167,7 @@ static void ib_fmr_batch_release(struct ib_fmr_pool *pool)
 
 	ret = ib_unmap_fmr(&fmr_list);
 	if (ret)
-		printk(KERN_WARNING PFX "ib_unmap_fmr returned %d\n", ret);
+		pr_warn(PFX "ib_unmap_fmr returned %d\n", ret);
 
 	spin_lock_irq(&pool->pool_lock);
 	list_splice(&unmap_list, &pool->free_list);
@@ -222,8 +222,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 	device = pd->device;
 	if (!device->alloc_fmr    || !device->dealloc_fmr  ||
 	    !device->map_phys_fmr || !device->unmap_fmr) {
-		printk(KERN_INFO PFX "Device %s does not support FMRs\n",
-		       device->name);
+		pr_info(PFX "Device %s does not support FMRs\n", device->name);
 		return ERR_PTR(-ENOSYS);
 	}
 
@@ -233,13 +232,10 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 		max_remaps = device->attrs.max_map_per_fmr;
 
 	pool = kmalloc(sizeof *pool, GFP_KERNEL);
-	if (!pool) {
-		printk(KERN_WARNING PFX "couldn't allocate pool struct\n");
+	if (!pool)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	pool->cache_bucket   = NULL;
-
 	pool->flush_function = params->flush_function;
 	pool->flush_arg      = params->flush_arg;
 
@@ -251,7 +247,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 			kmalloc(IB_FMR_HASH_SIZE * sizeof *pool->cache_bucket,
 				GFP_KERNEL);
 		if (!pool->cache_bucket) {
-			printk(KERN_WARNING PFX "Failed to allocate cache in pool\n");
+			pr_warn(PFX "Failed to allocate cache in pool\n");
 			ret = -ENOMEM;
 			goto out_free_pool;
 		}
@@ -275,7 +271,7 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 				   "ib_fmr(%s)",
 				   "ib_fmr(%s)",
 				   device->name);
 	if (IS_ERR(pool->thread)) {
-		printk(KERN_WARNING PFX "couldn't start cleanup thread\n");
+		pr_warn(PFX "couldn't start cleanup thread\n");
 		ret = PTR_ERR(pool->thread);
 		goto out_free_pool;
 	}
@@ -294,11 +290,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 
 		for (i = 0; i < params->pool_size; ++i) {
 			fmr = kmalloc(bytes_per_fmr, GFP_KERNEL);
-			if (!fmr) {
-				printk(KERN_WARNING PFX "failed to allocate fmr "
-				       "struct for FMR %d\n", i);
+			if (!fmr)
 				goto out_fail;
-			}
 
 			fmr->pool             = pool;
 			fmr->remap_count      = 0;
@@ -307,8 +300,8 @@ struct ib_fmr_pool *ib_create_fmr_pool(struct ib_pd             *pd,
 
 			fmr->fmr = ib_alloc_fmr(pd, params->access, &fmr_attr);
 			if (IS_ERR(fmr->fmr)) {
-				printk(KERN_WARNING PFX "fmr_create failed "
-				       "for FMR %d\n", i);
+				pr_warn(PFX "fmr_create failed for FMR %d\n",
+					i);
 				kfree(fmr);
 				goto out_fail;
 			}
@@ -363,8 +356,8 @@ void ib_destroy_fmr_pool(struct ib_fmr_pool *pool)
 	}
 
 	if (i < pool->pool_size)
-		printk(KERN_WARNING PFX "pool still has %d regions registered\n",
-		       pool->pool_size - i);
+		pr_warn(PFX "pool still has %d regions registered\n",
+			pool->pool_size - i);
 
 	kfree(pool->cache_bucket);
 	kfree(pool);
@@ -463,7 +456,7 @@ struct ib_pool_fmr *ib_fmr_pool_map_phys(struct ib_fmr_pool *pool_handle,
 		list_add(&fmr->list, &pool->free_list);
 		spin_unlock_irqrestore(&pool->pool_lock, flags);
 
-		printk(KERN_WARNING PFX "fmr_map returns %d\n", result);
+		pr_warn(PFX "fmr_map returns %d\n", result);
 
 		return ERR_PTR(result);
 	}
@@ -517,8 +510,8 @@ int ib_fmr_pool_unmap(struct ib_pool_fmr *fmr)
 
 #ifdef DEBUG
 	if (fmr->ref_count < 0)
-		printk(KERN_WARNING PFX "FMR %p has ref count %d < 0\n",
-		       fmr, fmr->ref_count);
+		pr_warn(PFX "FMR %p has ref count %d < 0\n",
+			fmr, fmr->ref_count);
 #endif
 
 	spin_unlock_irqrestore(&pool->pool_lock, flags);

+ 164 - 26
drivers/infiniband/core/iwcm.c

@@ -50,6 +50,8 @@
 
 #include <rdma/iw_cm.h>
 #include <rdma/ib_addr.h>
+#include <rdma/iw_portmap.h>
+#include <rdma/rdma_netlink.h>
 
 #include "iwcm.h"
 
@@ -57,6 +59,16 @@ MODULE_AUTHOR("Tom Tucker");
 MODULE_DESCRIPTION("iWARP CM");
 MODULE_LICENSE("Dual BSD/GPL");
 
+static struct ibnl_client_cbs iwcm_nl_cb_table[] = {
+	[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
+	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
+	[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
+	[RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
+	[RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
+	[RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
+	[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
+};
+
 static struct workqueue_struct *iwcm_wq;
 struct iwcm_work {
 	struct work_struct work;
@@ -402,6 +414,11 @@ static void destroy_cm_id(struct iw_cm_id *cm_id)
 	}
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
+	if (cm_id->mapped) {
+		iwpm_remove_mapinfo(&cm_id->local_addr, &cm_id->m_local_addr);
+		iwpm_remove_mapping(&cm_id->local_addr, RDMA_NL_IWCM);
+	}
+
 	(void)iwcm_deref_id(cm_id_priv);
 }
 
@@ -426,6 +443,97 @@ void iw_destroy_cm_id(struct iw_cm_id *cm_id)
 }
 EXPORT_SYMBOL(iw_destroy_cm_id);
 
+/**
+ * iw_cm_check_wildcard - If IP address is 0 then use original
+ * @pm_addr: sockaddr containing the ip to check for wildcard
+ * @cm_addr: sockaddr containing the actual IP address
+ * @cm_outaddr: sockaddr to set IP addr which leaving port
+ *
+ *  Checks the pm_addr for wildcard and then sets cm_outaddr's
+ *  IP to the actual (cm_addr).
+ */
+static void iw_cm_check_wildcard(struct sockaddr_storage *pm_addr,
+				 struct sockaddr_storage *cm_addr,
+				 struct sockaddr_storage *cm_outaddr)
+{
+	if (pm_addr->ss_family == AF_INET) {
+		struct sockaddr_in *pm4_addr = (struct sockaddr_in *)pm_addr;
+
+		if (pm4_addr->sin_addr.s_addr == INADDR_ANY) {
+			struct sockaddr_in *cm4_addr =
+				(struct sockaddr_in *)cm_addr;
+			struct sockaddr_in *cm4_outaddr =
+				(struct sockaddr_in *)cm_outaddr;
+
+			cm4_outaddr->sin_addr = cm4_addr->sin_addr;
+		}
+	} else {
+		struct sockaddr_in6 *pm6_addr = (struct sockaddr_in6 *)pm_addr;
+
+		if (ipv6_addr_type(&pm6_addr->sin6_addr) == IPV6_ADDR_ANY) {
+			struct sockaddr_in6 *cm6_addr =
+				(struct sockaddr_in6 *)cm_addr;
+			struct sockaddr_in6 *cm6_outaddr =
+				(struct sockaddr_in6 *)cm_outaddr;
+
+			cm6_outaddr->sin6_addr = cm6_addr->sin6_addr;
+		}
+	}
+}
+
+/**
+ * iw_cm_map - Use portmapper to map the ports
+ * @cm_id: connection manager pointer
+ * @active: Indicates the active side when true
+ * returns nonzero for error only if iwpm_create_mapinfo() fails
+ *
+ * Tries to add a mapping for a port using the Portmapper. If
+ * successful in mapping the IP/Port it will check the remote
+ * mapped IP address for a wildcard IP address and replace the
+ * zero IP address with the remote_addr.
+ */
+static int iw_cm_map(struct iw_cm_id *cm_id, bool active)
+{
+	struct iwpm_dev_data pm_reg_msg;
+	struct iwpm_sa_data pm_msg;
+	int status;
+
+	cm_id->m_local_addr = cm_id->local_addr;
+	cm_id->m_remote_addr = cm_id->remote_addr;
+
+	memcpy(pm_reg_msg.dev_name, cm_id->device->name,
+	       sizeof(pm_reg_msg.dev_name));
+	memcpy(pm_reg_msg.if_name, cm_id->device->iwcm->ifname,
+	       sizeof(pm_reg_msg.if_name));
+
+	if (iwpm_register_pid(&pm_reg_msg, RDMA_NL_IWCM) ||
+	    !iwpm_valid_pid())
+		return 0;
+
+	cm_id->mapped = true;
+	pm_msg.loc_addr = cm_id->local_addr;
+	pm_msg.rem_addr = cm_id->remote_addr;
+	if (active)
+		status = iwpm_add_and_query_mapping(&pm_msg,
+						    RDMA_NL_IWCM);
+	else
+		status = iwpm_add_mapping(&pm_msg, RDMA_NL_IWCM);
+
+	if (!status) {
+		cm_id->m_local_addr = pm_msg.mapped_loc_addr;
+		if (active) {
+			cm_id->m_remote_addr = pm_msg.mapped_rem_addr;
+			iw_cm_check_wildcard(&pm_msg.mapped_rem_addr,
+					     &cm_id->remote_addr,
+					     &cm_id->m_remote_addr);
+		}
+	}
+
+	return iwpm_create_mapinfo(&cm_id->local_addr,
+				   &cm_id->m_local_addr,
+				   RDMA_NL_IWCM);
+}
+
 /*
  * CM_ID <-- LISTEN
  *
@@ -452,7 +560,9 @@ int iw_cm_listen(struct iw_cm_id *cm_id, int backlog)
 	case IW_CM_STATE_IDLE:
 		cm_id_priv->state = IW_CM_STATE_LISTEN;
 		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
+		ret = iw_cm_map(cm_id, false);
+		if (!ret)
+			ret = cm_id->device->iwcm->create_listen(cm_id, backlog);
 		if (ret)
 			cm_id_priv->state = IW_CM_STATE_IDLE;
 		spin_lock_irqsave(&cm_id_priv->lock, flags);
@@ -582,39 +692,37 @@ int iw_cm_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *iw_param)
 	spin_lock_irqsave(&cm_id_priv->lock, flags);
 
 	if (cm_id_priv->state != IW_CM_STATE_IDLE) {
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
-		wake_up_all(&cm_id_priv->connect_wait);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err;
 	}
 
 	/* Get the ib_qp given the QPN */
 	qp = cm_id->device->iwcm->get_qp(cm_id->device, iw_param->qpn);
 	if (!qp) {
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
-		wake_up_all(&cm_id_priv->connect_wait);
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err;
 	}
 	cm_id->device->iwcm->add_ref(qp);
 	cm_id_priv->qp = qp;
 	cm_id_priv->state = IW_CM_STATE_CONN_SENT;
 	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
 
-	ret = cm_id->device->iwcm->connect(cm_id, iw_param);
-	if (ret) {
-		spin_lock_irqsave(&cm_id_priv->lock, flags);
-		if (cm_id_priv->qp) {
-			cm_id->device->iwcm->rem_ref(qp);
-			cm_id_priv->qp = NULL;
-		}
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
-		cm_id_priv->state = IW_CM_STATE_IDLE;
-		clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
-		wake_up_all(&cm_id_priv->connect_wait);
-	}
+	ret = iw_cm_map(cm_id, true);
+	if (!ret)
+		ret = cm_id->device->iwcm->connect(cm_id, iw_param);
+	if (!ret)
+		return 0;	/* success */
 
+	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	if (cm_id_priv->qp) {
+		cm_id->device->iwcm->rem_ref(qp);
+		cm_id_priv->qp = NULL;
+	}
+	cm_id_priv->state = IW_CM_STATE_IDLE;
+err:
+	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
+	wake_up_all(&cm_id_priv->connect_wait);
 	return ret;
 }
 EXPORT_SYMBOL(iw_cm_connect);
@@ -656,8 +764,23 @@ static void cm_conn_req_handler(struct iwcm_id_private *listen_id_priv,
 		goto out;
 
 	cm_id->provider_data = iw_event->provider_data;
-	cm_id->local_addr = iw_event->local_addr;
-	cm_id->remote_addr = iw_event->remote_addr;
+	cm_id->m_local_addr = iw_event->local_addr;
+	cm_id->m_remote_addr = iw_event->remote_addr;
+	cm_id->local_addr = listen_id_priv->id.local_addr;
+
+	ret = iwpm_get_remote_info(&listen_id_priv->id.m_local_addr,
+				   &iw_event->remote_addr,
+				   &cm_id->remote_addr,
+				   RDMA_NL_IWCM);
+	if (ret) {
+		cm_id->remote_addr = iw_event->remote_addr;
+	} else {
+		iw_cm_check_wildcard(&listen_id_priv->id.m_local_addr,
+				     &iw_event->local_addr,
+				     &cm_id->local_addr);
+		iw_event->local_addr = cm_id->local_addr;
+		iw_event->remote_addr = cm_id->remote_addr;
+	}
 
 	cm_id_priv = container_of(cm_id, struct iwcm_id_private, id);
 	cm_id_priv->state = IW_CM_STATE_CONN_RECV;
@@ -753,8 +876,10 @@ static int cm_conn_rep_handler(struct iwcm_id_private *cm_id_priv,
 	clear_bit(IWCM_F_CONNECT_WAIT, &cm_id_priv->flags);
 	BUG_ON(cm_id_priv->state != IW_CM_STATE_CONN_SENT);
 	if (iw_event->status == 0) {
-		cm_id_priv->id.local_addr = iw_event->local_addr;
-		cm_id_priv->id.remote_addr = iw_event->remote_addr;
+		cm_id_priv->id.m_local_addr = iw_event->local_addr;
+		cm_id_priv->id.m_remote_addr = iw_event->remote_addr;
+		iw_event->local_addr = cm_id_priv->id.local_addr;
+		iw_event->remote_addr = cm_id_priv->id.remote_addr;
 		cm_id_priv->state = IW_CM_STATE_ESTABLISHED;
 	} else {
 		/* REJECTED or RESET */
@@ -1044,6 +1169,17 @@ EXPORT_SYMBOL(iw_cm_init_qp_attr);
 
 static int __init iw_cm_init(void)
 {
+	int ret;
+
+	ret = iwpm_init(RDMA_NL_IWCM);
+	if (ret)
+		pr_err("iw_cm: couldn't init iwpm\n");
+
+	ret = ibnl_add_client(RDMA_NL_IWCM, RDMA_NL_IWPM_NUM_OPS,
+			      iwcm_nl_cb_table);
+	if (ret)
+		pr_err("iw_cm: couldn't register netlink callbacks\n");
+
 	iwcm_wq = create_singlethread_workqueue("iw_cm_wq");
 	if (!iwcm_wq)
 		return -ENOMEM;
@@ -1063,6 +1199,8 @@ static void __exit iw_cm_cleanup(void)
 {
 	unregister_net_sysctl_table(iwcm_ctl_table_hdr);
 	destroy_workqueue(iwcm_wq);
+	ibnl_remove_client(RDMA_NL_IWCM);
+	iwpm_exit(RDMA_NL_IWCM);
 }
 
 module_init(iw_cm_init);

+ 6 - 6
drivers/infiniband/core/iwpm_msg.c

@@ -88,8 +88,8 @@ int iwpm_register_pid(struct iwpm_dev_data *pm_msg, u8 nl_client)
 	ret = ibnl_put_attr(skb, nlh, sizeof(u32), &msg_seq, IWPM_NLA_REG_PID_SEQ);
 	if (ret)
 		goto pid_query_error;
-	ret = ibnl_put_attr(skb, nlh, IWPM_IFNAME_SIZE,
-				pm_msg->if_name, IWPM_NLA_REG_IF_NAME);
+	ret = ibnl_put_attr(skb, nlh, IFNAMSIZ,
+			    pm_msg->if_name, IWPM_NLA_REG_IF_NAME);
 	if (ret)
 		goto pid_query_error;
 	ret = ibnl_put_attr(skb, nlh, IWPM_DEVNAME_SIZE,
@@ -394,7 +394,7 @@ register_pid_response_exit:
 	/* always for found nlmsg_request */
 	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
 	barrier();
-	wake_up(&nlmsg_request->waitq);
+	up(&nlmsg_request->sem);
 	return 0;
 }
 EXPORT_SYMBOL(iwpm_register_pid_cb);
@@ -463,7 +463,7 @@ add_mapping_response_exit:
 	/* always for found request */
 	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
 	barrier();
-	wake_up(&nlmsg_request->waitq);
+	up(&nlmsg_request->sem);
 	return 0;
 }
 EXPORT_SYMBOL(iwpm_add_mapping_cb);
@@ -555,7 +555,7 @@ query_mapping_response_exit:
 	/* always for found request */
 	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
 	barrier();
-	wake_up(&nlmsg_request->waitq);
+	up(&nlmsg_request->sem);
 	return 0;
 }
 EXPORT_SYMBOL(iwpm_add_and_query_mapping_cb);
@@ -749,7 +749,7 @@ int iwpm_mapping_error_cb(struct sk_buff *skb, struct netlink_callback *cb)
 	/* always for found request */
 	kref_put(&nlmsg_request->kref, iwpm_free_nlmsg_request);
 	barrier();
-	wake_up(&nlmsg_request->waitq);
+	up(&nlmsg_request->sem);
 	return 0;
 }
 EXPORT_SYMBOL(iwpm_mapping_error_cb);

+ 7 - 7
drivers/infiniband/core/iwpm_util.c

@@ -254,9 +254,9 @@ void iwpm_add_remote_info(struct iwpm_remote_info *rem_info)
 }
 
 int iwpm_get_remote_info(struct sockaddr_storage *mapped_loc_addr,
-				struct sockaddr_storage *mapped_rem_addr,
-				struct sockaddr_storage *remote_addr,
-				u8 nl_client)
+			 struct sockaddr_storage *mapped_rem_addr,
+			 struct sockaddr_storage *remote_addr,
+			 u8 nl_client)
 {
 	struct hlist_node *tmp_hlist_node;
 	struct hlist_head *hash_bucket_head;
@@ -322,6 +322,8 @@ struct iwpm_nlmsg_request *iwpm_get_nlmsg_request(__u32 nlmsg_seq,
 	nlmsg_request->nl_client = nl_client;
 	nlmsg_request->request_done = 0;
 	nlmsg_request->err_code = 0;
+	sema_init(&nlmsg_request->sem, 1);
+	down(&nlmsg_request->sem);
 	return nlmsg_request;
 }
 
@@ -364,11 +366,9 @@ struct iwpm_nlmsg_request *iwpm_find_nlmsg_request(__u32 echo_seq)
 int iwpm_wait_complete_req(struct iwpm_nlmsg_request *nlmsg_request)
 {
 	int ret;
-	init_waitqueue_head(&nlmsg_request->waitq);
 
-	ret = wait_event_timeout(nlmsg_request->waitq,
-			(nlmsg_request->request_done != 0), IWPM_NL_TIMEOUT);
-	if (!ret) {
+	ret = down_timeout(&nlmsg_request->sem, IWPM_NL_TIMEOUT);
+	if (ret) {
 		ret = -EINVAL;
 		pr_info("%s: Timeout %d sec for netlink request (seq = %u)\n",
 			__func__, (IWPM_NL_TIMEOUT/HZ), nlmsg_request->nlmsg_seq);

+ 1 - 1
drivers/infiniband/core/iwpm_util.h

@@ -69,7 +69,7 @@ struct iwpm_nlmsg_request {
 	u8	            nl_client;
 	u8                  request_done;
 	u16                 err_code;
-	wait_queue_head_t   waitq;
+	struct semaphore    sem;
 	struct kref         kref;
 };
 

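The iwpm changes above (iwpm_msg.c, iwpm_util.c and iwpm_util.h) replace the waitqueue-plus-request_done handshake with a semaphore that starts out held: iwpm_get_nlmsg_request() now does sema_init(&sem, 1) followed by down(), so iwpm_wait_complete_req() simply blocks in down_timeout() until the netlink callback releases the request with up(). A rough user-space sketch of the same pattern, using POSIX semaphores purely for illustration (the names and the timeout value are invented; this is not the kernel code):

#include <errno.h>
#include <pthread.h>
#include <semaphore.h>
#include <stdio.h>
#include <time.h>

struct demo_request {
	sem_t sem;		/* starts at 0: request not completed yet */
	int err_code;
};

/* Plays the role of the netlink callback that completes the request. */
static void *completer(void *arg)
{
	struct demo_request *req = arg;

	req->err_code = 0;
	sem_post(&req->sem);	/* counterpart of up(&nlmsg_request->sem) */
	return NULL;
}

int main(void)
{
	struct demo_request req;
	struct timespec deadline;
	pthread_t thr;

	/* The kernel code gets the same starting state with sema_init(&sem, 1) + down(). */
	sem_init(&req.sem, 0, 0);
	pthread_create(&thr, NULL, completer, &req);

	clock_gettime(CLOCK_REALTIME, &deadline);
	deadline.tv_sec += 10;	/* stand-in for IWPM_NL_TIMEOUT */

	if (sem_timedwait(&req.sem, &deadline))	/* like down_timeout() */
		fprintf(stderr, "request timed out (%d)\n", errno == ETIMEDOUT);
	else
		printf("request completed, err_code = %d\n", req.err_code);

	pthread_join(thr, NULL);
	sem_destroy(&req.sem);
	return 0;
}
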
+ 6 - 8
drivers/infiniband/core/packer.c

@@ -44,7 +44,7 @@ static u64 value_read(int offset, int size, void *structure)
 	case 4: return be32_to_cpup((__be32 *) (structure + offset));
 	case 8: return be64_to_cpup((__be64 *) (structure + offset));
 	default:
-		printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
+		pr_warn("Field size %d bits not handled\n", size * 8);
 		return 0;
 	}
 }
@@ -104,9 +104,8 @@ void ib_pack(const struct ib_field        *desc,
 		} else {
 			if (desc[i].offset_bits % 8 ||
 			    desc[i].size_bits   % 8) {
-				printk(KERN_WARNING "Structure field %s of size %d "
-				       "bits is not byte-aligned\n",
-				       desc[i].field_name, desc[i].size_bits);
+				pr_warn("Structure field %s of size %d bits is not byte-aligned\n",
+					desc[i].field_name, desc[i].size_bits);
 			}
 
 			if (desc[i].struct_size_bytes)
@@ -132,7 +131,7 @@ static void value_write(int offset, int size, u64 val, void *structure)
 	case 32: *(__be32 *) (structure + offset) = cpu_to_be32(val); break;
 	case 64: *(__be64 *) (structure + offset) = cpu_to_be64(val); break;
 	default:
-		printk(KERN_WARNING "Field size %d bits not handled\n", size * 8);
+		pr_warn("Field size %d bits not handled\n", size * 8);
 	}
 }
 
@@ -188,9 +187,8 @@ void ib_unpack(const struct ib_field        *desc,
 		} else {
 			if (desc[i].offset_bits % 8 ||
 			    desc[i].size_bits   % 8) {
-				printk(KERN_WARNING "Structure field %s of size %d "
-				       "bits is not byte-aligned\n",
-				       desc[i].field_name, desc[i].size_bits);
+				pr_warn("Structure field %s of size %d bits is not byte-aligned\n",
+					desc[i].field_name, desc[i].size_bits);
 			}
 
 			memcpy(structure + desc[i].struct_offset_bytes,

+ 11 - 7
drivers/infiniband/core/sa_query.c

@@ -864,13 +864,12 @@ static void update_sm_ah(struct work_struct *work)
 	struct ib_ah_attr   ah_attr;
 
 	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
-		printk(KERN_WARNING "Couldn't query port\n");
+		pr_warn("Couldn't query port\n");
 		return;
 	}
 
 	new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
 	if (!new_ah) {
-		printk(KERN_WARNING "Couldn't allocate new SM AH\n");
 		return;
 	}
 
@@ -880,16 +879,21 @@ static void update_sm_ah(struct work_struct *work)
 	new_ah->pkey_index = 0;
 	if (ib_find_pkey(port->agent->device, port->port_num,
 			 IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index))
-		printk(KERN_ERR "Couldn't find index for default PKey\n");
+		pr_err("Couldn't find index for default PKey\n");
 
 	memset(&ah_attr, 0, sizeof ah_attr);
 	ah_attr.dlid     = port_attr.sm_lid;
 	ah_attr.sl       = port_attr.sm_sl;
 	ah_attr.port_num = port->port_num;
+	if (port_attr.grh_required) {
+		ah_attr.ah_flags = IB_AH_GRH;
+		ah_attr.grh.dgid.global.subnet_prefix = cpu_to_be64(port_attr.subnet_prefix);
+		ah_attr.grh.dgid.global.interface_id = cpu_to_be64(IB_SA_WELL_KNOWN_GUID);
+	}
 
 	new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
 	if (IS_ERR(new_ah->ah)) {
-		printk(KERN_WARNING "Couldn't create new SM AH\n");
+		pr_warn("Couldn't create new SM AH\n");
 		kfree(new_ah);
 		return;
 	}
@@ -1221,7 +1225,7 @@ static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
 		rec.net = NULL;
 		rec.ifindex = 0;
 		rec.gid_type = IB_GID_TYPE_IB;
-		memset(rec.dmac, 0, ETH_ALEN);
+		eth_zero_addr(rec.dmac);
 		query->callback(status, &rec, query->context);
 	} else
 		query->callback(status, NULL, query->context);
@@ -1800,13 +1804,13 @@ static int __init ib_sa_init(void)
 
 	ret = ib_register_client(&sa_client);
 	if (ret) {
-		printk(KERN_ERR "Couldn't register ib_sa client\n");
+		pr_err("Couldn't register ib_sa client\n");
 		goto err1;
 	}
 
 	ret = mcast_init();
 	if (ret) {
-		printk(KERN_ERR "Couldn't initialize multicast handling\n");
+		pr_err("Couldn't initialize multicast handling\n");
 		goto err2;
 	}
 

+ 4 - 4
drivers/infiniband/core/ucm.c

@@ -1234,7 +1234,7 @@ static int find_overflow_devnum(void)
 		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UCM_MAX_DEVICES,
 					  "infiniband_cm");
 					  "infiniband_cm");
 		if (ret) {
-			printk(KERN_ERR "ucm: couldn't register dynamic device number\n");
+			pr_err("ucm: couldn't register dynamic device number\n");
 			return ret;
 		}
 	}
@@ -1329,19 +1329,19 @@ static int __init ib_ucm_init(void)
 	ret = register_chrdev_region(IB_UCM_BASE_DEV, IB_UCM_MAX_DEVICES,
 				     "infiniband_cm");
 				     "infiniband_cm");
 	if (ret) {
-		printk(KERN_ERR "ucm: couldn't register device number\n");
+		pr_err("ucm: couldn't register device number\n");
 		goto error1;
 	}
 
 	ret = class_create_file(&cm_class, &class_attr_abi_version.attr);
 	if (ret) {
-		printk(KERN_ERR "ucm: couldn't create abi_version attribute\n");
+		pr_err("ucm: couldn't create abi_version attribute\n");
 		goto error2;
 	}
 
 	ret = ib_register_client(&ucm_client);
 	if (ret) {
-		printk(KERN_ERR "ucm: couldn't register client\n");
+		pr_err("ucm: couldn't register client\n");
 		goto error3;
 	}
 	return 0;

+ 3 - 3
drivers/infiniband/core/ucma.c

@@ -314,7 +314,7 @@ static void ucma_removal_event_handler(struct rdma_cm_id *cm_id)
 		}
 	}
 	if (!event_found)
-		printk(KERN_ERR "ucma_removal_event_handler: warning: connect request event wasn't found\n");
+		pr_err("ucma_removal_event_handler: warning: connect request event wasn't found\n");
 }
 
 static int ucma_event_handler(struct rdma_cm_id *cm_id,
@@ -1716,13 +1716,13 @@ static int __init ucma_init(void)
 
 	ret = device_create_file(ucma_misc.this_device, &dev_attr_abi_version);
 	if (ret) {
-		printk(KERN_ERR "rdma_ucm: couldn't create abi_version attr\n");
+		pr_err("rdma_ucm: couldn't create abi_version attr\n");
 		goto err1;
 	}
 
 	ucma_ctl_table_hdr = register_net_sysctl(&init_net, "net/rdma_ucm", ucma_ctl_table);
 	if (!ucma_ctl_table_hdr) {
-		printk(KERN_ERR "rdma_ucm: couldn't register sysctl paths\n");
+		pr_err("rdma_ucm: couldn't register sysctl paths\n");
 		ret = -ENOMEM;
 		goto err2;
 	}

+ 11 - 12
drivers/infiniband/core/ud_header.c

@@ -479,8 +479,8 @@ int ib_ud_header_unpack(void                *buf,
 	buf += IB_LRH_BYTES;
 
 	if (header->lrh.link_version != 0) {
-		printk(KERN_WARNING "Invalid LRH.link_version %d\n",
-		       header->lrh.link_version);
+		pr_warn("Invalid LRH.link_version %d\n",
+			header->lrh.link_version);
 		return -EINVAL;
 	}
 
@@ -496,20 +496,20 @@ int ib_ud_header_unpack(void                *buf,
 		buf += IB_GRH_BYTES;
 
 		if (header->grh.ip_version != 6) {
-			printk(KERN_WARNING "Invalid GRH.ip_version %d\n",
-			       header->grh.ip_version);
+			pr_warn("Invalid GRH.ip_version %d\n",
+				header->grh.ip_version);
 			return -EINVAL;
 		}
 		if (header->grh.next_header != 0x1b) {
-			printk(KERN_WARNING "Invalid GRH.next_header 0x%02x\n",
-			       header->grh.next_header);
+			pr_warn("Invalid GRH.next_header 0x%02x\n",
+				header->grh.next_header);
 			return -EINVAL;
 		}
 		break;
 
 	default:
-		printk(KERN_WARNING "Invalid LRH.link_next_header %d\n",
-		       header->lrh.link_next_header);
+		pr_warn("Invalid LRH.link_next_header %d\n",
+			header->lrh.link_next_header);
 		return -EINVAL;
 	}
 
@@ -525,14 +525,13 @@ int ib_ud_header_unpack(void                *buf,
 		header->immediate_present = 1;
 		break;
 	default:
-		printk(KERN_WARNING "Invalid BTH.opcode 0x%02x\n",
-		       header->bth.opcode);
+		pr_warn("Invalid BTH.opcode 0x%02x\n", header->bth.opcode);
 		return -EINVAL;
 	}
 
 	if (header->bth.transport_header_version != 0) {
-		printk(KERN_WARNING "Invalid BTH.transport_header_version %d\n",
-		       header->bth.transport_header_version);
+		pr_warn("Invalid BTH.transport_header_version %d\n",
+			header->bth.transport_header_version);
 		return -EINVAL;
 	}
 

+ 25 - 17
drivers/infiniband/core/uverbs_cmd.c

@@ -402,7 +402,7 @@ static void copy_query_dev_fields(struct ib_uverbs_file *file,
 	resp->hw_ver		= attr->hw_ver;
 	resp->max_qp		= attr->max_qp;
 	resp->max_qp_wr		= attr->max_qp_wr;
-	resp->device_cap_flags	= attr->device_cap_flags;
+	resp->device_cap_flags	= lower_32_bits(attr->device_cap_flags);
 	resp->max_sge		= attr->max_sge;
 	resp->max_sge_rd	= attr->max_sge_rd;
 	resp->max_cq		= attr->max_cq;
@@ -1174,6 +1174,7 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
 	struct ib_uobject             *uobj;
 	struct ib_pd                  *pd;
 	struct ib_mw                  *mw;
+	struct ib_udata		       udata;
 	int                            ret;
 
 	if (out_len < sizeof(resp))
@@ -1195,7 +1196,12 @@ ssize_t ib_uverbs_alloc_mw(struct ib_uverbs_file *file,
 		goto err_free;
 	}
 
-	mw = pd->device->alloc_mw(pd, cmd.mw_type);
+	INIT_UDATA(&udata, buf + sizeof(cmd),
+		   (unsigned long)cmd.response + sizeof(resp),
+		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+		   out_len - sizeof(resp));
+
+	mw = pd->device->alloc_mw(pd, cmd.mw_type, &udata);
 	if (IS_ERR(mw)) {
 		ret = PTR_ERR(mw);
 		goto err_put;
@@ -1970,7 +1976,8 @@ ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file,
 		   resp_size);
 	INIT_UDATA(&uhw, buf + sizeof(cmd),
 		   (unsigned long)cmd.response + resp_size,
-		   in_len - sizeof(cmd), out_len - resp_size);
+		   in_len - sizeof(cmd) - sizeof(struct ib_uverbs_cmd_hdr),
+		   out_len - resp_size);
 
 	memset(&cmd_ex, 0, sizeof(cmd_ex));
 	cmd_ex.user_handle = cmd.user_handle;
@@ -3085,6 +3092,14 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file,
 	     !capable(CAP_NET_ADMIN)) || !capable(CAP_NET_RAW))
 		return -EPERM;
 
+	if (cmd.flow_attr.flags >= IB_FLOW_ATTR_FLAGS_RESERVED)
+		return -EINVAL;
+
+	if ((cmd.flow_attr.flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
+	    ((cmd.flow_attr.type == IB_FLOW_ATTR_ALL_DEFAULT) ||
+	     (cmd.flow_attr.type == IB_FLOW_ATTR_MC_DEFAULT)))
+		return -EINVAL;
+
 	if (cmd.flow_attr.num_of_specs > IB_FLOW_SPEC_SUPPORT_LAYERS)
 		return -EINVAL;
 
@@ -3413,7 +3428,8 @@ ssize_t ib_uverbs_create_srq(struct ib_uverbs_file *file,
 
 	INIT_UDATA(&udata, buf + sizeof cmd,
 		   (unsigned long) cmd.response + sizeof resp,
-		   in_len - sizeof cmd, out_len - sizeof resp);
+		   in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
+		   out_len - sizeof resp);
 
 	ret = __uverbs_create_xsrq(file, ib_dev, &xcmd, &udata);
 	if (ret)
@@ -3439,7 +3455,8 @@ ssize_t ib_uverbs_create_xsrq(struct ib_uverbs_file *file,
 
 	INIT_UDATA(&udata, buf + sizeof cmd,
 		   (unsigned long) cmd.response + sizeof resp,
-		   in_len - sizeof cmd, out_len - sizeof resp);
+		   in_len - sizeof cmd - sizeof(struct ib_uverbs_cmd_hdr),
+		   out_len - sizeof resp);
 
 	ret = __uverbs_create_xsrq(file, ib_dev, &cmd, &udata);
 	if (ret)
@@ -3583,9 +3600,9 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 			      struct ib_udata *ucore,
 			      struct ib_udata *uhw)
 {
-	struct ib_uverbs_ex_query_device_resp resp;
+	struct ib_uverbs_ex_query_device_resp resp = { {0} };
 	struct ib_uverbs_ex_query_device  cmd;
-	struct ib_device_attr attr;
+	struct ib_device_attr attr = {0};
 	int err;
 
 	if (ucore->inlen < sizeof(cmd))
@@ -3606,14 +3623,11 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 	if (ucore->outlen < resp.response_length)
 		return -ENOSPC;
 
-	memset(&attr, 0, sizeof(attr));
-
 	err = ib_dev->query_device(ib_dev, &attr, uhw);
 	if (err)
 		return err;
 
 	copy_query_dev_fields(file, ib_dev, &resp.base, &attr);
-	resp.comp_mask = 0;
 
 	if (ucore->outlen < resp.response_length + sizeof(resp.odp_caps))
 		goto end;
@@ -3626,9 +3640,6 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 		attr.odp_caps.per_transport_caps.uc_odp_caps;
 	resp.odp_caps.per_transport_caps.ud_odp_caps =
 		attr.odp_caps.per_transport_caps.ud_odp_caps;
-	resp.odp_caps.reserved = 0;
-#else
-	memset(&resp.odp_caps, 0, sizeof(resp.odp_caps));
 #endif
 	resp.response_length += sizeof(resp.odp_caps);
 
@@ -3646,8 +3657,5 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 
 end:
 	err = ib_copy_to_udata(ucore, &resp, resp.response_length);
-	if (err)
-		return err;
-
-	return 0;
+	return err;
 }

+ 39 - 41
drivers/infiniband/core/uverbs_main.c

@@ -683,12 +683,28 @@ out:
 	return ev_file;
 }
 
+static int verify_command_mask(struct ib_device *ib_dev, __u32 command)
+{
+	u64 mask;
+
+	if (command <= IB_USER_VERBS_CMD_OPEN_QP)
+		mask = ib_dev->uverbs_cmd_mask;
+	else
+		mask = ib_dev->uverbs_ex_cmd_mask;
+
+	if (mask & ((u64)1 << command))
+		return 0;
+
+	return -1;
+}
+
 static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 			     size_t count, loff_t *pos)
 {
 	struct ib_uverbs_file *file = filp->private_data;
 	struct ib_device *ib_dev;
 	struct ib_uverbs_cmd_hdr hdr;
+	__u32 command;
 	__u32 flags;
 	int srcu_key;
 	ssize_t ret;
@@ -707,37 +723,34 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 		goto out;
 	}
 
-	flags = (hdr.command &
-		 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
+	if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
+				   IB_USER_VERBS_CMD_COMMAND_MASK)) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 
-	if (!flags) {
-		__u32 command;
+	command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+	if (verify_command_mask(ib_dev, command)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
 
 
-		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
-					   IB_USER_VERBS_CMD_COMMAND_MASK)) {
-			ret = -EINVAL;
-			goto out;
-		}
+	if (!file->ucontext &&
+	    command != IB_USER_VERBS_CMD_GET_CONTEXT) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 
-		command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
+	flags = (hdr.command &
+		 IB_USER_VERBS_CMD_FLAGS_MASK) >> IB_USER_VERBS_CMD_FLAGS_SHIFT;
 
 
+	if (!flags) {
 		if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
 		if (command >= ARRAY_SIZE(uverbs_cmd_table) ||
 		    !uverbs_cmd_table[command]) {
 		    !uverbs_cmd_table[command]) {
 			ret = -EINVAL;
 			ret = -EINVAL;
 			goto out;
 			goto out;
 		}
 		}
 
 
-		if (!file->ucontext &&
-		    command != IB_USER_VERBS_CMD_GET_CONTEXT) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		if (!(ib_dev->uverbs_cmd_mask & (1ull << command))) {
-			ret = -ENOSYS;
-			goto out;
-		}
-
 		if (hdr.in_words * 4 != count) {
 		if (hdr.in_words * 4 != count) {
 			ret = -EINVAL;
 			ret = -EINVAL;
 			goto out;
 			goto out;
@@ -749,21 +762,11 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 						 hdr.out_words * 4);
 						 hdr.out_words * 4);
 
 
 	} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
 	} else if (flags == IB_USER_VERBS_CMD_FLAG_EXTENDED) {
-		__u32 command;
-
 		struct ib_uverbs_ex_cmd_hdr ex_hdr;
 		struct ib_uverbs_ex_cmd_hdr ex_hdr;
 		struct ib_udata ucore;
 		struct ib_udata ucore;
 		struct ib_udata uhw;
 		struct ib_udata uhw;
 		size_t written_count = count;
 		size_t written_count = count;
 
 
-		if (hdr.command & ~(__u32)(IB_USER_VERBS_CMD_FLAGS_MASK |
-					   IB_USER_VERBS_CMD_COMMAND_MASK)) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		command = hdr.command & IB_USER_VERBS_CMD_COMMAND_MASK;
-
 		if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
 		if (command >= ARRAY_SIZE(uverbs_ex_cmd_table) ||
 		    !uverbs_ex_cmd_table[command]) {
 		    !uverbs_ex_cmd_table[command]) {
 			ret = -ENOSYS;
 			ret = -ENOSYS;
@@ -775,11 +778,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
 			goto out;
 			goto out;
 		}
 		}
 
 
-		if (!(ib_dev->uverbs_ex_cmd_mask & (1ull << command))) {
-			ret = -ENOSYS;
-			goto out;
-		}
-
 		if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
 		if (count < (sizeof(hdr) + sizeof(ex_hdr))) {
 			ret = -EINVAL;
 			ret = -EINVAL;
 			goto out;
 			goto out;
@@ -1058,7 +1056,7 @@ static int find_overflow_devnum(void)
 		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
 		ret = alloc_chrdev_region(&overflow_maj, 0, IB_UVERBS_MAX_DEVICES,
 					  "infiniband_verbs");
 					  "infiniband_verbs");
 		if (ret) {
 		if (ret) {
-			printk(KERN_ERR "user_verbs: couldn't register dynamic device number\n");
+			pr_err("user_verbs: couldn't register dynamic device number\n");
 			return ret;
 			return ret;
 		}
 		}
 	}
 	}
@@ -1279,14 +1277,14 @@ static int __init ib_uverbs_init(void)
 	ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
 	ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES,
 				     "infiniband_verbs");
 				     "infiniband_verbs");
 	if (ret) {
 	if (ret) {
-		printk(KERN_ERR "user_verbs: couldn't register device number\n");
+		pr_err("user_verbs: couldn't register device number\n");
 		goto out;
 		goto out;
 	}
 	}
 
 
 	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
 	uverbs_class = class_create(THIS_MODULE, "infiniband_verbs");
 	if (IS_ERR(uverbs_class)) {
 	if (IS_ERR(uverbs_class)) {
 		ret = PTR_ERR(uverbs_class);
 		ret = PTR_ERR(uverbs_class);
-		printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n");
+		pr_err("user_verbs: couldn't create class infiniband_verbs\n");
 		goto out_chrdev;
 		goto out_chrdev;
 	}
 	}
 
 
@@ -1294,13 +1292,13 @@ static int __init ib_uverbs_init(void)
 
 
 	ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
 	ret = class_create_file(uverbs_class, &class_attr_abi_version.attr);
 	if (ret) {
 	if (ret) {
-		printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n");
+		pr_err("user_verbs: couldn't create abi_version attribute\n");
 		goto out_class;
 		goto out_class;
 	}
 	}
 
 
 	ret = ib_register_client(&uverbs_client);
 	ret = ib_register_client(&uverbs_client);
 	if (ret) {
 	if (ret) {
-		printk(KERN_ERR "user_verbs: couldn't register client\n");
+		pr_err("user_verbs: couldn't register client\n");
 		goto out_class;
 		goto out_class;
 	}
 	}
 
 

+ 206 - 0
drivers/infiniband/core/verbs.c

@@ -1551,6 +1551,46 @@ int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
 }
 EXPORT_SYMBOL(ib_check_mr_status);
 
+int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
+			 int state)
+{
+	if (!device->set_vf_link_state)
+		return -ENOSYS;
+
+	return device->set_vf_link_state(device, vf, port, state);
+}
+EXPORT_SYMBOL(ib_set_vf_link_state);
+
+int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+		     struct ifla_vf_info *info)
+{
+	if (!device->get_vf_config)
+		return -ENOSYS;
+
+	return device->get_vf_config(device, vf, port, info);
+}
+EXPORT_SYMBOL(ib_get_vf_config);
+
+int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
+		    struct ifla_vf_stats *stats)
+{
+	if (!device->get_vf_stats)
+		return -ENOSYS;
+
+	return device->get_vf_stats(device, vf, port, stats);
+}
+EXPORT_SYMBOL(ib_get_vf_stats);
+
+int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
+		   int type)
+{
+	if (!device->set_vf_guid)
+		return -ENOSYS;
+
+	return device->set_vf_guid(device, vf, port, guid, type);
+}
+EXPORT_SYMBOL(ib_set_vf_guid);
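The four wrappers above simply dispatch to optional driver callbacks and return -ENOSYS when a driver does not implement them. A minimal sketch of a caller handling that case; the function below is hypothetical, only the ib_set_vf_link_state() helper and the IFLA_VF_LINK_STATE_* values are existing interfaces.

	static int try_disable_vf_link(struct ib_device *device, int vf, u8 port)
	{
		int ret;

		ret = ib_set_vf_link_state(device, vf, port,
					   IFLA_VF_LINK_STATE_DISABLE);
		if (ret == -ENOSYS)
			pr_debug("device has no SR-IOV VF link-state support\n");
		return ret;
	}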
+
 /**
  * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
  *     and set it the memory region.
@@ -1567,6 +1607,8 @@ EXPORT_SYMBOL(ib_check_mr_status);
  * - The last sg element is allowed to have length less than page_size.
  * - If sg_nents total byte length exceeds the mr max_num_sge * page_size
  *   then only max_num_sg entries will be mapped.
+ * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS_REG, non of these
+ *   constraints holds and the page_size argument is ignored.
  *
  * Returns the number of sg elements that were mapped to the memory region.
  *
@@ -1657,3 +1699,167 @@ next_page:
 	return i;
 }
 EXPORT_SYMBOL(ib_sg_to_pages);
+
+struct ib_drain_cqe {
+	struct ib_cqe cqe;
+	struct completion done;
+};
+
+static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe,
+						cqe);
+
+	complete(&cqe->done);
+}
+
+/*
+ * Post a WR and block until its completion is reaped for the SQ.
+ */
+static void __ib_drain_sq(struct ib_qp *qp)
+{
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct ib_drain_cqe sdrain;
+	struct ib_send_wr swr = {}, *bad_swr;
+	int ret;
+
+	if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
+		WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
+			  "IB_POLL_DIRECT poll_ctx not supported for drain\n");
+		return;
+	}
+
+	swr.wr_cqe = &sdrain.cqe;
+	sdrain.cqe.done = ib_drain_qp_done;
+	init_completion(&sdrain.done);
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+		return;
+	}
+
+	ret = ib_post_send(qp, &swr, &bad_swr);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
+		return;
+	}
+
+	wait_for_completion(&sdrain.done);
+}
+
+/*
+ * Post a WR and block until its completion is reaped for the RQ.
+ */
+static void __ib_drain_rq(struct ib_qp *qp)
+{
+	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
+	struct ib_drain_cqe rdrain;
+	struct ib_recv_wr rwr = {}, *bad_rwr;
+	int ret;
+
+	if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
+		WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
+			  "IB_POLL_DIRECT poll_ctx not supported for drain\n");
+		return;
+	}
+
+	rwr.wr_cqe = &rdrain.cqe;
+	rdrain.cqe.done = ib_drain_qp_done;
+	init_completion(&rdrain.done);
+
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+		return;
+	}
+
+	ret = ib_post_recv(qp, &rwr, &bad_rwr);
+	if (ret) {
+		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
+		return;
+	}
+
+	wait_for_completion(&rdrain.done);
+}
+
+/**
+ * ib_drain_sq() - Block until all SQ CQEs have been consumed by the
+ *		   application.
+ * @qp:            queue pair to drain
+ *
+ * If the device has a provider-specific drain function, then
+ * call that.  Otherwise call the generic drain function
+ * __ib_drain_sq().
+ *
+ * The caller must:
+ *
+ * ensure there is room in the CQ and SQ for the drain work request and
+ * completion.
+ *
+ * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
+ * IB_POLL_DIRECT.
+ *
+ * ensure that there are no other contexts that are posting WRs concurrently.
+ * Otherwise the drain is not guaranteed.
+ */
+void ib_drain_sq(struct ib_qp *qp)
+{
+	if (qp->device->drain_sq)
+		qp->device->drain_sq(qp);
+	else
+		__ib_drain_sq(qp);
+}
+EXPORT_SYMBOL(ib_drain_sq);
+
+/**
+ * ib_drain_rq() - Block until all RQ CQEs have been consumed by the
+ *		   application.
+ * @qp:            queue pair to drain
+ *
+ * If the device has a provider-specific drain function, then
+ * call that.  Otherwise call the generic drain function
+ * __ib_drain_rq().
+ *
+ * The caller must:
+ *
+ * ensure there is room in the CQ and RQ for the drain work request and
+ * completion.
+ *
+ * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
+ * IB_POLL_DIRECT.
+ *
+ * ensure that there are no other contexts that are posting WRs concurrently.
+ * Otherwise the drain is not guaranteed.
+ */
+void ib_drain_rq(struct ib_qp *qp)
+{
+	if (qp->device->drain_rq)
+		qp->device->drain_rq(qp);
+	else
+		__ib_drain_rq(qp);
+}
+EXPORT_SYMBOL(ib_drain_rq);
+
+/**
+ * ib_drain_qp() - Block until all CQEs have been consumed by the
+ *		   application on both the RQ and SQ.
+ * @qp:            queue pair to drain
+ *
+ * The caller must:
+ *
+ * ensure there is room in the CQ(s), SQ, and RQ for drain work requests
+ * and completions.
+ *
+ * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
+ * IB_POLL_DIRECT.
+ *
+ * ensure that there are no other contexts that are posting WRs concurrently.
+ * Otherwise the drain is not guaranteed.
+ */
+void ib_drain_qp(struct ib_qp *qp)
+{
+	ib_drain_sq(qp);
+	ib_drain_rq(qp);
+}
+EXPORT_SYMBOL(ib_drain_qp);
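A minimal usage sketch for the drain helpers above, following the requirements spelled out in the kernel-doc. The teardown function is hypothetical; it assumes the QP's CQs came from ib_alloc_cq() with a poll context other than IB_POLL_DIRECT and that no other context is still posting work requests.

	static void example_qp_teardown(struct ib_qp *qp)
	{
		/* stop posting new WRs before this point */
		ib_drain_qp(qp);	/* blocks until all SQ and RQ CQEs are reaped */
		ib_destroy_qp(qp);
	}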

+ 8 - 8
drivers/infiniband/hw/cxgb3/iwch_cm.c

@@ -1877,7 +1877,7 @@ err:
 static int is_loopback_dst(struct iw_cm_id *cm_id)
 {
 	struct net_device *dev;
-	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
 
 	dev = ip_dev_find(&init_net, raddr->sin_addr.s_addr);
 	if (!dev)
@@ -1892,10 +1892,10 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	struct iwch_ep *ep;
 	struct rtable *rt;
 	int err = 0;
-	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
-	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
 
-	if (cm_id->remote_addr.ss_family != PF_INET) {
+	if (cm_id->m_remote_addr.ss_family != PF_INET) {
 		err = -ENOSYS;
 		goto out;
 	}
@@ -1961,9 +1961,9 @@ int iwch_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 
 	state_set(&ep->com, CONNECTING);
 	ep->tos = IPTOS_LOWDELAY;
-	memcpy(&ep->com.local_addr, &cm_id->local_addr,
+	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
 	       sizeof(ep->com.local_addr));
-	memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
+	memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
 	       sizeof(ep->com.remote_addr));
 
 	/* send connect request to rnic */
@@ -1992,7 +1992,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
 
 	might_sleep();
 
-	if (cm_id->local_addr.ss_family != PF_INET) {
+	if (cm_id->m_local_addr.ss_family != PF_INET) {
 		err = -ENOSYS;
 		goto fail1;
 	}
@@ -2008,7 +2008,7 @@ int iwch_create_listen(struct iw_cm_id *cm_id, int backlog)
 	cm_id->add_ref(cm_id);
 	ep->com.cm_id = cm_id;
 	ep->backlog = backlog;
-	memcpy(&ep->com.local_addr, &cm_id->local_addr,
+	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
 	       sizeof(ep->com.local_addr));
 
 	/*

+ 2 - 1
drivers/infiniband/hw/cxgb3/iwch_provider.c

@@ -657,7 +657,8 @@ err:
 	return ERR_PTR(err);
 }
 
-static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
+static struct ib_mw *iwch_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
+				   struct ib_udata *udata)
 {
 	struct iwch_dev *rhp;
 	struct iwch_pd *php;

+ 80 - 194
drivers/infiniband/hw/cxgb4/cm.c

@@ -302,7 +302,7 @@ void _c4iw_free_ep(struct kref *kref)
 		if (ep->com.remote_addr.ss_family == AF_INET6) {
 			struct sockaddr_in6 *sin6 =
 					(struct sockaddr_in6 *)
-					&ep->com.mapped_local_addr;
+					&ep->com.local_addr;
 
 			cxgb4_clip_release(
 					ep->com.dev->rdev.lldi.ports[0],
@@ -314,12 +314,6 @@ void _c4iw_free_ep(struct kref *kref)
 		dst_release(ep->dst);
 		cxgb4_l2t_release(ep->l2t);
 	}
-	if (test_bit(RELEASE_MAPINFO, &ep->com.flags)) {
-		print_addr(&ep->com, __func__, "remove_mapinfo/mapping");
-		iwpm_remove_mapinfo(&ep->com.local_addr,
-				    &ep->com.mapped_local_addr);
-		iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
-	}
 	kfree(ep);
 }
 
@@ -455,7 +449,7 @@ static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
 	state_set(&ep->com, DEAD);
 	if (ep->com.remote_addr.ss_family == AF_INET6) {
 		struct sockaddr_in6 *sin6 =
-			(struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+			(struct sockaddr_in6 *)&ep->com.local_addr;
 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
 	}
@@ -485,12 +479,19 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
 	unsigned int flowclen = 80;
 	struct fw_flowc_wr *flowc;
 	int i;
+	u16 vlan = ep->l2t->vlan;
+	int nparams;
+
+	if (vlan == CPL_L2T_VLAN_NONE)
+		nparams = 8;
+	else
+		nparams = 9;
 
 	skb = get_skb(skb, flowclen, GFP_KERNEL);
 	flowc = (struct fw_flowc_wr *)__skb_put(skb, flowclen);
 
 	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
-					   FW_FLOWC_WR_NPARAMS_V(8));
+					   FW_FLOWC_WR_NPARAMS_V(nparams));
 	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(DIV_ROUND_UP(flowclen,
 					  16)) | FW_WR_FLOWID_V(ep->hwtid));
 
@@ -511,9 +512,17 @@ static void send_flowc(struct c4iw_ep *ep, struct sk_buff *skb)
 	flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
 	flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
 	flowc->mnemval[7].val = cpu_to_be32(ep->emss);
-	/* Pad WR to 16 byte boundary */
-	flowc->mnemval[8].mnemonic = 0;
-	flowc->mnemval[8].val = 0;
+	if (nparams == 9) {
+		u16 pri;
+
+		pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+		flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
+		flowc->mnemval[8].val = cpu_to_be32(pri);
+	} else {
+		/* Pad WR to 16 byte boundary */
+		flowc->mnemval[8].mnemonic = 0;
+		flowc->mnemval[8].val = 0;
+	}
 	for (i = 0; i < 9; i++) {
 		flowc->mnemval[i].r4[0] = 0;
 		flowc->mnemval[i].r4[1] = 0;
@@ -568,54 +577,6 @@ static int send_abort(struct c4iw_ep *ep, struct sk_buff *skb, gfp_t gfp)
 	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
 }
 
-/*
- * c4iw_form_pm_msg - Form a port mapper message with mapping info
- */
-static void c4iw_form_pm_msg(struct c4iw_ep *ep,
-				struct iwpm_sa_data *pm_msg)
-{
-	memcpy(&pm_msg->loc_addr, &ep->com.local_addr,
-		sizeof(ep->com.local_addr));
-	memcpy(&pm_msg->rem_addr, &ep->com.remote_addr,
-		sizeof(ep->com.remote_addr));
-}
-
-/*
- * c4iw_form_reg_msg - Form a port mapper message with dev info
- */
-static void c4iw_form_reg_msg(struct c4iw_dev *dev,
-				struct iwpm_dev_data *pm_msg)
-{
-	memcpy(pm_msg->dev_name, dev->ibdev.name, IWPM_DEVNAME_SIZE);
-	memcpy(pm_msg->if_name, dev->rdev.lldi.ports[0]->name,
-				IWPM_IFNAME_SIZE);
-}
-
-static void c4iw_record_pm_msg(struct c4iw_ep *ep,
-			struct iwpm_sa_data *pm_msg)
-{
-	memcpy(&ep->com.mapped_local_addr, &pm_msg->mapped_loc_addr,
-		sizeof(ep->com.mapped_local_addr));
-	memcpy(&ep->com.mapped_remote_addr, &pm_msg->mapped_rem_addr,
-		sizeof(ep->com.mapped_remote_addr));
-}
-
-static int get_remote_addr(struct c4iw_ep *parent_ep, struct c4iw_ep *child_ep)
-{
-	int ret;
-
-	print_addr(&parent_ep->com, __func__, "get_remote_addr parent_ep ");
-	print_addr(&child_ep->com, __func__, "get_remote_addr child_ep ");
-
-	ret = iwpm_get_remote_info(&parent_ep->com.mapped_local_addr,
-				   &child_ep->com.mapped_remote_addr,
-				   &child_ep->com.remote_addr, RDMA_NL_C4IW);
-	if (ret)
-		PDBG("Unable to find remote peer addr info - err %d\n", ret);
-
-	return ret;
-}
-
 static void best_mtu(const unsigned short *mtus, unsigned short mtu,
 		     unsigned int *idx, int use_ts, int ipv6)
 {
@@ -645,13 +606,13 @@ static int send_connect(struct c4iw_ep *ep)
 	int wscale;
 	int win, sizev4, sizev6, wrlen;
 	struct sockaddr_in *la = (struct sockaddr_in *)
-				 &ep->com.mapped_local_addr;
+				 &ep->com.local_addr;
 	struct sockaddr_in *ra = (struct sockaddr_in *)
-				 &ep->com.mapped_remote_addr;
+				 &ep->com.remote_addr;
 	struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
-				   &ep->com.mapped_local_addr;
+				   &ep->com.local_addr;
 	struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
-				   &ep->com.mapped_remote_addr;
+				   &ep->com.remote_addr;
 	int ret;
 	enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
 	u32 isn = (prandom_u32() & ~7UL) - 1;
@@ -710,7 +671,7 @@ static int send_connect(struct c4iw_ep *ep)
 	       L2T_IDX_V(ep->l2t->idx) |
 	       TX_CHAN_V(ep->tx_chan) |
 	       SMAC_SEL_V(ep->smac_idx) |
-	       DSCP_V(ep->tos) |
+	       DSCP_V(ep->tos >> 2) |
 	       ULP_MODE_V(ULP_MODE_TCPDDP) |
 	       RCV_BUFSIZ_V(win);
 	opt2 = RX_CHANNEL_V(0) |
@@ -1829,10 +1790,10 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
 	req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
 				     ep->com.dev->rdev.lldi.ports[0],
 				     ep->l2t));
-	sin = (struct sockaddr_in *)&ep->com.mapped_local_addr;
+	sin = (struct sockaddr_in *)&ep->com.local_addr;
 	req->le.lport = sin->sin_port;
 	req->le.u.ipv4.lip = sin->sin_addr.s_addr;
-	sin = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
+	sin = (struct sockaddr_in *)&ep->com.remote_addr;
 	req->le.pport = sin->sin_port;
 	req->le.u.ipv4.pip = sin->sin_addr.s_addr;
 	req->tcb.t_state_to_astid =
@@ -1864,7 +1825,7 @@ static void send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
 		L2T_IDX_V(ep->l2t->idx) |
 		TX_CHAN_V(ep->tx_chan) |
 		SMAC_SEL_V(ep->smac_idx) |
-		DSCP_V(ep->tos) |
+		DSCP_V(ep->tos >> 2) |
 		ULP_MODE_V(ULP_MODE_TCPDDP) |
 		RCV_BUFSIZ_V(win));
 	req->tcb.opt2 = (__force __be32) (PACE_V(1) |
@@ -1928,7 +1889,7 @@ static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
 
 static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
 		     struct dst_entry *dst, struct c4iw_dev *cdev,
-		     bool clear_mpa_v1, enum chip_type adapter_type)
+		     bool clear_mpa_v1, enum chip_type adapter_type, u8 tos)
 {
 	struct neighbour *n;
 	int err, step;
@@ -1958,7 +1919,7 @@ static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
 			goto out;
 		}
 		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
-					n, pdev, 0);
+					n, pdev, rt_tos2priority(tos));
 		if (!ep->l2t)
 			goto out;
 		ep->mtu = pdev->mtu;
@@ -2013,13 +1974,13 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
 {
 	int err = 0;
 	struct sockaddr_in *laddr = (struct sockaddr_in *)
-				    &ep->com.cm_id->local_addr;
+				    &ep->com.cm_id->m_local_addr;
 	struct sockaddr_in *raddr = (struct sockaddr_in *)
-				    &ep->com.cm_id->remote_addr;
+				    &ep->com.cm_id->m_remote_addr;
 	struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
-				      &ep->com.cm_id->local_addr;
+				      &ep->com.cm_id->m_local_addr;
 	struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
-				      &ep->com.cm_id->remote_addr;
+				      &ep->com.cm_id->m_remote_addr;
 	int iptype;
 	__u8 *ra;
 
@@ -2038,10 +1999,10 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
 	insert_handle(ep->com.dev, &ep->com.dev->atid_idr, ep, ep->atid);
 
 	/* find a route */
-	if (ep->com.cm_id->local_addr.ss_family == AF_INET) {
+	if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
 		ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr,
 				     raddr->sin_addr.s_addr, laddr->sin_port,
-				     raddr->sin_port, 0);
+				     raddr->sin_port, ep->com.cm_id->tos);
 		iptype = 4;
 		ra = (__u8 *)&raddr->sin_addr;
 	} else {
@@ -2058,7 +2019,8 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
 		goto fail3;
 	}
 	err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
-			ep->com.dev->rdev.lldi.adapter_type);
+			ep->com.dev->rdev.lldi.adapter_type,
+			ep->com.cm_id->tos);
 	if (err) {
 		pr_err("%s - cannot alloc l2e.\n", __func__);
 		goto fail4;
@@ -2069,7 +2031,7 @@ static int c4iw_reconnect(struct c4iw_ep *ep)
 	     ep->l2t->idx);
 
 	state_set(&ep->com, CONNECTING);
-	ep->tos = 0;
+	ep->tos = ep->com.cm_id->tos;
 
 	/* send connect request to rnic */
 	err = send_connect(ep);
@@ -2109,10 +2071,10 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 	struct sockaddr_in6 *ra6;
 
 	ep = lookup_atid(t, atid);
-	la = (struct sockaddr_in *)&ep->com.mapped_local_addr;
-	ra = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
-	la6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
-	ra6 = (struct sockaddr_in6 *)&ep->com.mapped_remote_addr;
+	la = (struct sockaddr_in *)&ep->com.local_addr;
+	ra = (struct sockaddr_in *)&ep->com.remote_addr;
+	la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+	ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
 
 	PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid,
 	     status, status2errno(status));
@@ -2154,7 +2116,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 			if (ep->com.remote_addr.ss_family == AF_INET6) {
 				struct sockaddr_in6 *sin6 =
 						(struct sockaddr_in6 *)
-						&ep->com.mapped_local_addr;
+						&ep->com.local_addr;
 				cxgb4_clip_release(
 						ep->com.dev->rdev.lldi.ports[0],
 						(const u32 *)
@@ -2189,7 +2151,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
 
 	if (ep->com.remote_addr.ss_family == AF_INET6) {
 		struct sockaddr_in6 *sin6 =
-			(struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+			(struct sockaddr_in6 *)&ep->com.local_addr;
 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
 	}
@@ -2391,6 +2353,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 	u16 peer_mss = ntohs(req->tcpopt.mss);
 	int iptype;
 	unsigned short hdrs;
+	u8 tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
 
 	parent_ep = lookup_stid(t, stid);
 	if (!parent_ep) {
@@ -2399,8 +2362,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 	}
 
 	if (state_read(&parent_ep->com) != LISTEN) {
-		printk(KERN_ERR "%s - listening ep not in LISTEN\n",
-		       __func__);
+		PDBG("%s - listening ep not in LISTEN\n", __func__);
 		goto reject;
 	}
 
@@ -2415,7 +2377,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 		     ntohs(peer_port), peer_mss);
 		dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip,
 				 local_port, peer_port,
-				 PASS_OPEN_TOS_G(ntohl(req->tos_stid)));
+				 tos);
 	} else {
 		PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n"
 		     , __func__, parent_ep, hwtid,
@@ -2441,7 +2403,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 	}
 
 	err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
-			parent_ep->com.dev->rdev.lldi.adapter_type);
+			parent_ep->com.dev->rdev.lldi.adapter_type, tos);
 	if (err) {
 		printk(KERN_ERR MOD "%s - failed to allocate l2t entry!\n",
 		       __func__);
@@ -2459,18 +2421,9 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 	child_ep->com.dev = dev;
 	child_ep->com.cm_id = NULL;
 
-	/*
-	 * The mapped_local and mapped_remote addresses get setup with
-	 * the actual 4-tuple.  The local address will be based on the
-	 * actual local address of the connection, but on the port number
-	 * of the parent listening endpoint.  The remote address is
-	 * setup based on a query to the IWPM since we don't know what it
-	 * originally was before mapping.  If no mapping was done, then
-	 * mapped_remote == remote, and mapped_local == local.
-	 */
 	if (iptype == 4) {
 		struct sockaddr_in *sin = (struct sockaddr_in *)
-			&child_ep->com.mapped_local_addr;
+			&child_ep->com.local_addr;
 
 		sin->sin_family = PF_INET;
 		sin->sin_port = local_port;
@@ -2482,12 +2435,12 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 				 &parent_ep->com.local_addr)->sin_port;
 		sin->sin_addr.s_addr = *(__be32 *)local_ip;
 
-		sin = (struct sockaddr_in *)&child_ep->com.mapped_remote_addr;
+		sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
 		sin->sin_family = PF_INET;
 		sin->sin_port = peer_port;
 		sin->sin_addr.s_addr = *(__be32 *)peer_ip;
 	} else {
-		sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_local_addr;
+		sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
 		sin6->sin6_family = PF_INET6;
 		sin6->sin6_port = local_port;
 		memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
@@ -2498,18 +2451,15 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 				   &parent_ep->com.local_addr)->sin6_port;
 		memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
 
-		sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_remote_addr;
+		sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
 		sin6->sin6_family = PF_INET6;
 		sin6->sin6_port = peer_port;
 		memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
 	}
-	memcpy(&child_ep->com.remote_addr, &child_ep->com.mapped_remote_addr,
-	       sizeof(child_ep->com.remote_addr));
-	get_remote_addr(parent_ep, child_ep);
 
 	c4iw_get_ep(&parent_ep->com);
 	child_ep->parent_ep = parent_ep;
-	child_ep->tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
+	child_ep->tos = tos;
 	child_ep->dst = dst;
 	child_ep->hwtid = hwtid;
 
@@ -2522,7 +2472,7 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
 	accept_cr(child_ep, skb, req);
 	set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
 	if (iptype == 6) {
-		sin6 = (struct sockaddr_in6 *)&child_ep->com.mapped_local_addr;
+		sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
 		cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
 			       (const u32 *)&sin6->sin6_addr.s6_addr, 1);
 	}
@@ -2765,7 +2715,7 @@ out:
 		if (ep->com.remote_addr.ss_family == AF_INET6) {
 			struct sockaddr_in6 *sin6 =
 					(struct sockaddr_in6 *)
-					&ep->com.mapped_local_addr;
+					&ep->com.local_addr;
 			cxgb4_clip_release(
 					ep->com.dev->rdev.lldi.ports[0],
 					(const u32 *)&sin6->sin6_addr.s6_addr,
@@ -3026,8 +2976,8 @@ static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
 {
 	struct in_device *ind;
 	int found = 0;
-	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
-	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
 
 	ind = in_dev_get(dev->rdev.lldi.ports[0]);
 	if (!ind)
@@ -3072,8 +3022,8 @@ static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
 static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
 {
 	struct in6_addr uninitialized_var(addr);
-	struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->local_addr;
-	struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->remote_addr;
+	struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
+	struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
 
 	if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
 		memcpy(la6->sin6_addr.s6_addr, &addr, 16);
@@ -3092,11 +3042,8 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	struct sockaddr_in *raddr;
 	struct sockaddr_in6 *laddr6;
 	struct sockaddr_in6 *raddr6;
-	struct iwpm_dev_data pm_reg_msg;
-	struct iwpm_sa_data pm_msg;
 	__u8 *ra;
 	int iptype;
-	int iwpm_err = 0;
 
 	if ((conn_param->ord > cur_max_read_depth(dev)) ||
 	    (conn_param->ird > cur_max_read_depth(dev))) {
@@ -3144,47 +3091,17 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	}
 	insert_handle(dev, &dev->atid_idr, ep, ep->atid);
 
-	memcpy(&ep->com.local_addr, &cm_id->local_addr,
+	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
 	       sizeof(ep->com.local_addr));
-	memcpy(&ep->com.remote_addr, &cm_id->remote_addr,
+	memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
 	       sizeof(ep->com.remote_addr));
 
-	/* No port mapper available, go with the specified peer information */
-	memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
-	       sizeof(ep->com.mapped_local_addr));
-	memcpy(&ep->com.mapped_remote_addr, &cm_id->remote_addr,
-	       sizeof(ep->com.mapped_remote_addr));
-
-	c4iw_form_reg_msg(dev, &pm_reg_msg);
-	iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
-	if (iwpm_err) {
-		PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
-			__func__, iwpm_err);
-	}
-	if (iwpm_valid_pid() && !iwpm_err) {
-		c4iw_form_pm_msg(ep, &pm_msg);
-		iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_C4IW);
-		if (iwpm_err)
-			PDBG("%s: Port Mapper query fail (err = %d).\n",
-				__func__, iwpm_err);
-		else
-			c4iw_record_pm_msg(ep, &pm_msg);
-	}
-	if (iwpm_create_mapinfo(&ep->com.local_addr,
-				&ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
-		iwpm_remove_mapping(&ep->com.local_addr, RDMA_NL_C4IW);
-		err = -ENOMEM;
-		goto fail1;
-	}
-	print_addr(&ep->com, __func__, "add_query/create_mapinfo");
-	set_bit(RELEASE_MAPINFO, &ep->com.flags);
-
-	laddr = (struct sockaddr_in *)&ep->com.mapped_local_addr;
-	raddr = (struct sockaddr_in *)&ep->com.mapped_remote_addr;
-	laddr6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
-	raddr6 = (struct sockaddr_in6 *) &ep->com.mapped_remote_addr;
+	laddr = (struct sockaddr_in *)&ep->com.local_addr;
+	raddr = (struct sockaddr_in *)&ep->com.remote_addr;
+	laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr;
+	raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr;
 
-	if (cm_id->remote_addr.ss_family == AF_INET) {
+	if (cm_id->m_remote_addr.ss_family == AF_INET) {
 		iptype = 4;
 		ra = (__u8 *)&raddr->sin_addr;
 
@@ -3203,7 +3120,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		     ra, ntohs(raddr->sin_port));
 		ep->dst = find_route(dev, laddr->sin_addr.s_addr,
 				     raddr->sin_addr.s_addr, laddr->sin_port,
-				     raddr->sin_port, 0);
+				     raddr->sin_port, cm_id->tos);
 	} else {
 		iptype = 6;
 		ra = (__u8 *)&raddr6->sin6_addr;
@@ -3234,7 +3151,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	}
 
 	err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
-			ep->com.dev->rdev.lldi.adapter_type);
+			ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
 	if (err) {
 		printk(KERN_ERR MOD "%s - cannot alloc l2e.\n", __func__);
 		goto fail3;
@@ -3245,7 +3162,7 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 		ep->l2t->idx);
 
 	state_set(&ep->com, CONNECTING);
-	ep->tos = 0;
+	ep->tos = cm_id->tos;
 
 	/* send connect request to rnic */
 	err = send_connect(ep);
@@ -3269,7 +3186,7 @@ static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
 {
 	int err;
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
-				    &ep->com.mapped_local_addr;
+				    &ep->com.local_addr;
 
 	if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
 		err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
@@ -3302,7 +3219,7 @@ static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
 {
 	int err;
 	struct sockaddr_in *sin = (struct sockaddr_in *)
-				  &ep->com.mapped_local_addr;
+				  &ep->com.local_addr;
 
 	if (dev->rdev.lldi.enable_fw_ofld_conn) {
 		do {
@@ -3343,9 +3260,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
 	int err = 0;
 	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
 	struct c4iw_listen_ep *ep;
-	struct iwpm_dev_data pm_reg_msg;
-	struct iwpm_sa_data pm_msg;
-	int iwpm_err = 0;
 
 	might_sleep();
 
@@ -3360,7 +3274,7 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
 	ep->com.cm_id = cm_id;
 	ep->com.dev = dev;
 	ep->backlog = backlog;
-	memcpy(&ep->com.local_addr, &cm_id->local_addr,
+	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
 	       sizeof(ep->com.local_addr));
 
 	/*
@@ -3369,10 +3283,10 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
 	if (dev->rdev.lldi.enable_fw_ofld_conn &&
 	    ep->com.local_addr.ss_family == AF_INET)
 		ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
-					     cm_id->local_addr.ss_family, ep);
+					     cm_id->m_local_addr.ss_family, ep);
 	else
 		ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
-					    cm_id->local_addr.ss_family, ep);
+					    cm_id->m_local_addr.ss_family, ep);
 
 	if (ep->stid == -1) {
 		printk(KERN_ERR MOD "%s - cannot alloc stid.\n", __func__);
@@ -3381,36 +3295,9 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
 	}
 	insert_handle(dev, &dev->stid_idr, ep, ep->stid);
 
-	/* No port mapper available, go with the specified info */
-	memcpy(&ep->com.mapped_local_addr, &cm_id->local_addr,
-	       sizeof(ep->com.mapped_local_addr));
-
-	c4iw_form_reg_msg(dev, &pm_reg_msg);
-	iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_C4IW);
-	if (iwpm_err) {
-		PDBG("%s: Port Mapper reg pid fail (err = %d).\n",
-			__func__, iwpm_err);
-	}
-	if (iwpm_valid_pid() && !iwpm_err) {
-		memcpy(&pm_msg.loc_addr, &ep->com.local_addr,
-				sizeof(ep->com.local_addr));
-		iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_C4IW);
-		if (iwpm_err)
-			PDBG("%s: Port Mapper query fail (err = %d).\n",
-				__func__, iwpm_err);
-		else
-			memcpy(&ep->com.mapped_local_addr,
-				&pm_msg.mapped_loc_addr,
-				sizeof(ep->com.mapped_local_addr));
-	}
-	if (iwpm_create_mapinfo(&ep->com.local_addr,
-				&ep->com.mapped_local_addr, RDMA_NL_C4IW)) {
-		err = -ENOMEM;
-		goto fail3;
-	}
-	print_addr(&ep->com, __func__, "add_mapping/create_mapinfo");
+	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
+	       sizeof(ep->com.local_addr));
 
-	set_bit(RELEASE_MAPINFO, &ep->com.flags);
 	state_set(&ep->com, LISTEN);
 	if (ep->com.local_addr.ss_family == AF_INET)
 		err = create_server4(dev, ep);
@@ -3421,7 +3308,6 @@ int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
 		goto out;
 	}
 
-fail3:
 	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
 			ep->com.local_addr.ss_family);
 fail2:
@@ -3456,7 +3342,7 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
 			goto done;
 		err = c4iw_wait_for_reply(&ep->com.dev->rdev, &ep->com.wr_wait,
 					  0, 0, __func__);
-		sin6 = (struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+		sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
 	}
@@ -3580,7 +3466,7 @@ static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
 	state_set(&ep->com, DEAD);
 	if (ep->com.remote_addr.ss_family == AF_INET6) {
 		struct sockaddr_in6 *sin6 =
-			(struct sockaddr_in6 *)&ep->com.mapped_local_addr;
+			(struct sockaddr_in6 *)&ep->com.local_addr;
 		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
 				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
 	}
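Note on the ToS handling above: the connection's ToS byte now comes from cm_id->tos, and the DSCP field programmed via DSCP_V(ep->tos >> 2) is the upper six bits of that byte (the low two bits carry ECN). A small illustrative helper, not part of the patch:

	static inline u8 tos_to_dscp(u8 tos)
	{
		return tos >> 2;	/* e.g. ToS 0xb8 -> DSCP 46 (EF) */
	}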

+ 8 - 1
drivers/infiniband/hw/cxgb4/cq.c

@@ -815,8 +815,15 @@ static int c4iw_poll_cq_one(struct c4iw_cq *chp, struct ib_wc *wc)
 		}
 	}
 out:
-	if (wq)
+	if (wq) {
+		if (unlikely(qhp->attr.state != C4IW_QP_STATE_RTS)) {
+			if (t4_sq_empty(wq))
+				complete(&qhp->sq_drained);
+			if (t4_rq_empty(wq))
+				complete(&qhp->rq_drained);
+		}
 		spin_unlock(&qhp->lock);
+	}
 	return ret;
 }
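The poll path above completes sq_drained/rq_drained once the queues empty while the QP is out of RTS. A hedged sketch of the waiting side, mirroring what the c4iw_drain_sq()/c4iw_drain_rq() hooks declared in iw_cxgb4.h are expected to do; this is an illustration, not the driver's actual implementation.

	static void example_drain_sq(struct ib_qp *ibqp)
	{
		struct c4iw_qp *qhp = to_c4iw_qp(ibqp);

		wait_for_completion(&qhp->sq_drained);	/* signaled by c4iw_poll_cq_one() */
	}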
 
 

+ 22 - 50
drivers/infiniband/hw/cxgb4/device.c

@@ -87,17 +87,6 @@ struct c4iw_debugfs_data {
 	int pos;
 };
 
-/* registered cxgb4 netlink callbacks */
-static struct ibnl_client_cbs c4iw_nl_cb_table[] = {
-	[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
-	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
-	[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
-	[RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
-	[RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
-	[RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
-	[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
-};
-
 static int count_idrs(int id, void *p, void *data)
 {
 	int *countp = data;
@@ -242,13 +231,13 @@ static int dump_qp(int id, void *p, void *data)
 	if (qp->ep) {
 		if (qp->ep->com.local_addr.ss_family == AF_INET) {
 			struct sockaddr_in *lsin = (struct sockaddr_in *)
-				&qp->ep->com.local_addr;
+				&qp->ep->com.cm_id->local_addr;
 			struct sockaddr_in *rsin = (struct sockaddr_in *)
-				&qp->ep->com.remote_addr;
+				&qp->ep->com.cm_id->remote_addr;
 			struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
-				&qp->ep->com.mapped_local_addr;
+				&qp->ep->com.cm_id->m_local_addr;
 			struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
-				&qp->ep->com.mapped_remote_addr;
+				&qp->ep->com.cm_id->m_remote_addr;
 
 			cc = snprintf(qpd->buf + qpd->pos, space,
 				      "rc qp sq id %u rq id %u state %u "
@@ -264,15 +253,15 @@ static int dump_qp(int id, void *p, void *data)
 				      ntohs(mapped_rsin->sin_port));
 		} else {
 			struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
-				&qp->ep->com.local_addr;
+				&qp->ep->com.cm_id->local_addr;
 			struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
-				&qp->ep->com.remote_addr;
+				&qp->ep->com.cm_id->remote_addr;
 			struct sockaddr_in6 *mapped_lsin6 =
 				(struct sockaddr_in6 *)
-				&qp->ep->com.mapped_local_addr;
+				&qp->ep->com.cm_id->m_local_addr;
 			struct sockaddr_in6 *mapped_rsin6 =
 				(struct sockaddr_in6 *)
-				&qp->ep->com.mapped_remote_addr;
+				&qp->ep->com.cm_id->m_remote_addr;
 
 			cc = snprintf(qpd->buf + qpd->pos, space,
 				      "rc qp sq id %u rq id %u state %u "
@@ -545,13 +534,13 @@ static int dump_ep(int id, void *p, void *data)
 
 	if (ep->com.local_addr.ss_family == AF_INET) {
 		struct sockaddr_in *lsin = (struct sockaddr_in *)
-			&ep->com.local_addr;
+			&ep->com.cm_id->local_addr;
 		struct sockaddr_in *rsin = (struct sockaddr_in *)
-			&ep->com.remote_addr;
+			&ep->com.cm_id->remote_addr;
 		struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
-			&ep->com.mapped_local_addr;
+			&ep->com.cm_id->m_local_addr;
 		struct sockaddr_in *mapped_rsin = (struct sockaddr_in *)
-			&ep->com.mapped_remote_addr;
+			&ep->com.cm_id->m_remote_addr;
 
 		cc = snprintf(epd->buf + epd->pos, space,
 			      "ep %p cm_id %p qp %p state %d flags 0x%lx "
@@ -569,13 +558,13 @@ static int dump_ep(int id, void *p, void *data)
 			      ntohs(mapped_rsin->sin_port));
 	} else {
 		struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
-			&ep->com.local_addr;
+			&ep->com.cm_id->local_addr;
 		struct sockaddr_in6 *rsin6 = (struct sockaddr_in6 *)
-			&ep->com.remote_addr;
+			&ep->com.cm_id->remote_addr;
 		struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
-			&ep->com.mapped_local_addr;
+			&ep->com.cm_id->m_local_addr;
 		struct sockaddr_in6 *mapped_rsin6 = (struct sockaddr_in6 *)
-			&ep->com.mapped_remote_addr;
+			&ep->com.cm_id->m_remote_addr;
 
 		cc = snprintf(epd->buf + epd->pos, space,
 			      "ep %p cm_id %p qp %p state %d flags 0x%lx "
@@ -610,9 +599,9 @@ static int dump_listen_ep(int id, void *p, void *data)
 
 	if (ep->com.local_addr.ss_family == AF_INET) {
 		struct sockaddr_in *lsin = (struct sockaddr_in *)
-			&ep->com.local_addr;
+			&ep->com.cm_id->local_addr;
 		struct sockaddr_in *mapped_lsin = (struct sockaddr_in *)
-			&ep->com.mapped_local_addr;
+			&ep->com.cm_id->m_local_addr;
 
 		cc = snprintf(epd->buf + epd->pos, space,
 			      "ep %p cm_id %p state %d flags 0x%lx stid %d "
@@ -623,9 +612,9 @@ static int dump_listen_ep(int id, void *p, void *data)
 			      ntohs(mapped_lsin->sin_port));
 	} else {
 		struct sockaddr_in6 *lsin6 = (struct sockaddr_in6 *)
-			&ep->com.local_addr;
+			&ep->com.cm_id->local_addr;
 		struct sockaddr_in6 *mapped_lsin6 = (struct sockaddr_in6 *)
-			&ep->com.mapped_local_addr;
+			&ep->com.cm_id->m_local_addr;
 
 		cc = snprintf(epd->buf + epd->pos, space,
 			      "ep %p cm_id %p state %d flags 0x%lx stid %d "
@@ -801,10 +790,9 @@ static int c4iw_rdev_open(struct c4iw_rdev *rdev)
 	     rdev->lldi.vr->qp.size,
 	     rdev->lldi.vr->cq.start,
 	     rdev->lldi.vr->cq.size);
-	PDBG("udb len 0x%x udb base %p db_reg %p gts_reg %p "
+	PDBG("udb %pR db_reg %p gts_reg %p "
 	     "qpmask 0x%x cqmask 0x%x\n",
-	     (unsigned)pci_resource_len(rdev->lldi.pdev, 2),
-	     (void *)pci_resource_start(rdev->lldi.pdev, 2),
+		&rdev->lldi.pdev->resource[2],
 	     rdev->lldi.db_reg, rdev->lldi.gts_reg,
 	     rdev->qpmask, rdev->cqmask);
 
@@ -1506,20 +1494,6 @@ static int __init c4iw_init_module(void)
 		printk(KERN_WARNING MOD
 		       "could not create debugfs entry, continuing\n");
 
-	if (ibnl_add_client(RDMA_NL_C4IW, RDMA_NL_IWPM_NUM_OPS,
-			    c4iw_nl_cb_table))
-		pr_err("%s[%u]: Failed to add netlink callback\n"
-		       , __func__, __LINE__);
-
-	err = iwpm_init(RDMA_NL_C4IW);
-	if (err) {
-		pr_err("port mapper initialization failed with %d\n", err);
-		ibnl_remove_client(RDMA_NL_C4IW);
-		c4iw_cm_term();
-		debugfs_remove_recursive(c4iw_debugfs_root);
-		return err;
-	}
-
 	cxgb4_register_uld(CXGB4_ULD_RDMA, &c4iw_uld_info);
 
 	return 0;
@@ -1537,8 +1511,6 @@ static void __exit c4iw_exit_module(void)
 	}
 	mutex_unlock(&dev_mutex);
 	cxgb4_unregister_uld(CXGB4_ULD_RDMA);
-	iwpm_exit(RDMA_NL_C4IW);
-	ibnl_remove_client(RDMA_NL_C4IW);
 	c4iw_cm_term();
 	debugfs_remove_recursive(c4iw_debugfs_root);
 }

+ 6 - 43
drivers/infiniband/hw/cxgb4/iw_cxgb4.h

@@ -476,6 +476,8 @@ struct c4iw_qp {
 	wait_queue_head_t wait;
 	struct timer_list timer;
 	int sq_sig_all;
+	struct completion rq_drained;
+	struct completion sq_drained;
 };
 
 static inline struct c4iw_qp *to_c4iw_qp(struct ib_qp *ibqp)
@@ -753,7 +755,6 @@ enum c4iw_ep_flags {
 	CLOSE_SENT		= 3,
 	TIMEOUT                 = 4,
 	QP_REFERENCED           = 5,
-	RELEASE_MAPINFO		= 6,
 };
 
 enum c4iw_ep_history {
@@ -790,8 +791,6 @@ struct c4iw_ep_common {
 	struct mutex mutex;
 	struct sockaddr_storage local_addr;
 	struct sockaddr_storage remote_addr;
-	struct sockaddr_storage mapped_local_addr;
-	struct sockaddr_storage mapped_remote_addr;
 	struct c4iw_wr_wait wr_wait;
 	unsigned long flags;
 	unsigned long history;
@@ -843,45 +842,6 @@ struct c4iw_ep {
 	struct c4iw_ep_stats stats;
 };
 
-static inline void print_addr(struct c4iw_ep_common *epc, const char *func,
-			      const char *msg)
-{
-
-#define SINA(a) (&(((struct sockaddr_in *)(a))->sin_addr.s_addr))
-#define SINP(a) ntohs(((struct sockaddr_in *)(a))->sin_port)
-#define SIN6A(a) (&(((struct sockaddr_in6 *)(a))->sin6_addr))
-#define SIN6P(a) ntohs(((struct sockaddr_in6 *)(a))->sin6_port)
-
-	if (c4iw_debug) {
-		switch (epc->local_addr.ss_family) {
-		case AF_INET:
-			PDBG("%s %s %pI4:%u/%u <-> %pI4:%u/%u\n",
-			     func, msg, SINA(&epc->local_addr),
-			     SINP(&epc->local_addr),
-			     SINP(&epc->mapped_local_addr),
-			     SINA(&epc->remote_addr),
-			     SINP(&epc->remote_addr),
-			     SINP(&epc->mapped_remote_addr));
-			break;
-		case AF_INET6:
-			PDBG("%s %s %pI6:%u/%u <-> %pI6:%u/%u\n",
-			     func, msg, SIN6A(&epc->local_addr),
-			     SIN6P(&epc->local_addr),
-			     SIN6P(&epc->mapped_local_addr),
-			     SIN6A(&epc->remote_addr),
-			     SIN6P(&epc->remote_addr),
-			     SIN6P(&epc->mapped_remote_addr));
-			break;
-		default:
-			break;
-		}
-	}
-#undef SINA
-#undef SINP
-#undef SIN6A
-#undef SIN6P
-}
-
 static inline struct c4iw_ep *to_ep(struct iw_cm_id *cm_id)
 {
 	return cm_id->provider_data;
@@ -961,7 +921,8 @@ int c4iw_map_mr_sg(struct ib_mr *ibmr,
 		   struct scatterlist *sg,
 		   int sg_nents);
 int c4iw_dealloc_mw(struct ib_mw *mw);
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
+struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
+			    struct ib_udata *udata);
 struct ib_mr *c4iw_reg_user_mr(struct ib_pd *pd, u64 start,
 					   u64 length, u64 virt, int acc,
 					   struct ib_udata *udata);
@@ -1016,6 +977,8 @@ extern int c4iw_wr_log;
 extern int db_fc_threshold;
 extern int db_coalescing_threshold;
 extern int use_dsgl;
+void c4iw_drain_rq(struct ib_qp *qp);
+void c4iw_drain_sq(struct ib_qp *qp);
 
 
 #endif

+ 8 - 4
drivers/infiniband/hw/cxgb4/mem.c

@@ -34,6 +34,7 @@
 #include <linux/moduleparam.h>
 #include <rdma/ib_umem.h>
 #include <linux/atomic.h>
+#include <rdma/ib_user_verbs.h>
 
 #include "iw_cxgb4.h"
 
@@ -552,7 +553,8 @@ err:
 	return ERR_PTR(err);
 }
 
-struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
+struct ib_mw *c4iw_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
+			    struct ib_udata *udata)
 {
 	struct c4iw_dev *rhp;
 	struct c4iw_pd *php;
@@ -617,12 +619,14 @@ struct ib_mr *c4iw_alloc_mr(struct ib_pd *pd,
 	int ret = 0;
 	int length = roundup(max_num_sg * sizeof(u64), 32);
 
+	php = to_c4iw_pd(pd);
+	rhp = php->rhp;
+
 	if (mr_type != IB_MR_TYPE_MEM_REG ||
-	    max_num_sg > t4_max_fr_depth(use_dsgl))
+	    max_num_sg > t4_max_fr_depth(rhp->rdev.lldi.ulptx_memwrite_dsgl &&
+					 use_dsgl))
 		return ERR_PTR(-EINVAL);
 
-	php = to_c4iw_pd(pd);
-	rhp = php->rhp;
 	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
 	if (!mhp) {
 		ret = -ENOMEM;

+ 4 - 1
drivers/infiniband/hw/cxgb4/provider.c

@@ -339,7 +339,8 @@ static int c4iw_query_device(struct ib_device *ibdev, struct ib_device_attr *pro
 	props->max_mr = c4iw_num_stags(&dev->rdev);
 	props->max_pd = T4_MAX_NUM_PD;
 	props->local_ca_ack_delay = 0;
-	props->max_fast_reg_page_list_len = t4_max_fr_depth(use_dsgl);
+	props->max_fast_reg_page_list_len =
+		t4_max_fr_depth(dev->rdev.lldi.ulptx_memwrite_dsgl && use_dsgl);
 
 	return 0;
 }
@@ -564,6 +565,8 @@ int c4iw_register_device(struct c4iw_dev *dev)
 	dev->ibdev.get_protocol_stats = c4iw_get_mib;
 	dev->ibdev.uverbs_abi_ver = C4IW_UVERBS_ABI_VERSION;
 	dev->ibdev.get_port_immutable = c4iw_port_immutable;
+	dev->ibdev.drain_sq = c4iw_drain_sq;
+	dev->ibdev.drain_rq = c4iw_drain_rq;
 
 	dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
 	if (!dev->ibdev.iwcm)

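With drain_sq/drain_rq exported through the ib_device above, consumers can rely on the generic drain helpers added elsewhere in this series (core/verbs.c). A hedged usage sketch, assuming qp is a connected iWARP QP and the function name is hypothetical:

#include <rdma/ib_verbs.h>

/* Sketch: wait for every posted work request to complete before tearing
 * the QP down; ib_drain_qp() dispatches to the provider's drain_sq and
 * drain_rq hooks when they are set. */
static void example_teardown(struct ib_qp *qp)
{
	ib_drain_qp(qp);
	ib_destroy_qp(qp);
}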
+ 62 - 45
drivers/infiniband/hw/cxgb4/qp.c

@@ -606,7 +606,7 @@ static int build_rdma_recv(struct c4iw_qp *qhp, union t4_recv_wr *wqe,
 }
 
 static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
-			struct ib_reg_wr *wr, u8 *len16, u8 t5dev)
+			struct ib_reg_wr *wr, u8 *len16, bool dsgl_supported)
 {
 	struct c4iw_mr *mhp = to_c4iw_mr(wr->mr);
 	struct fw_ri_immd *imdp;
@@ -615,7 +615,7 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
 	int pbllen = roundup(mhp->mpl_len * sizeof(u64), 32);
 	int rem;
 
-	if (mhp->mpl_len > t4_max_fr_depth(use_dsgl))
+	if (mhp->mpl_len > t4_max_fr_depth(dsgl_supported && use_dsgl))
 		return -EINVAL;
 
 	wqe->fr.qpbinde_to_dcacpu = 0;
@@ -629,7 +629,7 @@ static int build_memreg(struct t4_sq *sq, union t4_wr *wqe,
 	wqe->fr.va_lo_fbo = cpu_to_be32(mhp->ibmr.iova &
 					0xffffffff);
 
-	if (t5dev && use_dsgl && (pbllen > max_fr_immd)) {
+	if (dsgl_supported && use_dsgl && (pbllen > max_fr_immd)) {
 		struct fw_ri_dsgl *sglp;
 
 		for (i = 0; i < mhp->mpl_len; i++)
@@ -808,9 +808,7 @@ int c4iw_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			fw_opcode = FW_RI_FR_NSMR_WR;
 			swsqe->opcode = FW_RI_FAST_REGISTER;
 			err = build_memreg(&qhp->wq.sq, wqe, reg_wr(wr), &len16,
-					   is_t5(
-					   qhp->rhp->rdev.lldi.adapter_type) ?
-					   1 : 0);
+				qhp->rhp->rdev.lldi.ulptx_memwrite_dsgl);
 			break;
 		case IB_WR_LOCAL_INV:
 			if (wr->send_flags & IB_SEND_FENCE)
@@ -1621,7 +1619,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	unsigned int sqsize, rqsize;
 	struct c4iw_ucontext *ucontext;
 	int ret;
-	struct c4iw_mm_entry *mm1, *mm2, *mm3, *mm4, *mm5 = NULL;
+	struct c4iw_mm_entry *sq_key_mm, *rq_key_mm = NULL, *sq_db_key_mm;
+	struct c4iw_mm_entry *rq_db_key_mm = NULL, *ma_sync_key_mm = NULL;
 
 	PDBG("%s ib_pd %p\n", __func__, pd);
 
@@ -1697,6 +1696,8 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	qhp->attr.max_ird = 0;
 	qhp->sq_sig_all = attrs->sq_sig_type == IB_SIGNAL_ALL_WR;
 	spin_lock_init(&qhp->lock);
+	init_completion(&qhp->sq_drained);
+	init_completion(&qhp->rq_drained);
 	mutex_init(&qhp->mutex);
 	init_waitqueue_head(&qhp->wait);
 	atomic_set(&qhp->refcnt, 1);
@@ -1706,29 +1707,30 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		goto err2;
 
 	if (udata) {
-		mm1 = kmalloc(sizeof *mm1, GFP_KERNEL);
-		if (!mm1) {
+		sq_key_mm = kmalloc(sizeof(*sq_key_mm), GFP_KERNEL);
+		if (!sq_key_mm) {
 			ret = -ENOMEM;
 			goto err3;
 		}
-		mm2 = kmalloc(sizeof *mm2, GFP_KERNEL);
-		if (!mm2) {
+		rq_key_mm = kmalloc(sizeof(*rq_key_mm), GFP_KERNEL);
+		if (!rq_key_mm) {
 			ret = -ENOMEM;
 			goto err4;
 		}
-		mm3 = kmalloc(sizeof *mm3, GFP_KERNEL);
-		if (!mm3) {
+		sq_db_key_mm = kmalloc(sizeof(*sq_db_key_mm), GFP_KERNEL);
+		if (!sq_db_key_mm) {
 			ret = -ENOMEM;
 			goto err5;
 		}
-		mm4 = kmalloc(sizeof *mm4, GFP_KERNEL);
-		if (!mm4) {
+		rq_db_key_mm = kmalloc(sizeof(*rq_db_key_mm), GFP_KERNEL);
+		if (!rq_db_key_mm) {
 			ret = -ENOMEM;
 			goto err6;
 		}
 		if (t4_sq_onchip(&qhp->wq.sq)) {
-			mm5 = kmalloc(sizeof *mm5, GFP_KERNEL);
-			if (!mm5) {
+			ma_sync_key_mm = kmalloc(sizeof(*ma_sync_key_mm),
+						 GFP_KERNEL);
+			if (!ma_sync_key_mm) {
 				ret = -ENOMEM;
 				goto err7;
 			}
@@ -1743,7 +1745,7 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		uresp.rq_size = qhp->wq.rq.size;
 		uresp.rq_memsize = qhp->wq.rq.memsize;
 		spin_lock(&ucontext->mmap_lock);
-		if (mm5) {
+		if (ma_sync_key_mm) {
 			uresp.ma_sync_key = ucontext->key;
 			ucontext->key += PAGE_SIZE;
 		} else {
@@ -1761,28 +1763,29 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 		ret = ib_copy_to_udata(udata, &uresp, sizeof uresp);
 		if (ret)
 			goto err8;
-		mm1->key = uresp.sq_key;
-		mm1->addr = qhp->wq.sq.phys_addr;
-		mm1->len = PAGE_ALIGN(qhp->wq.sq.memsize);
-		insert_mmap(ucontext, mm1);
-		mm2->key = uresp.rq_key;
-		mm2->addr = virt_to_phys(qhp->wq.rq.queue);
-		mm2->len = PAGE_ALIGN(qhp->wq.rq.memsize);
-		insert_mmap(ucontext, mm2);
-		mm3->key = uresp.sq_db_gts_key;
-		mm3->addr = (__force unsigned long)qhp->wq.sq.bar2_pa;
-		mm3->len = PAGE_SIZE;
-		insert_mmap(ucontext, mm3);
-		mm4->key = uresp.rq_db_gts_key;
-		mm4->addr = (__force unsigned long)qhp->wq.rq.bar2_pa;
-		mm4->len = PAGE_SIZE;
-		insert_mmap(ucontext, mm4);
-		if (mm5) {
-			mm5->key = uresp.ma_sync_key;
-			mm5->addr = (pci_resource_start(rhp->rdev.lldi.pdev, 0)
-				    + PCIE_MA_SYNC_A) & PAGE_MASK;
-			mm5->len = PAGE_SIZE;
-			insert_mmap(ucontext, mm5);
+		sq_key_mm->key = uresp.sq_key;
+		sq_key_mm->addr = qhp->wq.sq.phys_addr;
+		sq_key_mm->len = PAGE_ALIGN(qhp->wq.sq.memsize);
+		insert_mmap(ucontext, sq_key_mm);
+		rq_key_mm->key = uresp.rq_key;
+		rq_key_mm->addr = virt_to_phys(qhp->wq.rq.queue);
+		rq_key_mm->len = PAGE_ALIGN(qhp->wq.rq.memsize);
+		insert_mmap(ucontext, rq_key_mm);
+		sq_db_key_mm->key = uresp.sq_db_gts_key;
+		sq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.sq.bar2_pa;
+		sq_db_key_mm->len = PAGE_SIZE;
+		insert_mmap(ucontext, sq_db_key_mm);
+		rq_db_key_mm->key = uresp.rq_db_gts_key;
+		rq_db_key_mm->addr = (u64)(unsigned long)qhp->wq.rq.bar2_pa;
+		rq_db_key_mm->len = PAGE_SIZE;
+		insert_mmap(ucontext, rq_db_key_mm);
+		if (ma_sync_key_mm) {
+			ma_sync_key_mm->key = uresp.ma_sync_key;
+			ma_sync_key_mm->addr =
+				(pci_resource_start(rhp->rdev.lldi.pdev, 0) +
+				PCIE_MA_SYNC_A) & PAGE_MASK;
+			ma_sync_key_mm->len = PAGE_SIZE;
+			insert_mmap(ucontext, ma_sync_key_mm);
 		}
 	}
 	qhp->ibqp.qp_num = qhp->wq.sq.qid;
@@ -1795,15 +1798,15 @@ struct ib_qp *c4iw_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *attrs,
 	     qhp->wq.rq.memsize, attrs->cap.max_recv_wr);
 	return &qhp->ibqp;
 err8:
-	kfree(mm5);
+	kfree(ma_sync_key_mm);
 err7:
-	kfree(mm4);
+	kfree(rq_db_key_mm);
 err6:
-	kfree(mm3);
+	kfree(sq_db_key_mm);
 err5:
-	kfree(mm2);
+	kfree(rq_key_mm);
 err4:
-	kfree(mm1);
+	kfree(sq_key_mm);
 err3:
 	remove_handle(rhp, &rhp->qpidr, qhp->wq.sq.qid);
 err2:
@@ -1888,3 +1891,17 @@ int c4iw_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	init_attr->sq_sig_type = qhp->sq_sig_all ? IB_SIGNAL_ALL_WR : 0;
 	return 0;
 }
+
+void c4iw_drain_sq(struct ib_qp *ibqp)
+{
+	struct c4iw_qp *qp = to_c4iw_qp(ibqp);
+
+	wait_for_completion(&qp->sq_drained);
+}
+
+void c4iw_drain_rq(struct ib_qp *ibqp)
+{
+	struct c4iw_qp *qp = to_c4iw_qp(ibqp);
+
+	wait_for_completion(&qp->rq_drained);
+}

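c4iw_drain_sq()/c4iw_drain_rq() above only wait; the matching complete() calls live in the flush path elsewhere in this patch. A purely hypothetical sketch of that signalling side (the function name is invented for illustration):

#include <linux/completion.h>

/* Once the flush path has reaped the last pending work request of each
 * queue, the waiters above are released. */
static void example_signal_drained(struct c4iw_qp *qhp)
{
	complete(&qhp->sq_drained);	/* releases c4iw_drain_sq() */
	complete(&qhp->rq_drained);	/* releases c4iw_drain_rq() */
}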
+ 3 - 3
drivers/infiniband/hw/mlx4/alias_GUID.c

@@ -310,7 +310,7 @@ static void aliasguid_query_handler(int status,
 	if (status) {
 		pr_debug("(port: %d) failed: status = %d\n",
 			 cb_ctx->port, status);
-		rec->time_to_run = ktime_get_real_ns() + 1 * NSEC_PER_SEC;
+		rec->time_to_run = ktime_get_boot_ns() + 1 * NSEC_PER_SEC;
 		goto out;
 	}
 
@@ -416,7 +416,7 @@ next_entry:
 			 be64_to_cpu((__force __be64)rec->guid_indexes),
 			 be64_to_cpu((__force __be64)applied_guid_indexes),
 			 be64_to_cpu((__force __be64)declined_guid_indexes));
-		rec->time_to_run = ktime_get_real_ns() +
+		rec->time_to_run = ktime_get_boot_ns() +
 			resched_delay_sec * NSEC_PER_SEC;
 	} else {
 		rec->status = MLX4_GUID_INFO_STATUS_SET;
@@ -708,7 +708,7 @@ static int get_low_record_time_index(struct mlx4_ib_dev *dev, u8 port,
 		}
 	}
 	if (resched_delay_sec) {
-		u64 curr_time = ktime_get_real_ns();
+		u64 curr_time = ktime_get_boot_ns();
 
 		*resched_delay_sec = (low_record_time < curr_time) ? 0 :
 			div_u64((low_record_time - curr_time), NSEC_PER_SEC);

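The switch to ktime_get_boot_ns() matters because these timestamps are deadlines compared against "now": boottime is monotonic and keeps advancing across suspend, so the comparisons are immune to wall-clock steps from settimeofday()/NTP. A small hedged sketch of the pattern, with a hypothetical helper name:

#include <linux/timekeeping.h>

/* Deadline check that survives wall-clock steps because both sides use
 * the boottime clock. */
static bool example_guid_work_due(u64 time_to_run)
{
	return time_to_run <= ktime_get_boot_ns();
}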
+ 69 - 3
drivers/infiniband/hw/mlx4/main.c

@@ -1643,6 +1643,56 @@ static int mlx4_ib_tunnel_steer_add(struct ib_qp *qp, struct ib_flow_attr *flow_
 	return err;
 }
 
+static int mlx4_ib_add_dont_trap_rule(struct mlx4_dev *dev,
+				      struct ib_flow_attr *flow_attr,
+				      enum mlx4_net_trans_promisc_mode *type)
+{
+	int err = 0;
+
+	if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER) ||
+	    (dev->caps.dmfs_high_steer_mode == MLX4_STEERING_DMFS_A0_STATIC) ||
+	    (flow_attr->num_of_specs > 1) || (flow_attr->priority != 0)) {
+		return -EOPNOTSUPP;
+	}
+
+	if (flow_attr->num_of_specs == 0) {
+		type[0] = MLX4_FS_MC_SNIFFER;
+		type[1] = MLX4_FS_UC_SNIFFER;
+	} else {
+		union ib_flow_spec *ib_spec;
+
+		ib_spec = (union ib_flow_spec *)(flow_attr + 1);
+		if (ib_spec->type !=  IB_FLOW_SPEC_ETH)
+			return -EINVAL;
+
+		/* if all is zero then MC and UC */
+		if (is_zero_ether_addr(ib_spec->eth.mask.dst_mac)) {
+			type[0] = MLX4_FS_MC_SNIFFER;
+			type[1] = MLX4_FS_UC_SNIFFER;
+		} else {
+			u8 mac[ETH_ALEN] = {ib_spec->eth.mask.dst_mac[0] ^ 0x01,
+					    ib_spec->eth.mask.dst_mac[1],
+					    ib_spec->eth.mask.dst_mac[2],
+					    ib_spec->eth.mask.dst_mac[3],
+					    ib_spec->eth.mask.dst_mac[4],
+					    ib_spec->eth.mask.dst_mac[5]};
+
+			/* The XOR above flipped only the MC bit; a non-empty
+			 * mask is valid only if that bit is set and the rest
+			 * are zero.
+			 */
+			if (!is_zero_ether_addr(&mac[0]))
+				return -EINVAL;
+
+			if (is_multicast_ether_addr(ib_spec->eth.val.dst_mac))
+				type[0] = MLX4_FS_MC_SNIFFER;
+			else
+				type[0] = MLX4_FS_UC_SNIFFER;
+		}
+	}
+
+	return err;
+}
+
 static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 				    struct ib_flow_attr *flow_attr,
 				    int domain)
@@ -1653,6 +1703,10 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 	struct mlx4_dev *dev = (to_mdev(qp->device))->dev;
 	int is_bonded = mlx4_is_bonded(dev);
 
+	if ((flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP) &&
+	    (flow_attr->type != IB_FLOW_ATTR_NORMAL))
+		return ERR_PTR(-EOPNOTSUPP);
+
 	memset(type, 0, sizeof(type));
 
 	mflow = kzalloc(sizeof(*mflow), GFP_KERNEL);
@@ -1663,7 +1717,19 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 
 	switch (flow_attr->type) {
 	case IB_FLOW_ATTR_NORMAL:
-		type[0] = MLX4_FS_REGULAR;
+		/* If the don't-trap flag (continue match) is set, then under
+		 * specific conditions traffic is replicated to the given QP
+		 * without being stolen from its normal destination.
+		 */
+		if (unlikely(flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)) {
+			err = mlx4_ib_add_dont_trap_rule(dev,
+							 flow_attr,
+							 type);
+			if (err)
+				goto err_free;
+		} else {
+			type[0] = MLX4_FS_REGULAR;
+		}
 		break;
 
 	case IB_FLOW_ATTR_ALL_DEFAULT:
@@ -1675,8 +1741,8 @@ static struct ib_flow *mlx4_ib_create_flow(struct ib_qp *qp,
 		break;
 
 	case IB_FLOW_ATTR_SNIFFER:
-		type[0] = MLX4_FS_UC_SNIFFER;
-		type[1] = MLX4_FS_MC_SNIFFER;
+		type[0] = MLX4_FS_MIRROR_RX_PORT;
+		type[1] = MLX4_FS_MIRROR_SX_PORT;
 		break;
 
 	default:

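For context, a hedged sketch of how a consumer would request such a "don't trap" rule: the flag is only honoured on IB_FLOW_ATTR_NORMAL attributes, and with zero specs the code above installs both the MC and UC sniffer types. The qp pointer and function name are assumptions.

#include <rdma/ib_verbs.h>

static struct ib_flow *example_dont_trap_rule(struct ib_qp *qp)
{
	struct ib_flow_attr attr = {
		.type         = IB_FLOW_ATTR_NORMAL,
		.flags        = IB_FLOW_ATTR_FLAGS_DONT_TRAP,
		.num_of_specs = 0,	/* no specs: mirror both MC and UC */
		.size         = sizeof(attr),
		.port         = 1,
	};

	/* Matching traffic is replicated to qp without being stolen from
	 * its normal destination. */
	return ib_create_flow(qp, &attr, IB_FLOW_DOMAIN_USER);
}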
+ 2 - 1
drivers/infiniband/hw/mlx4/mlx4_ib.h

@@ -711,7 +711,8 @@ struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata);
 int mlx4_ib_dereg_mr(struct ib_mr *mr);
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type);
+struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
+			       struct ib_udata *udata);
 int mlx4_ib_dealloc_mw(struct ib_mw *mw);
 struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
 			       enum ib_mr_type mr_type,

+ 3 - 1
drivers/infiniband/hw/mlx4/mr.c

@@ -32,6 +32,7 @@
  */
 
 #include <linux/slab.h>
+#include <rdma/ib_user_verbs.h>
 
 #include "mlx4_ib.h"
 
@@ -334,7 +335,8 @@ int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
 	return 0;
 }
 
-struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type)
+struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
+			       struct ib_udata *udata)
 {
 	struct mlx4_ib_dev *dev = to_mdev(pd->device);
 	struct mlx4_ib_mw *mw;

+ 1 - 1
drivers/infiniband/hw/mlx5/Makefile

@@ -1,4 +1,4 @@
 obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o
 
-mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o
+mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o

+ 90 - 14
drivers/infiniband/hw/mlx5/cq.c

@@ -207,7 +207,10 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
 		break;
 	case MLX5_CQE_RESP_SEND:
 		wc->opcode   = IB_WC_RECV;
-		wc->wc_flags = 0;
+		wc->wc_flags = IB_WC_IP_CSUM_OK;
+		if (unlikely(!((cqe->hds_ip_ext & CQE_L3_OK) &&
+			       (cqe->hds_ip_ext & CQE_L4_OK))))
+			wc->wc_flags = 0;
 		break;
 	case MLX5_CQE_RESP_SEND_IMM:
 		wc->opcode	= IB_WC_RECV;
@@ -431,7 +434,7 @@ static int mlx5_poll_one(struct mlx5_ib_cq *cq,
 	struct mlx5_core_qp *mqp;
 	struct mlx5_ib_wq *wq;
 	struct mlx5_sig_err_cqe *sig_err_cqe;
-	struct mlx5_core_mr *mmr;
+	struct mlx5_core_mkey *mmkey;
 	struct mlx5_ib_mr *mr;
 	uint8_t opcode;
 	uint32_t qpn;
@@ -536,17 +539,17 @@ repoll:
 	case MLX5_CQE_SIG_ERR:
 		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;
 
-		read_lock(&dev->mdev->priv.mr_table.lock);
-		mmr = __mlx5_mr_lookup(dev->mdev,
-				       mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
-		if (unlikely(!mmr)) {
-			read_unlock(&dev->mdev->priv.mr_table.lock);
+		read_lock(&dev->mdev->priv.mkey_table.lock);
+		mmkey = __mlx5_mr_lookup(dev->mdev,
+					 mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
+		if (unlikely(!mmkey)) {
+			read_unlock(&dev->mdev->priv.mkey_table.lock);
 			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
 				     cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
 			return -EINVAL;
 		}
 
-		mr = to_mibmr(mmr);
+		mr = to_mibmr(mmkey);
 		get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
 		mr->sig->sig_err_exists = true;
 		mr->sig->sigerr_count++;
@@ -558,25 +561,51 @@ repoll:
 			     mr->sig->err_item.expected,
 			     mr->sig->err_item.actual);
 
-		read_unlock(&dev->mdev->priv.mr_table.lock);
+		read_unlock(&dev->mdev->priv.mkey_table.lock);
 		goto repoll;
 	}
 
 	return 0;
 }
 
+static int poll_soft_wc(struct mlx5_ib_cq *cq, int num_entries,
+			struct ib_wc *wc)
+{
+	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
+	struct mlx5_ib_wc *soft_wc, *next;
+	int npolled = 0;
+
+	list_for_each_entry_safe(soft_wc, next, &cq->wc_list, list) {
+		if (npolled >= num_entries)
+			break;
+
+		mlx5_ib_dbg(dev, "polled software generated completion on CQ 0x%x\n",
+			    cq->mcq.cqn);
+
+		wc[npolled++] = soft_wc->wc;
+		list_del(&soft_wc->list);
+		kfree(soft_wc);
+	}
+
+	return npolled;
+}
+
 int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 {
 	struct mlx5_ib_cq *cq = to_mcq(ibcq);
 	struct mlx5_ib_qp *cur_qp = NULL;
 	unsigned long flags;
+	int soft_polled = 0;
 	int npolled;
 	int err = 0;
 
 	spin_lock_irqsave(&cq->lock, flags);
 
-	for (npolled = 0; npolled < num_entries; npolled++) {
-		err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
+	if (unlikely(!list_empty(&cq->wc_list)))
+		soft_polled = poll_soft_wc(cq, num_entries, wc);
+
+	for (npolled = 0; npolled < num_entries - soft_polled; npolled++) {
+		err = mlx5_poll_one(cq, &cur_qp, wc + soft_polled + npolled);
 		if (err)
 			break;
 	}
@@ -587,7 +616,7 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 	spin_unlock_irqrestore(&cq->lock, flags);
 
 	if (err == 0 || err == -EAGAIN)
-		return npolled;
+		return soft_polled + npolled;
 	else
 		return err;
 }
@@ -595,16 +624,27 @@ int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
 int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
 {
 	struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
+	struct mlx5_ib_cq *cq = to_mcq(ibcq);
 	void __iomem *uar_page = mdev->priv.uuari.uars[0].map;
+	unsigned long irq_flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&cq->lock, irq_flags);
+	if (cq->notify_flags != IB_CQ_NEXT_COMP)
+		cq->notify_flags = flags & IB_CQ_SOLICITED_MASK;
 
-	mlx5_cq_arm(&to_mcq(ibcq)->mcq,
+	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !list_empty(&cq->wc_list))
+		ret = 1;
+	spin_unlock_irqrestore(&cq->lock, irq_flags);
+
+	mlx5_cq_arm(&cq->mcq,
 		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
 		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
 		    uar_page,
 		    MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
 		    to_mcq(ibcq)->mcq.cons_index);
 
-	return 0;
+	return ret;
 }
 
 static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
@@ -757,6 +797,14 @@ static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
 	mlx5_db_free(dev->mdev, &cq->db);
 }
 
+static void notify_soft_wc_handler(struct work_struct *work)
+{
+	struct mlx5_ib_cq *cq = container_of(work, struct mlx5_ib_cq,
+					     notify_work);
+
+	cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
+}
+
 struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 				const struct ib_cq_init_attr *attr,
 				struct ib_ucontext *context,
@@ -807,6 +855,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 				       &index, &inlen);
 		if (err)
 			goto err_create;
+
+		INIT_WORK(&cq->notify_work, notify_soft_wc_handler);
 	}
 
 	cq->cqe_size = cqe_size;
@@ -832,6 +882,8 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
 	cq->mcq.comp  = mlx5_ib_cq_comp;
 	cq->mcq.event = mlx5_ib_cq_event;
 
+	INIT_LIST_HEAD(&cq->wc_list);
+
 	if (context)
 		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
 			err = -EFAULT;
@@ -1219,3 +1271,27 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
 	cq = to_mcq(ibcq);
 	return cq->cqe_size;
 }
+
+/* Called from atomic context */
+int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc)
+{
+	struct mlx5_ib_wc *soft_wc;
+	struct mlx5_ib_cq *cq = to_mcq(ibcq);
+	unsigned long flags;
+
+	soft_wc = kmalloc(sizeof(*soft_wc), GFP_ATOMIC);
+	if (!soft_wc)
+		return -ENOMEM;
+
+	soft_wc->wc = *wc;
+	spin_lock_irqsave(&cq->lock, flags);
+	list_add_tail(&soft_wc->list, &cq->wc_list);
+	if (cq->notify_flags == IB_CQ_NEXT_COMP ||
+	    wc->status != IB_WC_SUCCESS) {
+		cq->notify_flags = 0;
+		schedule_work(&cq->notify_work);
+	}
+	spin_unlock_irqrestore(&cq->lock, flags);
+
+	return 0;
+}

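mlx5_ib_generate_wc() is the hook the new GSI code below uses to inject software-generated completions; mlx5_ib_poll_cq() and mlx5_ib_arm_cq() now account for cq->wc_list so consumers observe them like hardware completions. A hedged caller sketch, assuming the declaration added to mlx5_ib.h in this series and hypothetical arguments:

/* Queue a software completion for a request that never reached the
 * hardware CQ; safe in atomic context (the helper uses GFP_ATOMIC). */
static int example_fake_send_completion(struct ib_cq *cq, struct ib_qp *qp,
					u64 wr_id)
{
	struct ib_wc wc = {
		.wr_id  = wr_id,
		.status = IB_WC_SUCCESS,
		.opcode = IB_WC_SEND,
		.qp     = qp,
	};

	return mlx5_ib_generate_wc(cq, &wc);
}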
+ 548 - 0
drivers/infiniband/hw/mlx5/gsi.c

@@ -0,0 +1,548 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "mlx5_ib.h"
+
+struct mlx5_ib_gsi_wr {
+	struct ib_cqe cqe;
+	struct ib_wc wc;
+	int send_flags;
+	bool completed:1;
+};
+
+struct mlx5_ib_gsi_qp {
+	struct ib_qp ibqp;
+	struct ib_qp *rx_qp;
+	u8 port_num;
+	struct ib_qp_cap cap;
+	enum ib_sig_type sq_sig_type;
+	/* Serialize qp state modifications */
+	struct mutex mutex;
+	struct ib_cq *cq;
+	struct mlx5_ib_gsi_wr *outstanding_wrs;
+	u32 outstanding_pi, outstanding_ci;
+	int num_qps;
+	/* Protects access to the tx_qps. Post send operations synchronize
+	 * with tx_qp creation in setup_qp(). Also protects the
+	 * outstanding_wrs array and indices.
+	 */
+	spinlock_t lock;
+	struct ib_qp **tx_qps;
+};
+
+static struct mlx5_ib_gsi_qp *gsi_qp(struct ib_qp *qp)
+{
+	return container_of(qp, struct mlx5_ib_gsi_qp, ibqp);
+}
+
+static bool mlx5_ib_deth_sqpn_cap(struct mlx5_ib_dev *dev)
+{
+	return MLX5_CAP_GEN(dev->mdev, set_deth_sqpn);
+}
+
+static u32 next_outstanding(struct mlx5_ib_gsi_qp *gsi, u32 index)
+{
+	return ++index % gsi->cap.max_send_wr;
+}
+
+#define for_each_outstanding_wr(gsi, index) \
+	for (index = gsi->outstanding_ci; index != gsi->outstanding_pi; \
+	     index = next_outstanding(gsi, index))
+
+/* Call with gsi->lock locked */
+static void generate_completions(struct mlx5_ib_gsi_qp *gsi)
+{
+	struct ib_cq *gsi_cq = gsi->ibqp.send_cq;
+	struct mlx5_ib_gsi_wr *wr;
+	u32 index;
+
+	for_each_outstanding_wr(gsi, index) {
+		wr = &gsi->outstanding_wrs[index];
+
+		if (!wr->completed)
+			break;
+
+		if (gsi->sq_sig_type == IB_SIGNAL_ALL_WR ||
+		    wr->send_flags & IB_SEND_SIGNALED)
+			WARN_ON_ONCE(mlx5_ib_generate_wc(gsi_cq, &wr->wc));
+
+		wr->completed = false;
+	}
+
+	gsi->outstanding_ci = index;
+}
+
+static void handle_single_completion(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct mlx5_ib_gsi_qp *gsi = cq->cq_context;
+	struct mlx5_ib_gsi_wr *wr =
+		container_of(wc->wr_cqe, struct mlx5_ib_gsi_wr, cqe);
+	u64 wr_id;
+	unsigned long flags;
+
+	spin_lock_irqsave(&gsi->lock, flags);
+	wr->completed = true;
+	wr_id = wr->wc.wr_id;
+	wr->wc = *wc;
+	wr->wc.wr_id = wr_id;
+	wr->wc.qp = &gsi->ibqp;
+
+	generate_completions(gsi);
+	spin_unlock_irqrestore(&gsi->lock, flags);
+}
+
+struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
+				    struct ib_qp_init_attr *init_attr)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_ib_gsi_qp *gsi;
+	struct ib_qp_init_attr hw_init_attr = *init_attr;
+	const u8 port_num = init_attr->port_num;
+	const int num_pkeys = pd->device->attrs.max_pkeys;
+	const int num_qps = mlx5_ib_deth_sqpn_cap(dev) ? num_pkeys : 0;
+	int ret;
+
+	mlx5_ib_dbg(dev, "creating GSI QP\n");
+
+	if (port_num > ARRAY_SIZE(dev->devr.ports) || port_num < 1) {
+		mlx5_ib_warn(dev,
+			     "invalid port number %d during GSI QP creation\n",
+			     port_num);
+		return ERR_PTR(-EINVAL);
+	}
+
+	gsi = kzalloc(sizeof(*gsi), GFP_KERNEL);
+	if (!gsi)
+		return ERR_PTR(-ENOMEM);
+
+	gsi->tx_qps = kcalloc(num_qps, sizeof(*gsi->tx_qps), GFP_KERNEL);
+	if (!gsi->tx_qps) {
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	gsi->outstanding_wrs = kcalloc(init_attr->cap.max_send_wr,
+				       sizeof(*gsi->outstanding_wrs),
+				       GFP_KERNEL);
+	if (!gsi->outstanding_wrs) {
+		ret = -ENOMEM;
+		goto err_free_tx;
+	}
+
+	mutex_init(&gsi->mutex);
+
+	mutex_lock(&dev->devr.mutex);
+
+	if (dev->devr.ports[port_num - 1].gsi) {
+		mlx5_ib_warn(dev, "GSI QP already exists on port %d\n",
+			     port_num);
+		ret = -EBUSY;
+		goto err_free_wrs;
+	}
+	gsi->num_qps = num_qps;
+	spin_lock_init(&gsi->lock);
+
+	gsi->cap = init_attr->cap;
+	gsi->sq_sig_type = init_attr->sq_sig_type;
+	gsi->ibqp.qp_num = 1;
+	gsi->port_num = port_num;
+
+	gsi->cq = ib_alloc_cq(pd->device, gsi, init_attr->cap.max_send_wr, 0,
+			      IB_POLL_SOFTIRQ);
+	if (IS_ERR(gsi->cq)) {
+		mlx5_ib_warn(dev, "unable to create send CQ for GSI QP. error %ld\n",
+			     PTR_ERR(gsi->cq));
+		ret = PTR_ERR(gsi->cq);
+		goto err_free_wrs;
+	}
+
+	hw_init_attr.qp_type = MLX5_IB_QPT_HW_GSI;
+	hw_init_attr.send_cq = gsi->cq;
+	if (num_qps) {
+		hw_init_attr.cap.max_send_wr = 0;
+		hw_init_attr.cap.max_send_sge = 0;
+		hw_init_attr.cap.max_inline_data = 0;
+	}
+	gsi->rx_qp = ib_create_qp(pd, &hw_init_attr);
+	if (IS_ERR(gsi->rx_qp)) {
+		mlx5_ib_warn(dev, "unable to create hardware GSI QP. error %ld\n",
+			     PTR_ERR(gsi->rx_qp));
+		ret = PTR_ERR(gsi->rx_qp);
+		goto err_destroy_cq;
+	}
+
+	dev->devr.ports[init_attr->port_num - 1].gsi = gsi;
+
+	mutex_unlock(&dev->devr.mutex);
+
+	return &gsi->ibqp;
+
+err_destroy_cq:
+	ib_free_cq(gsi->cq);
+err_free_wrs:
+	mutex_unlock(&dev->devr.mutex);
+	kfree(gsi->outstanding_wrs);
+err_free_tx:
+	kfree(gsi->tx_qps);
+err_free:
+	kfree(gsi);
+	return ERR_PTR(ret);
+}
+
+int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->device);
+	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+	const int port_num = gsi->port_num;
+	int qp_index;
+	int ret;
+
+	mlx5_ib_dbg(dev, "destroying GSI QP\n");
+
+	mutex_lock(&dev->devr.mutex);
+	ret = ib_destroy_qp(gsi->rx_qp);
+	if (ret) {
+		mlx5_ib_warn(dev, "unable to destroy hardware GSI QP. error %d\n",
+			     ret);
+		mutex_unlock(&dev->devr.mutex);
+		return ret;
+	}
+	dev->devr.ports[port_num - 1].gsi = NULL;
+	mutex_unlock(&dev->devr.mutex);
+	gsi->rx_qp = NULL;
+
+	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index) {
+		if (!gsi->tx_qps[qp_index])
+			continue;
+		WARN_ON_ONCE(ib_destroy_qp(gsi->tx_qps[qp_index]));
+		gsi->tx_qps[qp_index] = NULL;
+	}
+
+	ib_free_cq(gsi->cq);
+
+	kfree(gsi->outstanding_wrs);
+	kfree(gsi->tx_qps);
+	kfree(gsi);
+
+	return 0;
+}
+
+static struct ib_qp *create_gsi_ud_qp(struct mlx5_ib_gsi_qp *gsi)
+{
+	struct ib_pd *pd = gsi->rx_qp->pd;
+	struct ib_qp_init_attr init_attr = {
+		.event_handler = gsi->rx_qp->event_handler,
+		.qp_context = gsi->rx_qp->qp_context,
+		.send_cq = gsi->cq,
+		.recv_cq = gsi->rx_qp->recv_cq,
+		.cap = {
+			.max_send_wr = gsi->cap.max_send_wr,
+			.max_send_sge = gsi->cap.max_send_sge,
+			.max_inline_data = gsi->cap.max_inline_data,
+		},
+		.sq_sig_type = gsi->sq_sig_type,
+		.qp_type = IB_QPT_UD,
+		.create_flags = mlx5_ib_create_qp_sqpn_qp1(),
+	};
+
+	return ib_create_qp(pd, &init_attr);
+}
+
+static int modify_to_rts(struct mlx5_ib_gsi_qp *gsi, struct ib_qp *qp,
+			 u16 qp_index)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->device);
+	struct ib_qp_attr attr;
+	int mask;
+	int ret;
+
+	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY | IB_QP_PORT;
+	attr.qp_state = IB_QPS_INIT;
+	attr.pkey_index = qp_index;
+	attr.qkey = IB_QP1_QKEY;
+	attr.port_num = gsi->port_num;
+	ret = ib_modify_qp(qp, &attr, mask);
+	if (ret) {
+		mlx5_ib_err(dev, "could not change QP%d state to INIT: %d\n",
+			    qp->qp_num, ret);
+		return ret;
+	}
+
+	attr.qp_state = IB_QPS_RTR;
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
+	if (ret) {
+		mlx5_ib_err(dev, "could not change QP%d state to RTR: %d\n",
+			    qp->qp_num, ret);
+		return ret;
+	}
+
+	attr.qp_state = IB_QPS_RTS;
+	attr.sq_psn = 0;
+	ret = ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
+	if (ret) {
+		mlx5_ib_err(dev, "could not change QP%d state to RTS: %d\n",
+			    qp->qp_num, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void setup_qp(struct mlx5_ib_gsi_qp *gsi, u16 qp_index)
+{
+	struct ib_device *device = gsi->rx_qp->device;
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct ib_qp *qp;
+	unsigned long flags;
+	u16 pkey;
+	int ret;
+
+	ret = ib_query_pkey(device, gsi->port_num, qp_index, &pkey);
+	if (ret) {
+		mlx5_ib_warn(dev, "unable to read P_Key at port %d, index %d\n",
+			     gsi->port_num, qp_index);
+		return;
+	}
+
+	if (!pkey) {
+		mlx5_ib_dbg(dev, "invalid P_Key at port %d, index %d.  Skipping.\n",
+			    gsi->port_num, qp_index);
+		return;
+	}
+
+	spin_lock_irqsave(&gsi->lock, flags);
+	qp = gsi->tx_qps[qp_index];
+	spin_unlock_irqrestore(&gsi->lock, flags);
+	if (qp) {
+		mlx5_ib_dbg(dev, "already existing GSI TX QP at port %d, index %d. Skipping\n",
+			    gsi->port_num, qp_index);
+		return;
+	}
+
+	qp = create_gsi_ud_qp(gsi);
+	if (IS_ERR(qp)) {
+		mlx5_ib_warn(dev, "unable to create hardware UD QP for GSI: %ld\n",
+			     PTR_ERR(qp));
+		return;
+	}
+
+	ret = modify_to_rts(gsi, qp, qp_index);
+	if (ret)
+		goto err_destroy_qp;
+
+	spin_lock_irqsave(&gsi->lock, flags);
+	WARN_ON_ONCE(gsi->tx_qps[qp_index]);
+	gsi->tx_qps[qp_index] = qp;
+	spin_unlock_irqrestore(&gsi->lock, flags);
+
+	return;
+
+err_destroy_qp:
+	WARN_ON_ONCE(qp);
+}
+
+static void setup_qps(struct mlx5_ib_gsi_qp *gsi)
+{
+	u16 qp_index;
+
+	for (qp_index = 0; qp_index < gsi->num_qps; ++qp_index)
+		setup_qp(gsi, qp_index);
+}
+
+int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
+			  int attr_mask)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->device);
+	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+	int ret;
+
+	mlx5_ib_dbg(dev, "modifying GSI QP to state %d\n", attr->qp_state);
+
+	mutex_lock(&gsi->mutex);
+	ret = ib_modify_qp(gsi->rx_qp, attr, attr_mask);
+	if (ret) {
+		mlx5_ib_warn(dev, "unable to modify GSI rx QP: %d\n", ret);
+		goto unlock;
+	}
+
+	if (to_mqp(gsi->rx_qp)->state == IB_QPS_RTS)
+		setup_qps(gsi);
+
+unlock:
+	mutex_unlock(&gsi->mutex);
+
+	return ret;
+}
+
+int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+			 int qp_attr_mask,
+			 struct ib_qp_init_attr *qp_init_attr)
+{
+	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+	int ret;
+
+	mutex_lock(&gsi->mutex);
+	ret = ib_query_qp(gsi->rx_qp, qp_attr, qp_attr_mask, qp_init_attr);
+	qp_init_attr->cap = gsi->cap;
+	mutex_unlock(&gsi->mutex);
+
+	return ret;
+}
+
+/* Call with gsi->lock locked */
+static int mlx5_ib_add_outstanding_wr(struct mlx5_ib_gsi_qp *gsi,
+				      struct ib_ud_wr *wr, struct ib_wc *wc)
+{
+	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
+	struct mlx5_ib_gsi_wr *gsi_wr;
+
+	if (gsi->outstanding_pi == gsi->outstanding_ci + gsi->cap.max_send_wr) {
+		mlx5_ib_warn(dev, "no available GSI work request.\n");
+		return -ENOMEM;
+	}
+
+	gsi_wr = &gsi->outstanding_wrs[gsi->outstanding_pi];
+	gsi->outstanding_pi = next_outstanding(gsi, gsi->outstanding_pi);
+
+	if (!wc) {
+		memset(&gsi_wr->wc, 0, sizeof(gsi_wr->wc));
+		gsi_wr->wc.pkey_index = wr->pkey_index;
+		gsi_wr->wc.wr_id = wr->wr.wr_id;
+	} else {
+		gsi_wr->wc = *wc;
+		gsi_wr->completed = true;
+	}
+
+	gsi_wr->cqe.done = &handle_single_completion;
+	wr->wr.wr_cqe = &gsi_wr->cqe;
+
+	return 0;
+}
+
+/* Call with gsi->lock locked */
+static int mlx5_ib_gsi_silent_drop(struct mlx5_ib_gsi_qp *gsi,
+				    struct ib_ud_wr *wr)
+{
+	struct ib_wc wc = {
+		{ .wr_id = wr->wr.wr_id },
+		.status = IB_WC_SUCCESS,
+		.opcode = IB_WC_SEND,
+		.qp = &gsi->ibqp,
+	};
+	int ret;
+
+	ret = mlx5_ib_add_outstanding_wr(gsi, wr, &wc);
+	if (ret)
+		return ret;
+
+	generate_completions(gsi);
+
+	return 0;
+}
+
+/* Call with gsi->lock locked */
+static struct ib_qp *get_tx_qp(struct mlx5_ib_gsi_qp *gsi, struct ib_ud_wr *wr)
+{
+	struct mlx5_ib_dev *dev = to_mdev(gsi->rx_qp->device);
+	int qp_index = wr->pkey_index;
+
+	if (!mlx5_ib_deth_sqpn_cap(dev))
+		return gsi->rx_qp;
+
+	if (qp_index >= gsi->num_qps)
+		return NULL;
+
+	return gsi->tx_qps[qp_index];
+}
+
+int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
+			  struct ib_send_wr **bad_wr)
+{
+	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+	struct ib_qp *tx_qp;
+	unsigned long flags;
+	int ret;
+
+	for (; wr; wr = wr->next) {
+		struct ib_ud_wr cur_wr = *ud_wr(wr);
+
+		cur_wr.wr.next = NULL;
+
+		spin_lock_irqsave(&gsi->lock, flags);
+		tx_qp = get_tx_qp(gsi, &cur_wr);
+		if (!tx_qp) {
+			ret = mlx5_ib_gsi_silent_drop(gsi, &cur_wr);
+			if (ret)
+				goto err;
+			spin_unlock_irqrestore(&gsi->lock, flags);
+			continue;
+		}
+
+		ret = mlx5_ib_add_outstanding_wr(gsi, &cur_wr, NULL);
+		if (ret)
+			goto err;
+
+		ret = ib_post_send(tx_qp, &cur_wr.wr, bad_wr);
+		if (ret) {
+			/* Undo the effect of adding the outstanding wr */
+			gsi->outstanding_pi = (gsi->outstanding_pi +
+					       gsi->cap.max_send_wr - 1) %
+					      gsi->cap.max_send_wr;
+			goto err;
+		}
+		spin_unlock_irqrestore(&gsi->lock, flags);
+	}
+
+	return 0;
+
+err:
+	spin_unlock_irqrestore(&gsi->lock, flags);
+	*bad_wr = wr;
+	return ret;
+}
+
+int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
+			  struct ib_recv_wr **bad_wr)
+{
+	struct mlx5_ib_gsi_qp *gsi = gsi_qp(qp);
+
+	return ib_post_recv(gsi->rx_qp, wr, bad_wr);
+}
+
+void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi)
+{
+	if (!gsi)
+		return;
+
+	mutex_lock(&gsi->mutex);
+	setup_qps(gsi);
+	mutex_unlock(&gsi->mutex);
+}

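The GSI QP above fans sends out to one UD QP per P_Key (when the device can set the DETH source QPN), selected by the pkey_index carried in the work request. A hedged sketch of the caller side, roughly as the MAD layer would post it; ah, gsi_qp, the cookie and the function name are assumptions:

#include <rdma/ib_verbs.h>
#include <rdma/ib_mad.h>

/* The pkey_index in the UD WR picks which per-P_Key TX QP
 * mlx5_ib_gsi_post_send() forwards the send to. */
static int example_post_on_gsi(struct ib_qp *gsi_qp, struct ib_ah *ah,
			       u16 pkey_index, u64 cookie)
{
	struct ib_send_wr *bad_wr;
	struct ib_ud_wr ud_wr = {
		.wr = {
			.wr_id      = cookie,
			.opcode     = IB_WR_SEND,
			.send_flags = IB_SEND_SIGNALED,
		},
		.ah          = ah,
		.remote_qpn  = 1,		/* peer GSI QP */
		.remote_qkey = IB_QP1_QKEY,
		.pkey_index  = pkey_index,
	};

	return ib_post_send(gsi_qp, &ud_wr.wr, &bad_wr);
}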
+ 194 - 0
drivers/infiniband/hw/mlx5/ib_virt.c

@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/mlx5/vport.h>
+#include "mlx5_ib.h"
+
+static inline u32 mlx_to_net_policy(enum port_state_policy mlx_policy)
+{
+	switch (mlx_policy) {
+	case MLX5_POLICY_DOWN:
+		return IFLA_VF_LINK_STATE_DISABLE;
+	case MLX5_POLICY_UP:
+		return IFLA_VF_LINK_STATE_ENABLE;
+	case MLX5_POLICY_FOLLOW:
+		return IFLA_VF_LINK_STATE_AUTO;
+	default:
+		return __IFLA_VF_LINK_STATE_MAX;
+	}
+}
+
+int mlx5_ib_get_vf_config(struct ib_device *device, int vf, u8 port,
+			  struct ifla_vf_info *info)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *rep;
+	int err;
+
+	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
+	if (!rep)
+		return -ENOMEM;
+
+	err = mlx5_query_hca_vport_context(mdev, 1, 1,  vf + 1, rep);
+	if (err) {
+		mlx5_ib_warn(dev, "failed to query port policy for vf %d (%d)\n",
+			     vf, err);
+		goto free;
+	}
+	memset(info, 0, sizeof(*info));
+	info->linkstate = mlx_to_net_policy(rep->policy);
+	if (info->linkstate == __IFLA_VF_LINK_STATE_MAX)
+		err = -EINVAL;
+
+free:
+	kfree(rep);
+	return err;
+}
+
+static inline enum port_state_policy net_to_mlx_policy(int policy)
+{
+	switch (policy) {
+	case IFLA_VF_LINK_STATE_DISABLE:
+		return MLX5_POLICY_DOWN;
+	case IFLA_VF_LINK_STATE_ENABLE:
+		return MLX5_POLICY_UP;
+	case IFLA_VF_LINK_STATE_AUTO:
+		return MLX5_POLICY_FOLLOW;
+	default:
+		return MLX5_POLICY_INVALID;
+	}
+}
+
+int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
+			      u8 port, int state)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *in;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	in->policy = net_to_mlx_policy(state);
+	if (in->policy == MLX5_POLICY_INVALID) {
+		err = -EINVAL;
+		goto out;
+	}
+	in->field_select = MLX5_HCA_VPORT_SEL_STATE_POLICY;
+	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+
+out:
+	kfree(in);
+	return err;
+}
+
+int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
+			 u8 port, struct ifla_vf_stats *stats)
+{
+	int out_sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+	struct mlx5_core_dev *mdev;
+	struct mlx5_ib_dev *dev;
+	void *out;
+	int err;
+
+	dev = to_mdev(device);
+	mdev = dev->mdev;
+
+	out = kzalloc(out_sz, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+
+	err = mlx5_core_query_vport_counter(mdev, true, vf, port, out, out_sz);
+	if (err)
+		goto ex;
+
+	stats->rx_packets = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_unicast.packets);
+	stats->tx_packets = MLX5_GET64_PR(query_vport_counter_out, out, transmitted_ib_unicast.packets);
+	stats->rx_bytes = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_unicast.octets);
+	stats->tx_bytes = MLX5_GET64_PR(query_vport_counter_out, out, transmitted_ib_unicast.octets);
+	stats->multicast = MLX5_GET64_PR(query_vport_counter_out, out, received_ib_multicast.packets);
+
+ex:
+	kfree(out);
+	return err;
+}
+
+static int set_vf_node_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *in;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	in->field_select = MLX5_HCA_VPORT_SEL_NODE_GUID;
+	in->node_guid = guid;
+	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+	kfree(in);
+	return err;
+}
+
+static int set_vf_port_guid(struct ib_device *device, int vf, u8 port, u64 guid)
+{
+	struct mlx5_ib_dev *dev = to_mdev(device);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mlx5_hca_vport_context *in;
+	int err;
+
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	in->field_select = MLX5_HCA_VPORT_SEL_PORT_GUID;
+	in->port_guid = guid;
+	err = mlx5_core_modify_hca_vport_context(mdev, 1, 1, vf + 1, in);
+	kfree(in);
+	return err;
+}
+
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+			u64 guid, int type)
+{
+	if (type == IFLA_VF_IB_NODE_GUID)
+		return set_vf_node_guid(device, vf, port, guid);
+	else if (type == IFLA_VF_IB_PORT_GUID)
+		return set_vf_port_guid(device, vf, port, guid);
+
+	return -EINVAL;
+}

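These helpers back the new per-VF ib_device callbacks; the actual registration lives in main.c elsewhere in this series, so the following is only an approximate sketch of how they would be hooked up on a PF that reports IB virtualization support (field names are assumptions):

/* Approximate sketch of the main.c wiring (not shown in this hunk). */
if (mlx5_core_is_pf(mdev) && MLX5_CAP_GEN(mdev, ib_virt)) {
	dev->ib_dev.get_vf_config	= mlx5_ib_get_vf_config;
	dev->ib_dev.set_vf_link_state	= mlx5_ib_set_vf_link_state;
	dev->ib_dev.get_vf_stats	= mlx5_ib_get_vf_stats;
	dev->ib_dev.set_vf_guid		= mlx5_ib_set_vf_guid;
}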
+ 155 - 11
drivers/infiniband/hw/mlx5/mad.c

@@ -31,8 +31,10 @@
  */
 
 #include <linux/mlx5/cmd.h>
+#include <linux/mlx5/vport.h>
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
+#include <rdma/ib_pma.h>
 #include "mlx5_ib.h"
 
 enum {
@@ -57,20 +59,12 @@ int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
 	return mlx5_core_mad_ifc(dev->mdev, in_mad, response_mad, op_modifier, port);
 }
 
-int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
-			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
-			const struct ib_mad_hdr *in, size_t in_mad_size,
-			struct ib_mad_hdr *out, size_t *out_mad_size,
-			u16 *out_mad_pkey_index)
+static int process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		       const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		       const struct ib_mad *in_mad, struct ib_mad *out_mad)
 {
 	u16 slid;
 	int err;
-	const struct ib_mad *in_mad = (const struct ib_mad *)in;
-	struct ib_mad *out_mad = (struct ib_mad *)out;
-
-	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
-			 *out_mad_size != sizeof(*out_mad)))
-		return IB_MAD_RESULT_FAILURE;
 
 	slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
 
@@ -117,6 +111,156 @@ int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
 }
 
+static void pma_cnt_ext_assign(struct ib_pma_portcounters_ext *pma_cnt_ext,
+			       void *out)
+{
+#define MLX5_SUM_CNT(p, cntr1, cntr2)	\
+	(MLX5_GET64(query_vport_counter_out, p, cntr1) + \
+	MLX5_GET64(query_vport_counter_out, p, cntr2))
+
+	pma_cnt_ext->port_xmit_data =
+		cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.octets,
+					 transmitted_ib_multicast.octets) >> 2);
+	pma_cnt_ext->port_rcv_data =
+		cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.octets,
+					 received_ib_multicast.octets) >> 2);
+	pma_cnt_ext->port_xmit_packets =
+		cpu_to_be64(MLX5_SUM_CNT(out, transmitted_ib_unicast.packets,
+					 transmitted_ib_multicast.packets));
+	pma_cnt_ext->port_rcv_packets =
+		cpu_to_be64(MLX5_SUM_CNT(out, received_ib_unicast.packets,
+					 received_ib_multicast.packets));
+	pma_cnt_ext->port_unicast_xmit_packets =
+		MLX5_GET64_BE(query_vport_counter_out,
+			      out, transmitted_ib_unicast.packets);
+	pma_cnt_ext->port_unicast_rcv_packets =
+		MLX5_GET64_BE(query_vport_counter_out,
+			      out, received_ib_unicast.packets);
+	pma_cnt_ext->port_multicast_xmit_packets =
+		MLX5_GET64_BE(query_vport_counter_out,
+			      out, transmitted_ib_multicast.packets);
+	pma_cnt_ext->port_multicast_rcv_packets =
+		MLX5_GET64_BE(query_vport_counter_out,
+			      out, received_ib_multicast.packets);
+}
+
+static void pma_cnt_assign(struct ib_pma_portcounters *pma_cnt,
+			   void *out)
+{
+	/* Traffic counters will be reported in
+	 * their 64bit form via ib_pma_portcounters_ext by default.
+	 */
+	void *out_pma = MLX5_ADDR_OF(ppcnt_reg, out,
+				     counter_set);
+
+#define MLX5_ASSIGN_PMA_CNTR(counter_var, counter_name)	{		\
+	counter_var = MLX5_GET_BE(typeof(counter_var),			\
+				  ib_port_cntrs_grp_data_layout,	\
+				  out_pma, counter_name);		\
+	}
+
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->symbol_error_counter,
+			     symbol_error_counter);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_error_recovery_counter,
+			     link_error_recovery_counter);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_downed_counter,
+			     link_downed_counter);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_errors,
+			     port_rcv_errors);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_remphys_errors,
+			     port_rcv_remote_physical_errors);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_switch_relay_errors,
+			     port_rcv_switch_relay_errors);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_discards,
+			     port_xmit_discards);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_xmit_constraint_errors,
+			     port_xmit_constraint_errors);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->port_rcv_constraint_errors,
+			     port_rcv_constraint_errors);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->link_overrun_errors,
+			     link_overrun_errors);
+	MLX5_ASSIGN_PMA_CNTR(pma_cnt->vl15_dropped,
+			     vl_15_dropped);
+}
+
+static int process_pma_cmd(struct ib_device *ibdev, u8 port_num,
+			   const struct ib_mad *in_mad, struct ib_mad *out_mad)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	int err;
+	void *out_cnt;
+
+	/* Declaring support of extended counters */
+	if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO) {
+		struct ib_class_port_info cpi = {};
+
+		cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
+		memcpy((out_mad->data + 40), &cpi, sizeof(cpi));
+		return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+	}
+
+	if (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT) {
+		struct ib_pma_portcounters_ext *pma_cnt_ext =
+			(struct ib_pma_portcounters_ext *)(out_mad->data + 40);
+		int sz = MLX5_ST_SZ_BYTES(query_vport_counter_out);
+
+		out_cnt = mlx5_vzalloc(sz);
+		if (!out_cnt)
+			return IB_MAD_RESULT_FAILURE;
+
+		err = mlx5_core_query_vport_counter(dev->mdev, 0, 0,
+						    port_num, out_cnt, sz);
+		if (!err)
+			pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
+	} else {
+		struct ib_pma_portcounters *pma_cnt =
+			(struct ib_pma_portcounters *)(out_mad->data + 40);
+		int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
+
+		out_cnt = mlx5_vzalloc(sz);
+		if (!out_cnt)
+			return IB_MAD_RESULT_FAILURE;
+
+		err = mlx5_core_query_ib_ppcnt(dev->mdev, port_num,
+					       out_cnt, sz);
+		if (!err)
+			pma_cnt_assign(pma_cnt, out_cnt);
+	}
+
+	kvfree(out_cnt);
+	if (err)
+		return IB_MAD_RESULT_FAILURE;
+
+	return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
+}
+
+int mlx5_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+			const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+			const struct ib_mad_hdr *in, size_t in_mad_size,
+			struct ib_mad_hdr *out, size_t *out_mad_size,
+			u16 *out_mad_pkey_index)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct mlx5_core_dev *mdev = dev->mdev;
+	const struct ib_mad *in_mad = (const struct ib_mad *)in;
+	struct ib_mad *out_mad = (struct ib_mad *)out;
+
+	if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
+			 *out_mad_size != sizeof(*out_mad)))
+		return IB_MAD_RESULT_FAILURE;
+
+	memset(out_mad->data, 0, sizeof(out_mad->data));
+
+	if (MLX5_CAP_GEN(mdev, vport_counters) &&
+	    in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
+	    in_mad->mad_hdr.method == IB_MGMT_METHOD_GET) {
+		return process_pma_cmd(ibdev, port_num, in_mad, out_mad);
+	} else {
+		return process_mad(ibdev, mad_flags, port_num, in_wc, in_grh,
+				   in_mad, out_mad);
+	}
+}
+
 int mlx5_query_ext_port_caps(struct mlx5_ib_dev *dev, u8 port)
 {
 	struct ib_smp *in_mad  = NULL;

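One detail worth noting in pma_cnt_ext_assign(): the ">> 2" converts octet counters to the 32-bit-word units in which the PMA PortCounters/PortCountersExt data fields are defined. Equivalently, with hypothetical inputs:

/* PortXmitData/PortRcvData are specified in 32-bit words, not octets. */
static u64 example_octets_to_pma_dwords(u64 unicast_octets, u64 multicast_octets)
{
	return (unicast_octets + multicast_octets) >> 2;
}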
+ 116 - 15
drivers/infiniband/hw/mlx5/main.c

@@ -283,7 +283,7 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
 
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
-	return !dev->mdev->issi;
+	return !MLX5_CAP_GEN(dev->mdev, ib_virt);
 }
 
 enum {
@@ -487,6 +487,13 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 		props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
 	if (MLX5_CAP_GEN(mdev, xrc))
 		props->device_cap_flags |= IB_DEVICE_XRC;
+	if (MLX5_CAP_GEN(mdev, imaicl)) {
+		props->device_cap_flags |= IB_DEVICE_MEM_WINDOW |
+					   IB_DEVICE_MEM_WINDOW_TYPE_2B;
+		props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
+		/* We support 'Gappy' memory registration too */
+		props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
+	}
 	props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
 	if (MLX5_CAP_GEN(mdev, sho)) {
 		props->device_cap_flags |= IB_DEVICE_SIGNATURE_HANDOVER;
@@ -504,6 +511,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	    (MLX5_CAP_ETH(dev->mdev, csum_cap)))
 			props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
 
+	if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
+		props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
+		props->device_cap_flags |= IB_DEVICE_UD_TSO;
+	}
+
 	props->vendor_part_id	   = mdev->pdev->device;
 	props->hw_ver		   = mdev->pdev->revision;
 
@@ -529,7 +541,8 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
 	props->max_res_rd_atom	   = props->max_qp_rd_atom * props->max_qp;
 	props->max_srq_sge	   = max_rq_sg - 1;
-	props->max_fast_reg_page_list_len = (unsigned int)-1;
+	props->max_fast_reg_page_list_len =
+		1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
 	get_atomic_caps(dev, props);
 	props->masked_atomic_cap   = IB_ATOMIC_NONE;
 	props->max_mcast_grp	   = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
@@ -549,6 +562,9 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	if (MLX5_CAP_GEN(mdev, cd))
 		props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
 
+	if (!mlx5_core_is_pf(mdev))
+		props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
+
 	return 0;
 }
 
@@ -686,6 +702,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
 	props->qkey_viol_cntr	= rep->qkey_violation_counter;
 	props->subnet_timeout	= rep->subnet_timeout;
 	props->init_type_reply	= rep->init_type_reply;
+	props->grh_required	= rep->grh_required;
 
 
 	err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
 	if (err)
@@ -1369,11 +1386,20 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 	return 0;
 }
 
+static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
+{
+	priority *= 2;
+	if (!dont_trap)
+		priority++;
+	return priority;
+}
+
 #define MLX5_FS_MAX_TYPES	 10
 #define MLX5_FS_MAX_ENTRIES	 32000UL
 static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 						struct ib_flow_attr *flow_attr)
 {
+	bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
 	struct mlx5_flow_namespace *ns = NULL;
 	struct mlx5_ib_flow_prio *prio;
 	struct mlx5_flow_table *ft;
@@ -1383,10 +1409,12 @@ static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
 	int err = 0;
 
 	if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
-		if (flow_is_multicast_only(flow_attr))
+		if (flow_is_multicast_only(flow_attr) &&
+		    !dont_trap)
 			priority = MLX5_IB_FLOW_MCAST_PRIO;
 		else
-			priority = flow_attr->priority;
+			priority = ib_prio_to_core_prio(flow_attr->priority,
+							dont_trap);
 		ns = mlx5_get_flow_namespace(dev->mdev,
 					     MLX5_FLOW_NAMESPACE_BYPASS);
 		num_entries = MLX5_FS_MAX_ENTRIES;
@@ -1434,6 +1462,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 	unsigned int spec_index;
 	u32 *match_c;
 	u32 *match_v;
+	u32 action;
 	int err = 0;
 
 	if (!is_valid_attr(flow_attr))
@@ -1459,9 +1488,11 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 
 
 	/* Outer header support only */
 	match_criteria_enable = (!outer_header_zero(match_c)) << 0;
+	action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+		MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 	handler->rule = mlx5_add_flow_rule(ft, match_criteria_enable,
 					   match_c, match_v,
-					   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+					   action,
 					   MLX5_FS_DEFAULT_FLOW_TAG,
 					   dst);
 
@@ -1481,6 +1512,29 @@ free:
 	return err ? ERR_PTR(err) : handler;
 }
 
+static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
+							  struct mlx5_ib_flow_prio *ft_prio,
+							  struct ib_flow_attr *flow_attr,
+							  struct mlx5_flow_destination *dst)
+{
+	struct mlx5_ib_flow_handler *handler_dst = NULL;
+	struct mlx5_ib_flow_handler *handler = NULL;
+
+	handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
+	if (!IS_ERR(handler)) {
+		handler_dst = create_flow_rule(dev, ft_prio,
+					       flow_attr, dst);
+		if (IS_ERR(handler_dst)) {
+			mlx5_del_flow_rule(handler->rule);
+			kfree(handler);
+			handler = handler_dst;
+		} else {
+			list_add(&handler_dst->list, &handler->list);
+		}
+	}
+
+	return handler;
+}
 enum {
 	LEFTOVERS_MC,
 	LEFTOVERS_UC,
@@ -1558,7 +1612,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 
 
 	if (domain != IB_FLOW_DOMAIN_USER ||
 	    flow_attr->port > MLX5_CAP_GEN(dev->mdev, num_ports) ||
-	    flow_attr->flags)
+	    (flow_attr->flags & ~IB_FLOW_ATTR_FLAGS_DONT_TRAP))
 		return ERR_PTR(-EINVAL);
 
 	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
@@ -1577,8 +1631,13 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 	dst->tir_num = to_mqp(qp)->raw_packet_qp.rq.tirn;
 
 	if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
-		handler = create_flow_rule(dev, ft_prio, flow_attr,
-					   dst);
+		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)  {
+			handler = create_dont_trap_rule(dev, ft_prio,
+							flow_attr, dst);
+		} else {
+			handler = create_flow_rule(dev, ft_prio, flow_attr,
+						   dst);
+		}
 	} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
 		   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
 		handler = create_leftovers_rule(dev, ft_prio, flow_attr,
@@ -1716,6 +1775,17 @@ static struct device_attribute *mlx5_class_attributes[] = {
 	&dev_attr_reg_pages,
 };
 
+static void pkey_change_handler(struct work_struct *work)
+{
+	struct mlx5_ib_port_resources *ports =
+		container_of(work, struct mlx5_ib_port_resources,
+			     pkey_change_work);
+
+	mutex_lock(&ports->devr->mutex);
+	mlx5_ib_gsi_pkey_change(ports->gsi);
+	mutex_unlock(&ports->devr->mutex);
+}
+
 static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 			  enum mlx5_dev_event event, unsigned long param)
 {
@@ -1752,6 +1822,8 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
 	case MLX5_DEV_EVENT_PKEY_CHANGE:
 		ibev.event = IB_EVENT_PKEY_CHANGE;
 		port = (u8)param;
+
+		schedule_work(&ibdev->devr.ports[port - 1].pkey_change_work);
 		break;
 
 	case MLX5_DEV_EVENT_GUID_CHANGE:
@@ -1838,7 +1910,7 @@ static void destroy_umrc_res(struct mlx5_ib_dev *dev)
 		mlx5_ib_warn(dev, "mr cache cleanup failed\n");
 
 	mlx5_ib_destroy_qp(dev->umrc.qp);
-	ib_destroy_cq(dev->umrc.cq);
+	ib_free_cq(dev->umrc.cq);
 	ib_dealloc_pd(dev->umrc.pd);
 }
 
@@ -1853,7 +1925,6 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
 	struct ib_pd *pd;
 	struct ib_cq *cq;
 	struct ib_qp *qp;
-	struct ib_cq_init_attr cq_attr = {};
 	int ret;
 
 	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
@@ -1870,15 +1941,12 @@ static int create_umr_res(struct mlx5_ib_dev *dev)
 		goto error_0;
 	}
 
-	cq_attr.cqe = 128;
-	cq = ib_create_cq(&dev->ib_dev, mlx5_umr_cq_handler, NULL, NULL,
-			  &cq_attr);
+	cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
 	if (IS_ERR(cq)) {
 		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
 		ret = PTR_ERR(cq);
 		goto error_2;
 	}
-	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
 
 
 	init_attr->send_cq = cq;
 	init_attr->recv_cq = cq;
@@ -1945,7 +2013,7 @@ error_4:
 	mlx5_ib_destroy_qp(qp);
 	mlx5_ib_destroy_qp(qp);
 
 
 error_3:
 error_3:
-	ib_destroy_cq(cq);
+	ib_free_cq(cq);
 
 
 error_2:
 	ib_dealloc_pd(pd);
@@ -1961,10 +2029,13 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
 	struct ib_srq_init_attr attr;
 	struct mlx5_ib_dev *dev;
 	struct ib_cq_init_attr cq_attr = {.cqe = 1};
+	int port;
 	int ret = 0;
 
 	dev = container_of(devr, struct mlx5_ib_dev, devr);
 
+	mutex_init(&devr->mutex);
+
 	devr->p0 = mlx5_ib_alloc_pd(&dev->ib_dev, NULL, NULL);
 	if (IS_ERR(devr->p0)) {
 		ret = PTR_ERR(devr->p0);
@@ -2052,6 +2123,12 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
 	atomic_inc(&devr->p0->usecnt);
 	atomic_set(&devr->s0->usecnt, 0);
 
+	for (port = 0; port < ARRAY_SIZE(devr->ports); ++port) {
+		INIT_WORK(&devr->ports[port].pkey_change_work,
+			  pkey_change_handler);
+		devr->ports[port].devr = devr;
+	}
+
 	return 0;
 
 error5:
@@ -2070,12 +2147,20 @@ error0:
 
 
 static void destroy_dev_resources(struct mlx5_ib_resources *devr)
 {
+	struct mlx5_ib_dev *dev =
+		container_of(devr, struct mlx5_ib_dev, devr);
+	int port;
+
 	mlx5_ib_destroy_srq(devr->s1);
 	mlx5_ib_destroy_srq(devr->s0);
 	mlx5_ib_dealloc_xrcd(devr->x0);
 	mlx5_ib_dealloc_xrcd(devr->x1);
 	mlx5_ib_destroy_cq(devr->c0);
 	mlx5_ib_dealloc_pd(devr->p0);
+
+	/* Make sure no change P_Key work items are still executing */
+	for (port = 0; port < dev->num_ports; ++port)
+		cancel_work_sync(&devr->ports[port].pkey_change_work);
 }
 
 static u32 get_core_cap_flags(struct ib_device *ibdev)
@@ -2198,6 +2283,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
 		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
 		(1ull << IB_USER_VERBS_CMD_REG_MR)		|
+		(1ull << IB_USER_VERBS_CMD_REREG_MR)		|
 		(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
 		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)	|
 		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
@@ -2258,6 +2344,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->ib_dev.req_notify_cq	= mlx5_ib_arm_cq;
 	dev->ib_dev.get_dma_mr		= mlx5_ib_get_dma_mr;
 	dev->ib_dev.reg_user_mr		= mlx5_ib_reg_user_mr;
+	dev->ib_dev.rereg_user_mr	= mlx5_ib_rereg_user_mr;
 	dev->ib_dev.dereg_mr		= mlx5_ib_dereg_mr;
 	dev->ib_dev.attach_mcast	= mlx5_ib_mcg_attach;
 	dev->ib_dev.detach_mcast	= mlx5_ib_mcg_detach;
@@ -2266,9 +2353,23 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
 	dev->ib_dev.map_mr_sg		= mlx5_ib_map_mr_sg;
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 	dev->ib_dev.get_port_immutable  = mlx5_port_immutable;
+	if (mlx5_core_is_pf(mdev)) {
+		dev->ib_dev.get_vf_config	= mlx5_ib_get_vf_config;
+		dev->ib_dev.set_vf_link_state	= mlx5_ib_set_vf_link_state;
+		dev->ib_dev.get_vf_stats	= mlx5_ib_get_vf_stats;
+		dev->ib_dev.set_vf_guid		= mlx5_ib_set_vf_guid;
+	}
 
 
 	mlx5_ib_internal_fill_odp_caps(dev);
 
+	if (MLX5_CAP_GEN(mdev, imaicl)) {
+		dev->ib_dev.alloc_mw		= mlx5_ib_alloc_mw;
+		dev->ib_dev.dealloc_mw		= mlx5_ib_dealloc_mw;
+		dev->ib_dev.uverbs_cmd_mask |=
+			(1ull << IB_USER_VERBS_CMD_ALLOC_MW)	|
+			(1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
+	}
+
 	if (MLX5_CAP_GEN(mdev, xrc)) {
 		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
 		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
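Editor's note: the flow-steering hunks above hinge on one small mapping — the new ib_prio_to_core_prio() gives every user-visible flow priority two adjacent core priorities, an even slot for IB_FLOW_ATTR_FLAGS_DONT_TRAP rules and an odd slot for normal rules, which is why create_dont_trap_rule() can install two rules without them colliding. A minimal stand-alone sketch of that arithmetic (user-space C, not driver code; only the two-line helper is taken from the patch):

#include <stdbool.h>
#include <stdio.h>

/* Same arithmetic as ib_prio_to_core_prio() in the patch above. */
static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
{
	priority *= 2;
	if (!dont_trap)
		priority++;
	return priority;
}

int main(void)
{
	for (unsigned int p = 0; p < 3; p++)
		printf("user prio %u -> core prio %d (dont_trap), %d (normal)\n",
		       p, ib_prio_to_core_prio(p, true),
		       ib_prio_to_core_prio(p, false));
	return 0;
}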

+ 99 - 17
drivers/infiniband/hw/mlx5/mlx5_ib.h

@@ -43,6 +43,7 @@
 #include <linux/mlx5/srq.h>
 #include <linux/types.h>
 #include <linux/mlx5/transobj.h>
+#include <rdma/ib_user_verbs.h>
 
 
 #define mlx5_ib_dbg(dev, format, arg...)				\
 pr_debug("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__,	\
@@ -126,7 +127,7 @@ struct mlx5_ib_pd {
 };
 
 #define MLX5_IB_FLOW_MCAST_PRIO		(MLX5_BY_PASS_NUM_PRIOS - 1)
-#define MLX5_IB_FLOW_LAST_PRIO		(MLX5_IB_FLOW_MCAST_PRIO - 1)
+#define MLX5_IB_FLOW_LAST_PRIO		(MLX5_BY_PASS_NUM_REGULAR_PRIOS - 1)
 #if (MLX5_IB_FLOW_LAST_PRIO <= 0)
 #error "Invalid number of bypass priorities"
 #endif
@@ -162,9 +163,31 @@ struct mlx5_ib_flow_db {
 #define MLX5_IB_SEND_UMR_UNREG	IB_SEND_RESERVED_START
 #define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1)
 #define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2)
+
+#define MLX5_IB_SEND_UMR_UPDATE_TRANSLATION	(IB_SEND_RESERVED_START << 3)
+#define MLX5_IB_SEND_UMR_UPDATE_PD		(IB_SEND_RESERVED_START << 4)
+#define MLX5_IB_SEND_UMR_UPDATE_ACCESS		IB_SEND_RESERVED_END
+
 #define MLX5_IB_QPT_REG_UMR	IB_QPT_RESERVED1
+/*
+ * IB_QPT_GSI creates the software wrapper around GSI, and MLX5_IB_QPT_HW_GSI
+ * creates the actual hardware QP.
+ */
+#define MLX5_IB_QPT_HW_GSI	IB_QPT_RESERVED2
 #define MLX5_IB_WR_UMR		IB_WR_RESERVED1
 
+/* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
+ *
+ * These flags are intended for internal use by the mlx5_ib driver, and they
+ * rely on the range reserved for that use in the ib_qp_create_flags enum.
+ */
+
+/* Create a UD QP whose source QP number is 1 */
+static inline enum ib_qp_create_flags mlx5_ib_create_qp_sqpn_qp1(void)
+{
+	return IB_QP_CREATE_RESERVED_START;
+}
+
 struct wr_list {
 	u16	opcode;
 	u16	next;
@@ -325,11 +348,14 @@ struct mlx5_ib_cq_buf {
 };
 
 enum mlx5_ib_qp_flags {
-	MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK     = 1 << 0,
-	MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 1,
-	MLX5_IB_QP_CROSS_CHANNEL		= 1 << 2,
-	MLX5_IB_QP_MANAGED_SEND			= 1 << 3,
-	MLX5_IB_QP_MANAGED_RECV			= 1 << 4,
+	MLX5_IB_QP_LSO                          = IB_QP_CREATE_IPOIB_UD_LSO,
+	MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK     = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK,
+	MLX5_IB_QP_CROSS_CHANNEL            = IB_QP_CREATE_CROSS_CHANNEL,
+	MLX5_IB_QP_MANAGED_SEND             = IB_QP_CREATE_MANAGED_SEND,
+	MLX5_IB_QP_MANAGED_RECV             = IB_QP_CREATE_MANAGED_RECV,
+	MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 5,
+	/* QP uses 1 as its source QP number */
+	MLX5_IB_QP_SQPN_QP1			= 1 << 6,
 };
 
 struct mlx5_umr_wr {
@@ -373,6 +399,14 @@ struct mlx5_ib_cq {
 	struct ib_umem	       *resize_umem;
 	int			cqe_size;
 	u32			create_flags;
+	struct list_head	wc_list;
+	enum ib_cq_notify_flags notify_flags;
+	struct work_struct	notify_work;
+};
+
+struct mlx5_ib_wc {
+	struct ib_wc wc;
+	struct list_head list;
 };
 };
 
 
 struct mlx5_ib_srq {
 struct mlx5_ib_srq {
@@ -413,7 +447,8 @@ struct mlx5_ib_mr {
 	int			ndescs;
 	int			ndescs;
 	int			max_descs;
-	struct mlx5_core_mr	mmr;
+	int			access_mode;
+	struct mlx5_core_mkey	mmkey;
 	struct ib_umem	       *umem;
 	struct ib_umem	       *umem;
 	struct mlx5_shared_mr_info	*smr_info;
 	struct mlx5_shared_mr_info	*smr_info;
 	struct list_head	list;
 	struct list_head	list;
@@ -425,19 +460,20 @@ struct mlx5_ib_mr {
 	struct mlx5_core_sig_ctx    *sig;
 	struct mlx5_core_sig_ctx    *sig;
 	int			live;
 	int			live;
 	void			*descs_alloc;
 	void			*descs_alloc;
+	int			access_flags; /* Needed for rereg MR */
+};
+
+struct mlx5_ib_mw {
+	struct ib_mw		ibmw;
+	struct mlx5_core_mkey	mmkey;
 };
 };
 
 
 struct mlx5_ib_umr_context {
 struct mlx5_ib_umr_context {
+	struct ib_cqe		cqe;
 	enum ib_wc_status	status;
 	enum ib_wc_status	status;
 	struct completion	done;
 	struct completion	done;
 };
 };
 
 
-static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
-{
-	context->status = -1;
-	init_completion(&context->done);
-}
-
 struct umr_common {
 struct umr_common {
 	struct ib_pd	*pd;
 	struct ib_pd	*pd;
 	struct ib_cq	*cq;
 	struct ib_cq	*cq;
@@ -487,6 +523,14 @@ struct mlx5_mr_cache {
 	unsigned long		last_add;
 	unsigned long		last_add;
 };
 };
 
 
+struct mlx5_ib_gsi_qp;
+
+struct mlx5_ib_port_resources {
+	struct mlx5_ib_resources *devr;
+	struct mlx5_ib_gsi_qp *gsi;
+	struct work_struct pkey_change_work;
+};
+
 struct mlx5_ib_resources {
 struct mlx5_ib_resources {
 	struct ib_cq	*c0;
 	struct ib_cq	*c0;
 	struct ib_xrcd	*x0;
 	struct ib_xrcd	*x0;
@@ -494,6 +538,9 @@ struct mlx5_ib_resources {
 	struct ib_pd	*p0;
 	struct ib_pd	*p0;
 	struct ib_srq	*s0;
 	struct ib_srq	*s0;
 	struct ib_srq	*s1;
 	struct ib_srq	*s1;
+	struct mlx5_ib_port_resources ports[2];
+	/* Protects changes to the port resources */
+	struct mutex	mutex;
 };
 };
 
 
 struct mlx5_roce {
 struct mlx5_roce {
@@ -558,9 +605,9 @@ static inline struct mlx5_ib_qp *to_mibqp(struct mlx5_core_qp *mqp)
 	return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
 	return container_of(mqp, struct mlx5_ib_qp_base, mqp)->container_mibqp;
 }
 }
 
 
-static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mr *mmr)
+static inline struct mlx5_ib_mr *to_mibmr(struct mlx5_core_mkey *mmkey)
 {
 {
-	return container_of(mmr, struct mlx5_ib_mr, mmr);
+	return container_of(mmkey, struct mlx5_ib_mr, mmkey);
 }
 }
 
 
 static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
 static inline struct mlx5_ib_pd *to_mpd(struct ib_pd *ibpd)
@@ -588,6 +635,11 @@ static inline struct mlx5_ib_mr *to_mmr(struct ib_mr *ibmr)
 	return container_of(ibmr, struct mlx5_ib_mr, ibmr);
 	return container_of(ibmr, struct mlx5_ib_mr, ibmr);
 }
 }
 
 
+static inline struct mlx5_ib_mw *to_mmw(struct ib_mw *ibmw)
+{
+	return container_of(ibmw, struct mlx5_ib_mw, ibmw);
+}
+
 struct mlx5_ib_ah {
 struct mlx5_ib_ah {
 	struct ib_ah		ibah;
 	struct ib_ah		ibah;
 	struct mlx5_av		av;
 	struct mlx5_av		av;
@@ -648,8 +700,14 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc);
 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata);
 				  struct ib_udata *udata);
+struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
+			       struct ib_udata *udata);
+int mlx5_ib_dealloc_mw(struct ib_mw *mw);
 int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
 int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
 		       int npages, int zap);
 		       int npages, int zap);
+int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+			  u64 length, u64 virt_addr, int access_flags,
+			  struct ib_pd *pd, struct ib_udata *udata);
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 			       enum ib_mr_type mr_type,
 			       enum ib_mr_type mr_type,
@@ -700,7 +758,6 @@ int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
 int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
 int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
-void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context);
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 			    struct ib_mr_status *mr_status);
 			    struct ib_mr_status *mr_status);
 
 
@@ -719,6 +776,14 @@ void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
 void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
 void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
 void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
 			      unsigned long end);
 			      unsigned long end);
+int mlx5_ib_get_vf_config(struct ib_device *device, int vf,
+			  u8 port, struct ifla_vf_info *info);
+int mlx5_ib_set_vf_link_state(struct ib_device *device, int vf,
+			      u8 port, int state);
+int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
+			 u8 port, struct ifla_vf_stats *stats);
+int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
+			u64 guid, int type);
 
 
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
@@ -739,6 +804,23 @@ static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)  {}
 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
 			       int index);
 			       int index);
 
 
+/* GSI QP helper functions */
+struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
+				    struct ib_qp_init_attr *init_attr);
+int mlx5_ib_gsi_destroy_qp(struct ib_qp *qp);
+int mlx5_ib_gsi_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
+			  int attr_mask);
+int mlx5_ib_gsi_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
+			 int qp_attr_mask,
+			 struct ib_qp_init_attr *qp_init_attr);
+int mlx5_ib_gsi_post_send(struct ib_qp *qp, struct ib_send_wr *wr,
+			  struct ib_send_wr **bad_wr);
+int mlx5_ib_gsi_post_recv(struct ib_qp *qp, struct ib_recv_wr *wr,
+			  struct ib_recv_wr **bad_wr);
+void mlx5_ib_gsi_pkey_change(struct mlx5_ib_gsi_qp *gsi);
+
+int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc);
+
 static inline void init_query_mad(struct ib_smp *mad)
 {
 	mad->base_version  = 1;
@@ -758,7 +840,7 @@ static inline u8 convert_access(int acc)
 
 
 static inline int is_qp1(enum ib_qp_type qp_type)
 {
-	return qp_type == IB_QPT_GSI;
+	return qp_type == MLX5_IB_QPT_HW_GSI;
 }
 
 #define MLX5_MAX_UMR_SHIFT 16

+ 471 - 130
drivers/infiniband/hw/mlx5/mr.c

@@ -40,6 +40,7 @@
 #include <rdma/ib_umem_odp.h>
 #include <rdma/ib_verbs.h>
 #include "mlx5_ib.h"
+#include "user.h"
 
 
 enum {
 	MAX_PENDING_REG_MR = 8,
@@ -57,7 +58,7 @@ static int clean_mr(struct mlx5_ib_mr *mr);
 
 
 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
 
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	/* Wait until all page fault handlers using the mr complete. */
@@ -77,6 +78,40 @@ static int order2idx(struct mlx5_ib_dev *dev, int order)
 		return order - cache->ent[0].order;
 }
 
+static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
+{
+	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
+		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
+}
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static void update_odp_mr(struct mlx5_ib_mr *mr)
+{
+	if (mr->umem->odp_data) {
+		/*
+		 * This barrier prevents the compiler from moving the
+		 * setting of umem->odp_data->private to point to our
+		 * MR, before reg_umr finished, to ensure that the MR
+		 * initialization have finished before starting to
+		 * handle invalidations.
+		 */
+		smp_wmb();
+		mr->umem->odp_data->private = mr;
+		/*
+		 * Make sure we will see the new
+		 * umem->odp_data->private value in the invalidation
+		 * routines, before we can get page faults on the
+		 * MR. Page faults can happen once we put the MR in
+		 * the tree, below this line. Without the barrier,
+		 * there can be a fault handling and an invalidation
+		 * before umem->odp_data->private == mr is visible to
+		 * the invalidation handler.
+		 */
+		smp_wmb();
+	}
+}
+#endif
+
 static void reg_mr_callback(int status, void *context)
 {
 	struct mlx5_ib_mr *mr = context;
@@ -86,7 +121,7 @@ static void reg_mr_callback(int status, void *context)
 	struct mlx5_cache_ent *ent = &cache->ent[c];
 	u8 key;
 	unsigned long flags;
-	struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
+	struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
 	int err;
 
 	spin_lock_irqsave(&ent->lock, flags);
@@ -113,7 +148,7 @@ static void reg_mr_callback(int status, void *context)
 	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
 	key = dev->mdev->priv.mkey_key++;
 	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
-	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
+	mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;
 
 
 	cache->last_add = jiffies;
 
@@ -124,10 +159,10 @@ static void reg_mr_callback(int status, void *context)
 	spin_unlock_irqrestore(&ent->lock, flags);
 
 	write_lock_irqsave(&table->lock, flags);
-	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
-				&mr->mmr);
+	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
+				&mr->mmkey);
 	if (err)
-		pr_err("Error inserting to mr tree. 0x%x\n", -err);
+		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
 	write_unlock_irqrestore(&table->lock, flags);
 }
 
@@ -168,7 +203,7 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 		spin_lock_irq(&ent->lock);
 		ent->pending++;
 		spin_unlock_irq(&ent->lock);
-		err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
+		err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in,
 					    sizeof(*in), reg_mr_callback,
 					    mr, &mr->out);
 		if (err) {
@@ -657,14 +692,14 @@ struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
 	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 	seg->start_addr = 0;
 
-	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
+	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL,
 				    NULL);
 	if (err)
 		goto err_in;
 
 	kfree(in);
-	mr->ibmr.lkey = mr->mmr.key;
-	mr->ibmr.rkey = mr->mmr.key;
+	mr->ibmr.lkey = mr->mmkey.key;
+	mr->ibmr.rkey = mr->mmkey.key;
 	mr->umem = NULL;
 
 	return &mr->ibmr;
@@ -693,10 +728,40 @@ static int use_umr(int order)
 	return order <= MLX5_MAX_UMR_SHIFT;
 }
 
-static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
-			     struct ib_sge *sg, u64 dma, int n, u32 key,
-			     int page_shift, u64 virt_addr, u64 len,
-			     int access_flags)
+static int dma_map_mr_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			  int npages, int page_shift, int *size,
+			  __be64 **mr_pas, dma_addr_t *dma)
+{
+	__be64 *pas;
+	struct device *ddev = dev->ib_dev.dma_device;
+
+	/*
+	 * UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
+	 * To avoid copying garbage after the pas array, we allocate
+	 * a little more.
+	 */
+	*size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
+	*mr_pas = kmalloc(*size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
+	if (!(*mr_pas))
+		return -ENOMEM;
+
+	pas = PTR_ALIGN(*mr_pas, MLX5_UMR_ALIGN);
+	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
+	/* Clear padding after the actual pages. */
+	memset(pas + npages, 0, *size - npages * sizeof(u64));
+
+	*dma = dma_map_single(ddev, pas, *size, DMA_TO_DEVICE);
+	if (dma_mapping_error(ddev, *dma)) {
+		kfree(*mr_pas);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void prep_umr_wqe_common(struct ib_pd *pd, struct ib_send_wr *wr,
+				struct ib_sge *sg, u64 dma, int n, u32 key,
+				int page_shift)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct mlx5_umr_wr *umrwr = umr_wr(wr);
@@ -706,7 +771,6 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
 	sg->lkey = dev->umrc.pd->local_dma_lkey;
 
 	wr->next = NULL;
-	wr->send_flags = 0;
 	wr->sg_list = sg;
 	if (n)
 		wr->num_sge = 1;
@@ -718,6 +782,19 @@ static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
 	umrwr->npages = n;
 	umrwr->page_shift = page_shift;
 	umrwr->mkey = key;
+}
+
+static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
+			     struct ib_sge *sg, u64 dma, int n, u32 key,
+			     int page_shift, u64 virt_addr, u64 len,
+			     int access_flags)
+{
+	struct mlx5_umr_wr *umrwr = umr_wr(wr);
+
+	prep_umr_wqe_common(pd, wr, sg, dma, n, key, page_shift);
+
+	wr->send_flags = 0;
+
 	umrwr->target.virt_addr = virt_addr;
 	umrwr->length = len;
 	umrwr->access_flags = access_flags;
@@ -734,26 +811,45 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
 	umrwr->mkey = key;
 }
 
-void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
+static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
+				   int access_flags, int *npages,
+				   int *page_shift, int *ncont, int *order)
 {
 {
-	struct mlx5_ib_umr_context *context;
-	struct ib_wc wc;
-	int err;
-
-	while (1) {
-		err = ib_poll_cq(cq, 1, &wc);
-		if (err < 0) {
-			pr_warn("poll cq error %d\n", err);
-			return;
-		}
-		if (err == 0)
-			break;
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length,
+					   access_flags, 0);
+	if (IS_ERR(umem)) {
+		mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
+		return (void *)umem;
+	}
 
 
-		context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
-		context->status = wc.status;
-		complete(&context->done);
+	mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order);
+	if (!*npages) {
+		mlx5_ib_warn(dev, "avoid zero region\n");
+		ib_umem_release(umem);
+		return ERR_PTR(-EINVAL);
 	}
-	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
+
+	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
+		    *npages, *ncont, *order, *page_shift);
+
+	return umem;
+}
+
+static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	struct mlx5_ib_umr_context *context =
+		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
+
+	context->status = wc->status;
+	complete(&context->done);
+}
+
+static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
+{
+	context->cqe.done = mlx5_ib_umr_done;
+	context->status = -1;
+	init_completion(&context->done);
 }
 }
 
 
 static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 	struct device *ddev = dev->ib_dev.dma_device;
 	struct device *ddev = dev->ib_dev.dma_device;
 	struct umr_common *umrc = &dev->umrc;
 	struct umr_common *umrc = &dev->umrc;
 	struct mlx5_ib_umr_context umr_context;
 	struct mlx5_ib_umr_context umr_context;
-	struct mlx5_umr_wr umrwr;
+	struct mlx5_umr_wr umrwr = {};
 	struct ib_send_wr *bad;
 	struct ib_send_wr *bad;
 	struct mlx5_ib_mr *mr;
 	struct mlx5_ib_mr *mr;
 	struct ib_sge sg;
 	struct ib_sge sg;
 	int size;
 	int size;
 	__be64 *mr_pas;
 	__be64 *mr_pas;
-	__be64 *pas;
 	dma_addr_t dma;
 	dma_addr_t dma;
 	int err = 0;
 	int err = 0;
 	int i;
 	int i;
@@ -790,33 +885,17 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 	if (!mr)
 	if (!mr)
 		return ERR_PTR(-EAGAIN);
 		return ERR_PTR(-EAGAIN);
 
 
-	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
-	 * To avoid copying garbage after the pas array, we allocate
-	 * a little more. */
-	size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
-	mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
-	if (!mr_pas) {
-		err = -ENOMEM;
+	err = dma_map_mr_pas(dev, umem, npages, page_shift, &size, &mr_pas,
+			     &dma);
+	if (err)
 		goto free_mr;
 		goto free_mr;
-	}
 
 
-	pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
-	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
-	/* Clear padding after the actual pages. */
-	memset(pas + npages, 0, size - npages * sizeof(u64));
-
-	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
-	if (dma_mapping_error(ddev, dma)) {
-		err = -ENOMEM;
-		goto free_pas;
-	}
+	mlx5_ib_init_umr_context(&umr_context);
 
 
-	memset(&umrwr, 0, sizeof(umrwr));
-	umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
-	prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key,
+	umrwr.wr.wr_cqe = &umr_context.cqe;
+	prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
 			 page_shift, virt_addr, len, access_flags);
 
-	mlx5_ib_init_umr_context(&umr_context);
 	down(&umrc->sem);
 	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
 	if (err) {
@@ -830,9 +909,9 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
 		}
 	}
 
-	mr->mmr.iova = virt_addr;
-	mr->mmr.size = len;
-	mr->mmr.pd = to_mpd(pd)->pdn;
+	mr->mmkey.iova = virt_addr;
+	mr->mmkey.size = len;
+	mr->mmkey.pd = to_mpd(pd)->pdn;
 
 
 	mr->live = 1;
 
@@ -840,7 +919,6 @@ unmap_dma:
 	up(&umrc->sem);
 	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 
-free_pas:
 	kfree(mr_pas);
 
 free_mr:
@@ -929,8 +1007,10 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
 
 
 		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
 
+		mlx5_ib_init_umr_context(&umr_context);
+
 		memset(&wr, 0, sizeof(wr));
-		wr.wr.wr_id = (u64)(unsigned long)&umr_context;
+		wr.wr.wr_cqe = &umr_context.cqe;
 
 
 		sg.addr = dma;
 		sg.length = ALIGN(npages * sizeof(u64),
@@ -944,10 +1024,9 @@ int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
 		wr.wr.opcode = MLX5_IB_WR_UMR;
 		wr.npages = sg.length / sizeof(u64);
 		wr.page_shift = PAGE_SHIFT;
-		wr.mkey = mr->mmr.key;
+		wr.mkey = mr->mmkey.key;
 		wr.target.offset = start_page_index;
 
-		mlx5_ib_init_umr_context(&umr_context);
 		down(&umrc->sem);
 		err = ib_post_send(umrc->qp, &wr.wr, &bad);
 		if (err) {
@@ -974,10 +1053,14 @@ free_pas:
 }
 #endif
 
-static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
-				     u64 length, struct ib_umem *umem,
-				     int npages, int page_shift,
-				     int access_flags)
+/*
+ * If ibmr is NULL it will be allocated by reg_create.
+ * Else, the given ibmr will be used.
+ */
+static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
+				     u64 virt_addr, u64 length,
+				     struct ib_umem *umem, int npages,
+				     int page_shift, int access_flags)
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct mlx5_create_mkey_mbox_in *in;
@@ -986,7 +1069,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 	int err;
 	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
 
-	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
 		return ERR_PTR(-ENOMEM);
 
@@ -1013,7 +1096,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
 							 1 << page_shift));
-	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
+	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL,
 				    NULL, NULL);
 	if (err) {
 		mlx5_ib_warn(dev, "create mkey failed\n");
@@ -1024,7 +1107,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 	mr->live = 1;
 	kvfree(in);
 
-	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
+	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
 
 
 	return mr;
 
@@ -1032,11 +1115,23 @@ err_2:
 	kvfree(in);
 
 err_1:
-	kfree(mr);
+	if (!ibmr)
+		kfree(mr);
 
 
 	return ERR_PTR(err);
 }
 
+static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
+			  int npages, u64 length, int access_flags)
+{
+	mr->npages = npages;
+	atomic_add(npages, &dev->mdev->priv.reg_pages);
+	mr->ibmr.lkey = mr->mmkey.key;
+	mr->ibmr.rkey = mr->mmkey.key;
+	mr->ibmr.length = length;
+	mr->access_flags = access_flags;
+}
+
 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata)
@@ -1052,22 +1147,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
 
 	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
 		    start, virt_addr, length, access_flags);
-	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
-			   0);
-	if (IS_ERR(umem)) {
-		mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
-		return (void *)umem;
-	}
+	umem = mr_umem_get(pd, start, length, access_flags, &npages,
+			   &page_shift, &ncont, &order);
 
 
-	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
-	if (!npages) {
-		mlx5_ib_warn(dev, "avoid zero region\n");
-		err = -EINVAL;
-		goto error;
-	}
-
-	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
-		    npages, ncont, order, page_shift);
+	if (IS_ERR(umem))
+		return (void *)umem;
 
 
 	if (use_umr(order)) {
 		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
@@ -1083,45 +1167,21 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 	}
 
 	if (!mr)
-		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
-				access_flags);
+		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
+				page_shift, access_flags);
 
 
 	if (IS_ERR(mr)) {
 		err = PTR_ERR(mr);
 		goto error;
 	}
 
-	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
+	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
 
 
 	mr->umem = umem;
-	mr->npages = npages;
-	atomic_add(npages, &dev->mdev->priv.reg_pages);
-	mr->ibmr.lkey = mr->mmr.key;
-	mr->ibmr.rkey = mr->mmr.key;
+	set_mr_fileds(dev, mr, npages, length, access_flags);
 
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
-	if (umem->odp_data) {
-		/*
-		 * This barrier prevents the compiler from moving the
-		 * setting of umem->odp_data->private to point to our
-		 * MR, before reg_umr finished, to ensure that the MR
-		 * initialization have finished before starting to
-		 * handle invalidations.
-		 */
-		smp_wmb();
-		mr->umem->odp_data->private = mr;
-		/*
-		 * Make sure we will see the new
-		 * umem->odp_data->private value in the invalidation
-		 * routines, before we can get page faults on the
-		 * MR. Page faults can happen once we put the MR in
-		 * the tree, below this line. Without the barrier,
-		 * there can be a fault handling and an invalidation
-		 * before umem->odp_data->private == mr is visible to
-		 * the invalidation handler.
-		 */
-		smp_wmb();
-	}
+	update_odp_mr(mr);
 #endif
 
 	return &mr->ibmr;
@@ -1135,15 +1195,15 @@ static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
 	struct umr_common *umrc = &dev->umrc;
 	struct mlx5_ib_umr_context umr_context;
-	struct mlx5_umr_wr umrwr;
+	struct mlx5_umr_wr umrwr = {};
 	struct ib_send_wr *bad;
 	int err;
 
-	memset(&umrwr.wr, 0, sizeof(umrwr));
-	umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
-	prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key);
-
 	mlx5_ib_init_umr_context(&umr_context);
+
+	umrwr.wr.wr_cqe = &umr_context.cqe;
+	prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmkey.key);
+
 	down(&umrc->sem);
 	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
 	if (err) {
@@ -1165,6 +1225,167 @@ error:
 	return err;
 }
 
+static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, u64 virt_addr,
+		     u64 length, int npages, int page_shift, int order,
+		     int access_flags, int flags)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct device *ddev = dev->ib_dev.dma_device;
+	struct mlx5_ib_umr_context umr_context;
+	struct ib_send_wr *bad;
+	struct mlx5_umr_wr umrwr = {};
+	struct ib_sge sg;
+	struct umr_common *umrc = &dev->umrc;
+	dma_addr_t dma = 0;
+	__be64 *mr_pas = NULL;
+	int size;
+	int err;
+
+	mlx5_ib_init_umr_context(&umr_context);
+
+	umrwr.wr.wr_cqe = &umr_context.cqe;
+	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
+
+	if (flags & IB_MR_REREG_TRANS) {
+		err = dma_map_mr_pas(dev, mr->umem, npages, page_shift, &size,
+				     &mr_pas, &dma);
+		if (err)
+			return err;
+
+		umrwr.target.virt_addr = virt_addr;
+		umrwr.length = length;
+		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
+	}
+
+	prep_umr_wqe_common(pd, &umrwr.wr, &sg, dma, npages, mr->mmkey.key,
+			    page_shift);
+
+	if (flags & IB_MR_REREG_PD) {
+		umrwr.pd = pd;
+		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD;
+	}
+
+	if (flags & IB_MR_REREG_ACCESS) {
+		umrwr.access_flags = access_flags;
+		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_ACCESS;
+	}
+
+	/* post send request to UMR QP */
+	down(&umrc->sem);
+	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
+
+	if (err) {
+		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
+	} else {
+		wait_for_completion(&umr_context.done);
+		if (umr_context.status != IB_WC_SUCCESS) {
+			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
+				     umr_context.status);
+			err = -EFAULT;
+		}
+	}
+
+	up(&umrc->sem);
+	if (flags & IB_MR_REREG_TRANS) {
+		dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+		kfree(mr_pas);
+	}
+	return err;
+}
+
+int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
+			  u64 length, u64 virt_addr, int new_access_flags,
+			  struct ib_pd *new_pd, struct ib_udata *udata)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
+	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
+	struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
+	int access_flags = flags & IB_MR_REREG_ACCESS ?
+			    new_access_flags :
+			    mr->access_flags;
+	u64 addr = (flags & IB_MR_REREG_TRANS) ? virt_addr : mr->umem->address;
+	u64 len = (flags & IB_MR_REREG_TRANS) ? length : mr->umem->length;
+	int page_shift = 0;
+	int npages = 0;
+	int ncont = 0;
+	int order = 0;
+	int err;
+
+	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
+		    start, virt_addr, length, access_flags);
+
+	if (flags != IB_MR_REREG_PD) {
+		/*
+		 * Replace umem. This needs to be done whether or not UMR is
+		 * used.
+		 */
+		flags |= IB_MR_REREG_TRANS;
+		ib_umem_release(mr->umem);
+		mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages,
+				       &page_shift, &ncont, &order);
+		if (IS_ERR(mr->umem)) {
+			err = PTR_ERR(mr->umem);
+			mr->umem = NULL;
+			return err;
+		}
+	}
+
+	if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
+		/*
+		 * UMR can't be used - MKey needs to be replaced.
+		 */
+		if (mr->umred) {
+			err = unreg_umr(dev, mr);
+			if (err)
+				mlx5_ib_warn(dev, "Failed to unregister MR\n");
+		} else {
+			err = destroy_mkey(dev, mr);
+			if (err)
+				mlx5_ib_warn(dev, "Failed to destroy MKey\n");
+		}
+		if (err)
+			return err;
+
+		mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
+				page_shift, access_flags);
+
+		if (IS_ERR(mr))
+			return PTR_ERR(mr);
+
+		mr->umred = 0;
+	} else {
+		/*
+		 * Send a UMR WQE
+		 */
+		err = rereg_umr(pd, mr, addr, len, npages, page_shift,
+				order, access_flags, flags);
+		if (err) {
+			mlx5_ib_warn(dev, "Failed to rereg UMR\n");
+			return err;
+		}
+	}
+
+	if (flags & IB_MR_REREG_PD) {
+		ib_mr->pd = pd;
+		mr->mmkey.pd = to_mpd(pd)->pdn;
+	}
+
+	if (flags & IB_MR_REREG_ACCESS)
+		mr->access_flags = access_flags;
+
+	if (flags & IB_MR_REREG_TRANS) {
+		atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
+		set_mr_fileds(dev, mr, npages, len, access_flags);
+		mr->mmkey.iova = addr;
+		mr->mmkey.size = len;
+	}
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	update_odp_mr(mr);
+#endif
+
+	return 0;
+}
+
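Editor's note: a rough, stand-alone model of the path selection made by mlx5_ib_rereg_user_mr() above — anything other than a pure PD change replaces the umem and forces a translation update, and a translation that no longer fits the cached mkey falls back to destroying the mkey and calling reg_create() instead of posting a UMR WQE. The flag values mirror IB_MR_REREG_TRANS/PD/ACCESS, but the macros and helper below are illustrative only, not driver code:

#include <stdbool.h>
#include <stdio.h>

#define REREG_TRANS  0x1	/* assumed to match IB_MR_REREG_TRANS */
#define REREG_PD     0x2	/* assumed to match IB_MR_REREG_PD */
#define REREG_ACCESS 0x4	/* assumed to match IB_MR_REREG_ACCESS */

/* Mirrors the two decisions in mlx5_ib_rereg_user_mr() above. */
static const char *rereg_path(int flags, bool fits_cached_mkey)
{
	bool replace_umem = (flags != REREG_PD);

	if (replace_umem)
		flags |= REREG_TRANS;
	if ((flags & REREG_TRANS) && !fits_cached_mkey)
		return "destroy mkey + reg_create()";
	return "rereg_umr() work request";
}

int main(void)
{
	printf("%s\n", rereg_path(REREG_PD, true));	/* UMR update */
	printf("%s\n", rereg_path(REREG_TRANS, false));	/* full re-create */
	return 0;
}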
 static int
 mlx5_alloc_priv_descs(struct ib_device *device,
 		      struct mlx5_ib_mr *mr,
@@ -1236,7 +1457,7 @@ static int clean_mr(struct mlx5_ib_mr *mr)
 		err = destroy_mkey(dev, mr);
 		if (err) {
 			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
-				     mr->mmr.key, err);
+				     mr->mmkey.key, err);
 			return err;
 		}
 	} else {
@@ -1300,8 +1521,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct mlx5_create_mkey_mbox_in *in;
 	struct mlx5_ib_mr *mr;
-	int access_mode, err;
-	int ndescs = roundup(max_num_sg, 4);
+	int ndescs = ALIGN(max_num_sg, 4);
+	int err;
 
 
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
@@ -1319,7 +1540,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
 
 	if (mr_type == IB_MR_TYPE_MEM_REG) {
-		access_mode = MLX5_ACCESS_MODE_MTT;
+		mr->access_mode = MLX5_ACCESS_MODE_MTT;
 		in->seg.log2_page_size = PAGE_SHIFT;
 
 		err = mlx5_alloc_priv_descs(pd->device, mr,
@@ -1329,6 +1550,15 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 
 
 		mr->desc_size = sizeof(u64);
 		mr->max_descs = ndescs;
+	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
+		mr->access_mode = MLX5_ACCESS_MODE_KLM;
+
+		err = mlx5_alloc_priv_descs(pd->device, mr,
+					    ndescs, sizeof(struct mlx5_klm));
+		if (err)
+			goto err_free_in;
+		mr->desc_size = sizeof(struct mlx5_klm);
+		mr->max_descs = ndescs;
 	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
 		u32 psv_index[2];
 
@@ -1347,7 +1577,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 		if (err)
 			goto err_free_sig;
 
-		access_mode = MLX5_ACCESS_MODE_KLM;
+		mr->access_mode = MLX5_ACCESS_MODE_KLM;
 		mr->sig->psv_memory.psv_idx = psv_index[0];
 		mr->sig->psv_wire.psv_idx = psv_index[1];
 
@@ -1361,14 +1591,14 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
 		goto err_free_in;
 	}
 
-	in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
-	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
+	in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode;
+	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in),
 				    NULL, NULL, NULL);
 	if (err)
 		goto err_destroy_psv;
 
-	mr->ibmr.lkey = mr->mmr.key;
-	mr->ibmr.rkey = mr->mmr.key;
+	mr->ibmr.lkey = mr->mmkey.key;
+	mr->ibmr.rkey = mr->mmkey.key;
 	mr->umem = NULL;
 	kfree(in);
 
@@ -1395,6 +1625,88 @@ err_free:
 	return ERR_PTR(err);
 }
 
+struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
+			       struct ib_udata *udata)
+{
+	struct mlx5_ib_dev *dev = to_mdev(pd->device);
+	struct mlx5_create_mkey_mbox_in *in = NULL;
+	struct mlx5_ib_mw *mw = NULL;
+	int ndescs;
+	int err;
+	struct mlx5_ib_alloc_mw req = {};
+	struct {
+		__u32	comp_mask;
+		__u32	response_length;
+	} resp = {};
+
+	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
+	if (err)
+		return ERR_PTR(err);
+
+	if (req.comp_mask || req.reserved1 || req.reserved2)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (udata->inlen > sizeof(req) &&
+	    !ib_is_udata_cleared(udata, sizeof(req),
+				 udata->inlen - sizeof(req)))
+		return ERR_PTR(-EOPNOTSUPP);
+
+	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
+
+	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
+	in = kzalloc(sizeof(*in), GFP_KERNEL);
+	if (!mw || !in) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	in->seg.status = MLX5_MKEY_STATUS_FREE;
+	in->seg.xlt_oct_size = cpu_to_be32(ndescs);
+	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
+	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM |
+		MLX5_PERM_LOCAL_READ;
+	if (type == IB_MW_TYPE_2)
+		in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
+	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
+
+	err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in),
+				    NULL, NULL, NULL);
+	if (err)
+		goto free;
+
+	mw->ibmw.rkey = mw->mmkey.key;
+
+	resp.response_length = min(offsetof(typeof(resp), response_length) +
+				   sizeof(resp.response_length), udata->outlen);
+	if (resp.response_length) {
+		err = ib_copy_to_udata(udata, &resp, resp.response_length);
+		if (err) {
+			mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
+			goto free;
+		}
+	}
+
+	kfree(in);
+	return &mw->ibmw;
+
+free:
+	kfree(mw);
+	kfree(in);
+	return ERR_PTR(err);
+}
+
+int mlx5_ib_dealloc_mw(struct ib_mw *mw)
+{
+	struct mlx5_ib_mw *mmw = to_mmw(mw);
+	int err;
+
+	err =  mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
+				      &mmw->mmkey);
+	if (!err)
+		kfree(mmw);
+	return err;
+}
+
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 			    struct ib_mr_status *mr_status)
 {
@@ -1436,6 +1748,32 @@ done:
 	return ret;
 	return ret;
 }
 }
 
 
+static int
+mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
+		   struct scatterlist *sgl,
+		   unsigned short sg_nents)
+{
+	struct scatterlist *sg = sgl;
+	struct mlx5_klm *klms = mr->descs;
+	u32 lkey = mr->ibmr.pd->local_dma_lkey;
+	int i;
+
+	mr->ibmr.iova = sg_dma_address(sg);
+	mr->ibmr.length = 0;
+	mr->ndescs = sg_nents;
+
+	for_each_sg(sgl, sg, sg_nents, i) {
+		if (unlikely(i > mr->max_descs))
+			break;
+		klms[i].va = cpu_to_be64(sg_dma_address(sg));
+		klms[i].bcount = cpu_to_be32(sg_dma_len(sg));
+		klms[i].key = cpu_to_be32(lkey);
+		mr->ibmr.length += sg_dma_len(sg);
+	}
+
+	return i;
+}
+
 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
 {
 {
 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
@@ -1463,7 +1801,10 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
 				   mr->desc_size * mr->max_descs,
 				   mr->desc_size * mr->max_descs,
 				   DMA_TO_DEVICE);
 				   DMA_TO_DEVICE);
 
 
-	n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
+	if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents);
+	else
+		n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);
 
 
 	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
 	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
 				      mr->desc_size * mr->max_descs,
 				      mr->desc_size * mr->max_descs,

+ 5 - 5
drivers/infiniband/hw/mlx5/odp.c

@@ -142,13 +142,13 @@ static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
 						   u32 key)
 						   u32 key)
 {
 {
 	u32 base_key = mlx5_base_mkey(key);
 	u32 base_key = mlx5_base_mkey(key);
-	struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key);
-	struct mlx5_ib_mr *mr = container_of(mmr, struct mlx5_ib_mr, mmr);
+	struct mlx5_core_mkey *mmkey = __mlx5_mr_lookup(dev->mdev, base_key);
+	struct mlx5_ib_mr *mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);
 
 
-	if (!mmr || mmr->key != key || !mr->live)
+	if (!mmkey || mmkey->key != key || !mr->live)
 		return NULL;
 		return NULL;
 
 
-	return container_of(mmr, struct mlx5_ib_mr, mmr);
+	return container_of(mmkey, struct mlx5_ib_mr, mmkey);
 }
 }
 
 
 static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
 static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
@@ -232,7 +232,7 @@ static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
 	io_virt += pfault->mpfault.bytes_committed;
 	io_virt += pfault->mpfault.bytes_committed;
 	bcnt -= pfault->mpfault.bytes_committed;
 	bcnt -= pfault->mpfault.bytes_committed;
 
 
-	start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT;
+	start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
 
 
 	if (mr->umem->writable)
 	if (mr->umem->writable)
 		access_mask |= ODP_WRITE_ALLOWED_BIT;
 		access_mask |= ODP_WRITE_ALLOWED_BIT;

+ 242 - 29
drivers/infiniband/hw/mlx5/qp.c

@@ -58,6 +58,7 @@ enum {
 
 
 static const u32 mlx5_ib_opcode[] = {
 static const u32 mlx5_ib_opcode[] = {
 	[IB_WR_SEND]				= MLX5_OPCODE_SEND,
 	[IB_WR_SEND]				= MLX5_OPCODE_SEND,
+	[IB_WR_LSO]				= MLX5_OPCODE_LSO,
 	[IB_WR_SEND_WITH_IMM]			= MLX5_OPCODE_SEND_IMM,
 	[IB_WR_SEND_WITH_IMM]			= MLX5_OPCODE_SEND_IMM,
 	[IB_WR_RDMA_WRITE]			= MLX5_OPCODE_RDMA_WRITE,
 	[IB_WR_RDMA_WRITE]			= MLX5_OPCODE_RDMA_WRITE,
 	[IB_WR_RDMA_WRITE_WITH_IMM]		= MLX5_OPCODE_RDMA_WRITE_IMM,
 	[IB_WR_RDMA_WRITE_WITH_IMM]		= MLX5_OPCODE_RDMA_WRITE_IMM,
@@ -72,6 +73,9 @@ static const u32 mlx5_ib_opcode[] = {
 	[MLX5_IB_WR_UMR]			= MLX5_OPCODE_UMR,
 	[MLX5_IB_WR_UMR]			= MLX5_OPCODE_UMR,
 };
 };
 
 
+struct mlx5_wqe_eth_pad {
+	u8 rsvd0[16];
+};
 
 
 static int is_qp0(enum ib_qp_type qp_type)
 static int is_qp0(enum ib_qp_type qp_type)
 {
 {
@@ -260,11 +264,11 @@ static int set_rq_size(struct mlx5_ib_dev *dev, struct ib_qp_cap *cap,
 	return 0;
 	return 0;
 }
 }
 
 
-static int sq_overhead(enum ib_qp_type qp_type)
+static int sq_overhead(struct ib_qp_init_attr *attr)
 {
 {
 	int size = 0;
 	int size = 0;
 
 
-	switch (qp_type) {
+	switch (attr->qp_type) {
 	case IB_QPT_XRC_INI:
 	case IB_QPT_XRC_INI:
 		size += sizeof(struct mlx5_wqe_xrc_seg);
 		size += sizeof(struct mlx5_wqe_xrc_seg);
 		/* fall through */
 		/* fall through */
@@ -287,8 +291,12 @@ static int sq_overhead(enum ib_qp_type qp_type)
 		break;
 		break;
 
 
 	case IB_QPT_UD:
 	case IB_QPT_UD:
+		if (attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)
+			size += sizeof(struct mlx5_wqe_eth_pad) +
+				sizeof(struct mlx5_wqe_eth_seg);
+		/* fall through */
 	case IB_QPT_SMI:
 	case IB_QPT_SMI:
-	case IB_QPT_GSI:
+	case MLX5_IB_QPT_HW_GSI:
 		size += sizeof(struct mlx5_wqe_ctrl_seg) +
 		size += sizeof(struct mlx5_wqe_ctrl_seg) +
 			sizeof(struct mlx5_wqe_datagram_seg);
 			sizeof(struct mlx5_wqe_datagram_seg);
 		break;
 		break;
@@ -311,7 +319,7 @@ static int calc_send_wqe(struct ib_qp_init_attr *attr)
 	int inl_size = 0;
 	int inl_size = 0;
 	int size;
 	int size;
 
 
-	size = sq_overhead(attr->qp_type);
+	size = sq_overhead(attr);
 	if (size < 0)
 	if (size < 0)
 		return size;
 		return size;
 
 
@@ -348,8 +356,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr,
 		return -EINVAL;
 		return -EINVAL;
 	}
 	}
 
 
-	qp->max_inline_data = wqe_size - sq_overhead(attr->qp_type) -
-		sizeof(struct mlx5_wqe_inline_seg);
+	qp->max_inline_data = wqe_size - sq_overhead(attr) -
+			      sizeof(struct mlx5_wqe_inline_seg);
 	attr->cap.max_inline_data = qp->max_inline_data;
 	attr->cap.max_inline_data = qp->max_inline_data;
 
 
 	if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
 	if (attr->create_flags & IB_QP_CREATE_SIGNATURE_EN)
@@ -590,7 +598,7 @@ static int to_mlx5_st(enum ib_qp_type type)
 	case IB_QPT_XRC_INI:
 	case IB_QPT_XRC_INI:
 	case IB_QPT_XRC_TGT:		return MLX5_QP_ST_XRC;
 	case IB_QPT_XRC_TGT:		return MLX5_QP_ST_XRC;
 	case IB_QPT_SMI:		return MLX5_QP_ST_QP0;
 	case IB_QPT_SMI:		return MLX5_QP_ST_QP0;
-	case IB_QPT_GSI:		return MLX5_QP_ST_QP1;
+	case MLX5_IB_QPT_HW_GSI:	return MLX5_QP_ST_QP1;
 	case IB_QPT_RAW_IPV6:		return MLX5_QP_ST_RAW_IPV6;
 	case IB_QPT_RAW_IPV6:		return MLX5_QP_ST_RAW_IPV6;
 	case IB_QPT_RAW_PACKET:
 	case IB_QPT_RAW_PACKET:
 	case IB_QPT_RAW_ETHERTYPE:	return MLX5_QP_ST_RAW_ETHERTYPE;
 	case IB_QPT_RAW_ETHERTYPE:	return MLX5_QP_ST_RAW_ETHERTYPE;
@@ -783,7 +791,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 	int err;
 	int err;
 
 
 	uuari = &dev->mdev->priv.uuari;
 	uuari = &dev->mdev->priv.uuari;
-	if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN | IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK))
+	if (init_attr->create_flags & ~(IB_QP_CREATE_SIGNATURE_EN |
+					IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK |
+					IB_QP_CREATE_IPOIB_UD_LSO |
+					mlx5_ib_create_qp_sqpn_qp1()))
 		return -EINVAL;
 		return -EINVAL;
 
 
 	if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
 	if (init_attr->qp_type == MLX5_IB_QPT_REG_UMR)
@@ -828,6 +839,11 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
 	(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
 	(*in)->ctx.params1 |= cpu_to_be32(1 << 11);
 	(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
 	(*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4);
 
 
+	if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) {
+		(*in)->ctx.deth_sqpn = cpu_to_be32(1);
+		qp->flags |= MLX5_IB_QP_SQPN_QP1;
+	}
+
 	mlx5_fill_page_array(&qp->buf, (*in)->pas);
 	mlx5_fill_page_array(&qp->buf, (*in)->pas);
 
 
 	err = mlx5_db_alloc(dev->mdev, &qp->db);
 	err = mlx5_db_alloc(dev->mdev, &qp->db);
@@ -1228,6 +1244,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
 		if (init_attr->create_flags & IB_QP_CREATE_MANAGED_RECV)
 			qp->flags |= MLX5_IB_QP_MANAGED_RECV;
 			qp->flags |= MLX5_IB_QP_MANAGED_RECV;
 	}
 	}
+
+	if (init_attr->qp_type == IB_QPT_UD &&
+	    (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO))
+		if (!MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
+			mlx5_ib_dbg(dev, "ipoib UD lso qp isn't supported\n");
+			return -EOPNOTSUPP;
+		}
+
 	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
 	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
 		qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
 		qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
 
 
@@ -1271,6 +1295,11 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 					    ucmd.sq_wqe_count, max_wqes);
 					    ucmd.sq_wqe_count, max_wqes);
 				return -EINVAL;
 				return -EINVAL;
 			}
 			}
+			if (init_attr->create_flags &
+			    mlx5_ib_create_qp_sqpn_qp1()) {
+				mlx5_ib_dbg(dev, "user-space is not allowed to create UD QPs spoofing as QP1\n");
+				return -EINVAL;
+			}
 			err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
 			err = create_user_qp(dev, pd, qp, udata, init_attr, &in,
 					     &resp, &inlen, base);
 					     &resp, &inlen, base);
 			if (err)
 			if (err)
@@ -1385,6 +1414,13 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		/* 0xffffff means we ask to work with cqe version 0 */
 		/* 0xffffff means we ask to work with cqe version 0 */
 		MLX5_SET(qpc, qpc, user_index, uidx);
 		MLX5_SET(qpc, qpc, user_index, uidx);
 	}
 	}
+	/* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */
+	if (init_attr->qp_type == IB_QPT_UD &&
+	    (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) {
+		qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+		MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1);
+		qp->flags |= MLX5_IB_QP_LSO;
+	}
 
 
 	if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
 	if (init_attr->qp_type == IB_QPT_RAW_PACKET) {
 		qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
 		qp->raw_packet_qp.sq.ubuffer.buf_addr = ucmd.sq_buf_addr;
@@ -1494,7 +1530,7 @@ static void get_cqs(struct mlx5_ib_qp *qp,
 		break;
 		break;
 
 
 	case IB_QPT_SMI:
 	case IB_QPT_SMI:
-	case IB_QPT_GSI:
+	case MLX5_IB_QPT_HW_GSI:
 	case IB_QPT_RC:
 	case IB_QPT_RC:
 	case IB_QPT_UC:
 	case IB_QPT_UC:
 	case IB_QPT_UD:
 	case IB_QPT_UD:
@@ -1657,7 +1693,7 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 	case IB_QPT_UC:
 	case IB_QPT_UC:
 	case IB_QPT_UD:
 	case IB_QPT_UD:
 	case IB_QPT_SMI:
 	case IB_QPT_SMI:
-	case IB_QPT_GSI:
+	case MLX5_IB_QPT_HW_GSI:
 	case MLX5_IB_QPT_REG_UMR:
 	case MLX5_IB_QPT_REG_UMR:
 		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 		qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 		if (!qp)
 		if (!qp)
@@ -1686,6 +1722,9 @@ struct ib_qp *mlx5_ib_create_qp(struct ib_pd *pd,
 
 
 		break;
 		break;
 
 
+	case IB_QPT_GSI:
+		return mlx5_ib_gsi_create_qp(pd, init_attr);
+
 	case IB_QPT_RAW_IPV6:
 	case IB_QPT_RAW_IPV6:
 	case IB_QPT_RAW_ETHERTYPE:
 	case IB_QPT_RAW_ETHERTYPE:
 	case IB_QPT_MAX:
 	case IB_QPT_MAX:
@@ -1704,6 +1743,9 @@ int mlx5_ib_destroy_qp(struct ib_qp *qp)
 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
 	struct mlx5_ib_qp *mqp = to_mqp(qp);
 	struct mlx5_ib_qp *mqp = to_mqp(qp);
 
 
+	if (unlikely(qp->qp_type == IB_QPT_GSI))
+		return mlx5_ib_gsi_destroy_qp(qp);
+
 	destroy_qp_common(dev, mqp);
 	destroy_qp_common(dev, mqp);
 
 
 	kfree(mqp);
 	kfree(mqp);
@@ -2161,8 +2203,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 
 
 	context = &in->ctx;
 	context = &in->ctx;
 	err = to_mlx5_st(ibqp->qp_type);
 	err = to_mlx5_st(ibqp->qp_type);
-	if (err < 0)
+	if (err < 0) {
+		mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type);
 		goto out;
 		goto out;
+	}
 
 
 	context->flags = cpu_to_be32(err << 16);
 	context->flags = cpu_to_be32(err << 16);
 
 
@@ -2182,7 +2226,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 		}
 		}
 	}
 	}
 
 
-	if (ibqp->qp_type == IB_QPT_GSI || ibqp->qp_type == IB_QPT_SMI) {
+	if (is_sqp(ibqp->qp_type)) {
 		context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
 		context->mtu_msgmax = (IB_MTU_256 << 5) | 8;
 	} else if (ibqp->qp_type == IB_QPT_UD ||
 	} else if (ibqp->qp_type == IB_QPT_UD ||
 		   ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
 		   ibqp->qp_type == MLX5_IB_QPT_REG_UMR) {
@@ -2284,6 +2328,8 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
 	if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
 		context->sq_crq_size |= cpu_to_be16(1 << 4);
 		context->sq_crq_size |= cpu_to_be16(1 << 4);
 
 
+	if (qp->flags & MLX5_IB_QP_SQPN_QP1)
+		context->deth_sqpn = cpu_to_be32(1);
 
 
 	mlx5_cur = to_mlx5_state(cur_state);
 	mlx5_cur = to_mlx5_state(cur_state);
 	mlx5_new = to_mlx5_state(new_state);
 	mlx5_new = to_mlx5_state(new_state);
@@ -2363,11 +2409,18 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 {
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
 	struct mlx5_ib_qp *qp = to_mqp(ibqp);
+	enum ib_qp_type qp_type;
 	enum ib_qp_state cur_state, new_state;
 	enum ib_qp_state cur_state, new_state;
 	int err = -EINVAL;
 	int err = -EINVAL;
 	int port;
 	int port;
 	enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
 	enum rdma_link_layer ll = IB_LINK_LAYER_UNSPECIFIED;
 
 
+	if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+		return mlx5_ib_gsi_modify_qp(ibqp, attr, attr_mask);
+
+	qp_type = (unlikely(ibqp->qp_type == MLX5_IB_QPT_HW_GSI)) ?
+		IB_QPT_GSI : ibqp->qp_type;
+
 	mutex_lock(&qp->mutex);
 	mutex_lock(&qp->mutex);
 
 
 	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
 	cur_state = attr_mask & IB_QP_CUR_STATE ? attr->cur_qp_state : qp->state;
@@ -2378,32 +2431,46 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
 		ll = dev->ib_dev.get_link_layer(&dev->ib_dev, port);
 	}
 	}
 
 
-	if (ibqp->qp_type != MLX5_IB_QPT_REG_UMR &&
-	    !ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type, attr_mask,
-				ll))
+	if (qp_type != MLX5_IB_QPT_REG_UMR &&
+	    !ib_modify_qp_is_ok(cur_state, new_state, qp_type, attr_mask, ll)) {
+		mlx5_ib_dbg(dev, "invalid QP state transition from %d to %d, qp_type %d, attr_mask 0x%x\n",
+			    cur_state, new_state, ibqp->qp_type, attr_mask);
 		goto out;
 		goto out;
+	}
 
 
 	if ((attr_mask & IB_QP_PORT) &&
 	if ((attr_mask & IB_QP_PORT) &&
 	    (attr->port_num == 0 ||
 	    (attr->port_num == 0 ||
-	     attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports)))
+	     attr->port_num > MLX5_CAP_GEN(dev->mdev, num_ports))) {
+		mlx5_ib_dbg(dev, "invalid port number %d. number of ports is %d\n",
+			    attr->port_num, dev->num_ports);
 		goto out;
 		goto out;
+	}
 
 
 	if (attr_mask & IB_QP_PKEY_INDEX) {
 	if (attr_mask & IB_QP_PKEY_INDEX) {
 		port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
 		port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
 		if (attr->pkey_index >=
 		if (attr->pkey_index >=
-		    dev->mdev->port_caps[port - 1].pkey_table_len)
+		    dev->mdev->port_caps[port - 1].pkey_table_len) {
+			mlx5_ib_dbg(dev, "invalid pkey index %d\n",
+				    attr->pkey_index);
 			goto out;
 			goto out;
+		}
 	}
 	}
 
 
 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
 	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
 	    attr->max_rd_atomic >
 	    attr->max_rd_atomic >
-	    (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp)))
+	    (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
+		mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
+			    attr->max_rd_atomic);
 		goto out;
 		goto out;
+	}
 
 
 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
 	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
 	    attr->max_dest_rd_atomic >
 	    attr->max_dest_rd_atomic >
-	    (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp)))
+	    (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
+		mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
+			    attr->max_dest_rd_atomic);
 		goto out;
 		goto out;
+	}
 
 
 	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
 	if (cur_state == new_state && cur_state == IB_QPS_RESET) {
 		err = 0;
 		err = 0;
@@ -2442,6 +2509,59 @@ static __always_inline void set_raddr_seg(struct mlx5_wqe_raddr_seg *rseg,
 	rseg->reserved = 0;
 	rseg->reserved = 0;
 }
 }
 
 
+static void *set_eth_seg(struct mlx5_wqe_eth_seg *eseg,
+			 struct ib_send_wr *wr, void *qend,
+			 struct mlx5_ib_qp *qp, int *size)
+{
+	void *seg = eseg;
+
+	memset(eseg, 0, sizeof(struct mlx5_wqe_eth_seg));
+
+	if (wr->send_flags & IB_SEND_IP_CSUM)
+		eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM |
+				 MLX5_ETH_WQE_L4_CSUM;
+
+	seg += sizeof(struct mlx5_wqe_eth_seg);
+	*size += sizeof(struct mlx5_wqe_eth_seg) / 16;
+
+	if (wr->opcode == IB_WR_LSO) {
+		struct ib_ud_wr *ud_wr = container_of(wr, struct ib_ud_wr, wr);
+		int size_of_inl_hdr_start = sizeof(eseg->inline_hdr_start);
+		u64 left, leftlen, copysz;
+		void *pdata = ud_wr->header;
+
+		left = ud_wr->hlen;
+		eseg->mss = cpu_to_be16(ud_wr->mss);
+		eseg->inline_hdr_sz = cpu_to_be16(left);
+
+		/*
+		 * check if there is space till the end of queue, if yes,
+		 * copy all in one shot, otherwise copy till the end of queue,
+		 * rollback and than the copy the left
+		 */
+		leftlen = qend - (void *)eseg->inline_hdr_start;
+		copysz = min_t(u64, leftlen, left);
+
+		memcpy(seg - size_of_inl_hdr_start, pdata, copysz);
+
+		if (likely(copysz > size_of_inl_hdr_start)) {
+			seg += ALIGN(copysz - size_of_inl_hdr_start, 16);
+			*size += ALIGN(copysz - size_of_inl_hdr_start, 16) / 16;
+		}
+
+		if (unlikely(copysz < left)) { /* the last wqe in the queue */
+			seg = mlx5_get_send_wqe(qp, 0);
+			left -= copysz;
+			pdata += copysz;
+			memcpy(seg, pdata, left);
+			seg += ALIGN(left, 16);
+			*size += ALIGN(left, 16) / 16;
+		}
+	}
+
+	return seg;
+}
+
 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
 static void set_datagram_seg(struct mlx5_wqe_datagram_seg *dseg,
 			     struct ib_send_wr *wr)
 			     struct ib_send_wr *wr)
 {
 {
@@ -2509,6 +2629,11 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr,
 	int ndescs = mr->ndescs;
 	int ndescs = mr->ndescs;
 
 
 	memset(umr, 0, sizeof(*umr));
 	memset(umr, 0, sizeof(*umr));
+
+	if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+		/* KLMs take twice the size of MTTs */
+		ndescs *= 2;
+
 	umr->flags = MLX5_UMR_CHECK_NOT_FREE;
 	umr->flags = MLX5_UMR_CHECK_NOT_FREE;
 	umr->klm_octowords = get_klm_octo(ndescs);
 	umr->klm_octowords = get_klm_octo(ndescs);
 	umr->mkey_mask = frwr_mkey_mask();
 	umr->mkey_mask = frwr_mkey_mask();
@@ -2558,6 +2683,44 @@ static __be64 get_umr_update_mtt_mask(void)
 	return cpu_to_be64(result);
 	return cpu_to_be64(result);
 }
 }
 
 
+static __be64 get_umr_update_translation_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_LEN |
+		 MLX5_MKEY_MASK_PAGE_SIZE |
+		 MLX5_MKEY_MASK_START_ADDR |
+		 MLX5_MKEY_MASK_KEY |
+		 MLX5_MKEY_MASK_FREE;
+
+	return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_access_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_LW |
+		 MLX5_MKEY_MASK_RR |
+		 MLX5_MKEY_MASK_RW |
+		 MLX5_MKEY_MASK_A |
+		 MLX5_MKEY_MASK_KEY |
+		 MLX5_MKEY_MASK_FREE;
+
+	return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_pd_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_PD |
+		 MLX5_MKEY_MASK_KEY |
+		 MLX5_MKEY_MASK_FREE;
+
+	return cpu_to_be64(result);
+}
+
 static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 				struct ib_send_wr *wr)
 				struct ib_send_wr *wr)
 {
 {
@@ -2576,9 +2739,15 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 			umr->mkey_mask = get_umr_update_mtt_mask();
 			umr->mkey_mask = get_umr_update_mtt_mask();
 			umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
 			umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
 			umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
 			umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
-		} else {
-			umr->mkey_mask = get_umr_reg_mr_mask();
 		}
 		}
+		if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_TRANSLATION)
+			umr->mkey_mask |= get_umr_update_translation_mask();
+		if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_ACCESS)
+			umr->mkey_mask |= get_umr_update_access_mask();
+		if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD)
+			umr->mkey_mask |= get_umr_update_pd_mask();
+		if (!umr->mkey_mask)
+			umr->mkey_mask = get_umr_reg_mr_mask();
 	} else {
 	} else {
 		umr->mkey_mask = get_umr_unreg_mr_mask();
 		umr->mkey_mask = get_umr_unreg_mr_mask();
 	}
 	}
@@ -2603,13 +2772,19 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg,
 	int ndescs = ALIGN(mr->ndescs, 8) >> 1;
 	int ndescs = ALIGN(mr->ndescs, 8) >> 1;
 
 
 	memset(seg, 0, sizeof(*seg));
 	memset(seg, 0, sizeof(*seg));
-	seg->flags = get_umr_flags(access) | MLX5_ACCESS_MODE_MTT;
+
+	if (mr->access_mode == MLX5_ACCESS_MODE_MTT)
+		seg->log2_page_size = ilog2(mr->ibmr.page_size);
+	else if (mr->access_mode == MLX5_ACCESS_MODE_KLM)
+		/* KLMs take twice the size of MTTs */
+		ndescs *= 2;
+
+	seg->flags = get_umr_flags(access) | mr->access_mode;
 	seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
 	seg->qpn_mkey7_0 = cpu_to_be32((key & 0xff) | 0xffffff00);
 	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
 	seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL);
 	seg->start_addr = cpu_to_be64(mr->ibmr.iova);
 	seg->start_addr = cpu_to_be64(mr->ibmr.iova);
 	seg->len = cpu_to_be64(mr->ibmr.length);
 	seg->len = cpu_to_be64(mr->ibmr.length);
 	seg->xlt_oct_size = cpu_to_be32(ndescs);
 	seg->xlt_oct_size = cpu_to_be32(ndescs);
-	seg->log2_page_size = ilog2(mr->ibmr.page_size);
 }
 }
 
 
 static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
 static void set_linv_mkey_seg(struct mlx5_mkey_seg *seg)
@@ -2630,7 +2805,8 @@ static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *w
 
 
 	seg->flags = convert_access(umrwr->access_flags);
 	seg->flags = convert_access(umrwr->access_flags);
 	if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
 	if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
-		seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+		if (umrwr->pd)
+			seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
 		seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
 		seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
 	}
 	}
 	seg->len = cpu_to_be64(umrwr->length);
 	seg->len = cpu_to_be64(umrwr->length);
@@ -3196,13 +3372,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 {
 {
 	struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
 	struct mlx5_wqe_ctrl_seg *ctrl = NULL;  /* compiler warning */
 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
-	struct mlx5_ib_qp *qp = to_mqp(ibqp);
+	struct mlx5_ib_qp *qp;
 	struct mlx5_ib_mr *mr;
 	struct mlx5_ib_mr *mr;
 	struct mlx5_wqe_data_seg *dpseg;
 	struct mlx5_wqe_data_seg *dpseg;
 	struct mlx5_wqe_xrc_seg *xrc;
 	struct mlx5_wqe_xrc_seg *xrc;
-	struct mlx5_bf *bf = qp->bf;
+	struct mlx5_bf *bf;
 	int uninitialized_var(size);
 	int uninitialized_var(size);
-	void *qend = qp->sq.qend;
+	void *qend;
 	unsigned long flags;
 	unsigned long flags;
 	unsigned idx;
 	unsigned idx;
 	int err = 0;
 	int err = 0;
@@ -3214,6 +3390,13 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	u8 next_fence = 0;
 	u8 next_fence = 0;
 	u8 fence;
 	u8 fence;
 
 
+	if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+		return mlx5_ib_gsi_post_send(ibqp, wr, bad_wr);
+
+	qp = to_mqp(ibqp);
+	bf = qp->bf;
+	qend = qp->sq.qend;
+
 	spin_lock_irqsave(&qp->sq.lock, flags);
 	spin_lock_irqsave(&qp->sq.lock, flags);
 
 
 	for (nreq = 0; wr; nreq++, wr = wr->next) {
 	for (nreq = 0; wr; nreq++, wr = wr->next) {
@@ -3373,16 +3556,37 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			}
 			}
 			break;
 			break;
 
 
-		case IB_QPT_UD:
 		case IB_QPT_SMI:
 		case IB_QPT_SMI:
-		case IB_QPT_GSI:
+		case MLX5_IB_QPT_HW_GSI:
 			set_datagram_seg(seg, wr);
 			set_datagram_seg(seg, wr);
 			seg += sizeof(struct mlx5_wqe_datagram_seg);
 			seg += sizeof(struct mlx5_wqe_datagram_seg);
 			size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
 			size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
 			if (unlikely((seg == qend)))
 			if (unlikely((seg == qend)))
 				seg = mlx5_get_send_wqe(qp, 0);
 				seg = mlx5_get_send_wqe(qp, 0);
 			break;
 			break;
+		case IB_QPT_UD:
+			set_datagram_seg(seg, wr);
+			seg += sizeof(struct mlx5_wqe_datagram_seg);
+			size += sizeof(struct mlx5_wqe_datagram_seg) / 16;
+
+			if (unlikely((seg == qend)))
+				seg = mlx5_get_send_wqe(qp, 0);
+
+			/* handle qp that supports ud offload */
+			if (qp->flags & IB_QP_CREATE_IPOIB_UD_LSO) {
+				struct mlx5_wqe_eth_pad *pad;
+
+				pad = seg;
+				memset(pad, 0, sizeof(struct mlx5_wqe_eth_pad));
+				seg += sizeof(struct mlx5_wqe_eth_pad);
+				size += sizeof(struct mlx5_wqe_eth_pad) / 16;
 
 
+				seg = set_eth_seg(seg, wr, qend, qp, &size);
+
+				if (unlikely((seg == qend)))
+					seg = mlx5_get_send_wqe(qp, 0);
+			}
+			break;
 		case MLX5_IB_QPT_REG_UMR:
 		case MLX5_IB_QPT_REG_UMR:
 			if (wr->opcode != MLX5_IB_WR_UMR) {
 			if (wr->opcode != MLX5_IB_WR_UMR) {
 				err = -EINVAL;
 				err = -EINVAL;
@@ -3502,6 +3706,9 @@ int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 	int ind;
 	int ind;
 	int i;
 	int i;
 
 
+	if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+		return mlx5_ib_gsi_post_recv(ibqp, wr, bad_wr);
+
 	spin_lock_irqsave(&qp->rq.lock, flags);
 	spin_lock_irqsave(&qp->rq.lock, flags);
 
 
 	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
 	ind = qp->rq.head & (qp->rq.wqe_cnt - 1);
@@ -3822,6 +4029,10 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 	int err = 0;
 	int err = 0;
 	u8 raw_packet_qp_state;
 	u8 raw_packet_qp_state;
 
 
+	if (unlikely(ibqp->qp_type == IB_QPT_GSI))
+		return mlx5_ib_gsi_query_qp(ibqp, qp_attr, qp_attr_mask,
+					    qp_init_attr);
+
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 	/*
 	/*
 	 * Wait for any outstanding page faults, in case the user frees memory
 	 * Wait for any outstanding page faults, in case the user frees memory
@@ -3874,6 +4085,8 @@ int mlx5_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 		qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
 		qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_SEND;
 	if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
 	if (qp->flags & MLX5_IB_QP_MANAGED_RECV)
 		qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
 		qp_init_attr->create_flags |= IB_QP_CREATE_MANAGED_RECV;
+	if (qp->flags & MLX5_IB_QP_SQPN_QP1)
+		qp_init_attr->create_flags |= mlx5_ib_create_qp_sqpn_qp1();
 
 
 	qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
 	qp_init_attr->sq_sig_type = qp->sq_signal_bits & MLX5_WQE_CTRL_CQ_UPDATE ?
 		IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;
 		IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR;

+ 22 - 19
drivers/infiniband/hw/mlx5/srq.c

@@ -75,7 +75,8 @@ static void mlx5_ib_srq_event(struct mlx5_core_srq *srq, enum mlx5_event type)
 
 
 static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 			   struct mlx5_create_srq_mbox_in **in,
 			   struct mlx5_create_srq_mbox_in **in,
-			   struct ib_udata *udata, int buf_size, int *inlen)
+			   struct ib_udata *udata, int buf_size, int *inlen,
+			   int is_xrc)
 {
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct mlx5_ib_create_srq ucmd = {};
 	struct mlx5_ib_create_srq ucmd = {};
@@ -87,13 +88,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 	int ncont;
 	int ncont;
 	u32 offset;
 	u32 offset;
 	u32 uidx = MLX5_IB_DEFAULT_UIDX;
 	u32 uidx = MLX5_IB_DEFAULT_UIDX;
-	int drv_data = udata->inlen - sizeof(struct ib_uverbs_cmd_hdr);
 
 
-	if (drv_data < 0)
-		return -EINVAL;
-
-	ucmdlen = (drv_data < sizeof(ucmd)) ?
-		  drv_data : sizeof(ucmd);
+	ucmdlen = min(udata->inlen, sizeof(ucmd));
 
 
 	if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
 	if (ib_copy_from_udata(&ucmd, udata, ucmdlen)) {
 		mlx5_ib_dbg(dev, "failed copy udata\n");
 		mlx5_ib_dbg(dev, "failed copy udata\n");
@@ -103,15 +99,17 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 	if (ucmd.reserved0 || ucmd.reserved1)
 	if (ucmd.reserved0 || ucmd.reserved1)
 		return -EINVAL;
 		return -EINVAL;
 
 
-	if (drv_data > sizeof(ucmd) &&
+	if (udata->inlen > sizeof(ucmd) &&
 	    !ib_is_udata_cleared(udata, sizeof(ucmd),
 	    !ib_is_udata_cleared(udata, sizeof(ucmd),
-				 drv_data - sizeof(ucmd)))
+				 udata->inlen - sizeof(ucmd)))
 		return -EINVAL;
 		return -EINVAL;
 
 
-	err = get_srq_user_index(to_mucontext(pd->uobject->context),
-				 &ucmd, udata->inlen, &uidx);
-	if (err)
-		return err;
+	if (is_xrc) {
+		err = get_srq_user_index(to_mucontext(pd->uobject->context),
+					 &ucmd, udata->inlen, &uidx);
+		if (err)
+			return err;
+	}
 
 
 	srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 	srq->wq_sig = !!(ucmd.flags & MLX5_SRQ_FLAG_SIGNATURE);
 
 
@@ -151,7 +149,8 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq,
 	(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
 	(*in)->ctx.pgoff_cqn = cpu_to_be32(offset << 26);
 
 
-	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+	if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+	     is_xrc){
 		xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
 		xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
 				     xrc_srq_context_entry);
 				     xrc_srq_context_entry);
 		MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
 		MLX5_SET(xrc_srqc, xsrqc, user_index, uidx);
@@ -170,7 +169,7 @@ err_umem:
 
 
 static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 			     struct mlx5_create_srq_mbox_in **in, int buf_size,
 			     struct mlx5_create_srq_mbox_in **in, int buf_size,
-			     int *inlen)
+			     int *inlen, int is_xrc)
 {
 {
 	int err;
 	int err;
 	int i;
 	int i;
@@ -224,7 +223,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
 
 
 	(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 	(*in)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
 
 
-	if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) {
+	if ((MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1) &&
+	     is_xrc){
 		xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
 		xsrqc = MLX5_ADDR_OF(create_xrc_srq_in, *in,
 				     xrc_srq_context_entry);
 				     xrc_srq_context_entry);
 		/* 0xffffff means we ask to work with cqe version 0 */
 		/* 0xffffff means we ask to work with cqe version 0 */
@@ -302,10 +302,14 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
 		    desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
 		    desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs,
 		    srq->msrq.max_avail_gather);
 		    srq->msrq.max_avail_gather);
 
 
+	is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
+
 	if (pd->uobject)
 	if (pd->uobject)
-		err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen);
+		err = create_srq_user(pd, srq, &in, udata, buf_size, &inlen,
+				      is_xrc);
 	else
 	else
-		err = create_srq_kernel(dev, srq, &in, buf_size, &inlen);
+		err = create_srq_kernel(dev, srq, &in, buf_size, &inlen,
+					is_xrc);
 
 
 	if (err) {
 	if (err) {
 		mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
 		mlx5_ib_warn(dev, "create srq %s failed, err %d\n",
@@ -313,7 +317,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd,
 		goto err_srq;
 		goto err_srq;
 	}
 	}
 
 
-	is_xrc = (init_attr->srq_type == IB_SRQT_XRC);
 	in->ctx.state_log_sz = ilog2(srq->msrq.max);
 	in->ctx.state_log_sz = ilog2(srq->msrq.max);
 	flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
 	flgs = ((srq->msrq.wqe_shift - 4) | (is_xrc << 5) | (srq->wq_sig << 7)) << 24;
 	xrcdn = 0;
 	xrcdn = 0;

+ 7 - 0
drivers/infiniband/hw/mlx5/user.h

@@ -152,6 +152,13 @@ struct mlx5_ib_create_qp_resp {
 	__u32	uuar_index;
 	__u32	uuar_index;
 };
 };
 
 
+struct mlx5_ib_alloc_mw {
+	__u32	comp_mask;
+	__u8	num_klms;
+	__u8	reserved1;
+	__u16	reserved2;
+};
+
 static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
 static inline int get_qp_user_index(struct mlx5_ib_ucontext *ucontext,
 				    struct mlx5_ib_create_qp *ucmd,
 				    struct mlx5_ib_create_qp *ucmd,
 				    int inlen,
 				    int inlen,

+ 0 - 1
drivers/infiniband/hw/nes/Kconfig

@@ -2,7 +2,6 @@ config INFINIBAND_NES
 	tristate "NetEffect RNIC Driver"
 	tristate "NetEffect RNIC Driver"
 	depends on PCI && INET && INFINIBAND
 	depends on PCI && INET && INFINIBAND
 	select LIBCRC32C
 	select LIBCRC32C
-	select INET_LRO
 	---help---
 	---help---
 	  This is the RDMA Network Interface Card (RNIC) driver for
 	  This is the RDMA Network Interface Card (RNIC) driver for
 	  NetEffect Ethernet Cluster Server Adapters.
 	  NetEffect Ethernet Cluster Server Adapters.

+ 0 - 25
drivers/infiniband/hw/nes/nes.c

@@ -111,17 +111,6 @@ static struct pci_device_id nes_pci_table[] = {
 
 
 MODULE_DEVICE_TABLE(pci, nes_pci_table);
 MODULE_DEVICE_TABLE(pci, nes_pci_table);
 
 
-/* registered nes netlink callbacks */
-static struct ibnl_client_cbs nes_nl_cb_table[] = {
-	[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
-	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
-	[RDMA_NL_IWPM_QUERY_MAPPING] = {.dump = iwpm_add_and_query_mapping_cb},
-	[RDMA_NL_IWPM_REMOTE_INFO] = {.dump = iwpm_remote_info_cb},
-	[RDMA_NL_IWPM_HANDLE_ERR] = {.dump = iwpm_mapping_error_cb},
-	[RDMA_NL_IWPM_MAPINFO] = {.dump = iwpm_mapping_info_cb},
-	[RDMA_NL_IWPM_MAPINFO_NUM] = {.dump = iwpm_ack_mapping_info_cb}
-};
-
 static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
 static int nes_inetaddr_event(struct notifier_block *, unsigned long, void *);
 static int nes_net_event(struct notifier_block *, unsigned long, void *);
 static int nes_net_event(struct notifier_block *, unsigned long, void *);
 static int nes_notifiers_registered;
 static int nes_notifiers_registered;
@@ -682,17 +671,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
 	}
 	}
 	nes_notifiers_registered++;
 	nes_notifiers_registered++;
 
 
-	if (ibnl_add_client(RDMA_NL_NES, RDMA_NL_IWPM_NUM_OPS, nes_nl_cb_table))
-		printk(KERN_ERR PFX "%s[%u]: Failed to add netlink callback\n",
-			__func__, __LINE__);
-
-	ret = iwpm_init(RDMA_NL_NES);
-	if (ret) {
-		printk(KERN_ERR PFX "%s: port mapper initialization failed\n",
-				pci_name(pcidev));
-		goto bail7;
-	}
-
 	INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status);
 	INIT_DELAYED_WORK(&nesdev->work, nes_recheck_link_status);
 
 
 	/* Initialize network devices */
 	/* Initialize network devices */
@@ -731,7 +709,6 @@ static int nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
 
 
 	nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n",
 	nes_debug(NES_DBG_INIT, "netdev_count=%d, nesadapter->netdev_count=%d\n",
 			nesdev->netdev_count, nesdev->nesadapter->netdev_count);
 			nesdev->netdev_count, nesdev->nesadapter->netdev_count);
-	ibnl_remove_client(RDMA_NL_NES);
 
 
 	nes_notifiers_registered--;
 	nes_notifiers_registered--;
 	if (nes_notifiers_registered == 0) {
 	if (nes_notifiers_registered == 0) {
@@ -795,8 +772,6 @@ static void nes_remove(struct pci_dev *pcidev)
 				nesdev->nesadapter->netdev_count--;
 				nesdev->nesadapter->netdev_count--;
 			}
 			}
 		}
 		}
-	ibnl_remove_client(RDMA_NL_NES);
-	iwpm_exit(RDMA_NL_NES);
 
 
 	nes_notifiers_registered--;
 	nes_notifiers_registered--;
 	if (nes_notifiers_registered == 0) {
 	if (nes_notifiers_registered == 0) {

+ 86 - 275
drivers/infiniband/hw/nes/nes_cm.c

@@ -482,11 +482,11 @@ static void form_cm_frame(struct sk_buff *skb,
 	iph->ttl = 0x40;
 	iph->ttl = 0x40;
 	iph->protocol = 0x06;   /* IPPROTO_TCP */
 	iph->protocol = 0x06;   /* IPPROTO_TCP */
 
 
-	iph->saddr = htonl(cm_node->mapped_loc_addr);
-	iph->daddr = htonl(cm_node->mapped_rem_addr);
+	iph->saddr = htonl(cm_node->loc_addr);
+	iph->daddr = htonl(cm_node->rem_addr);
 
 
-	tcph->source = htons(cm_node->mapped_loc_port);
-	tcph->dest = htons(cm_node->mapped_rem_port);
+	tcph->source = htons(cm_node->loc_port);
+	tcph->dest = htons(cm_node->rem_port);
 	tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
 	tcph->seq = htonl(cm_node->tcp_cntxt.loc_seq_num);
 
 
 	if (flags & SET_ACK) {
 	if (flags & SET_ACK) {
@@ -525,125 +525,6 @@ static void form_cm_frame(struct sk_buff *skb,
 	cm_packets_created++;
 	cm_packets_created++;
 }
 }
 
 
-/*
- * nes_create_sockaddr - Record ip addr and tcp port in a sockaddr struct
- */
-static void nes_create_sockaddr(__be32 ip_addr, __be16 port,
-				struct sockaddr_storage *addr)
-{
-	struct sockaddr_in *nes_sockaddr = (struct sockaddr_in *)addr;
-	nes_sockaddr->sin_family = AF_INET;
-	memcpy(&nes_sockaddr->sin_addr.s_addr, &ip_addr, sizeof(__be32));
-	nes_sockaddr->sin_port = port;
-}
-
-/*
- * nes_create_mapinfo - Create a mapinfo object in the port mapper data base
- */
-static int nes_create_mapinfo(struct nes_cm_info *cm_info)
-{
-	struct sockaddr_storage local_sockaddr;
-	struct sockaddr_storage mapped_sockaddr;
-
-	nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port),
-				&local_sockaddr);
-	nes_create_sockaddr(htonl(cm_info->mapped_loc_addr),
-			htons(cm_info->mapped_loc_port), &mapped_sockaddr);
-
-	return iwpm_create_mapinfo(&local_sockaddr,
-				&mapped_sockaddr, RDMA_NL_NES);
-}
-
-/*
- * nes_remove_mapinfo - Remove a mapinfo object from the port mapper data base
- *                      and send a remove mapping op message to
- *                      the userspace port mapper
- */
-static int nes_remove_mapinfo(u32 loc_addr, u16 loc_port,
-			u32 mapped_loc_addr, u16 mapped_loc_port)
-{
-	struct sockaddr_storage local_sockaddr;
-	struct sockaddr_storage mapped_sockaddr;
-
-	nes_create_sockaddr(htonl(loc_addr), htons(loc_port), &local_sockaddr);
-	nes_create_sockaddr(htonl(mapped_loc_addr), htons(mapped_loc_port),
-				&mapped_sockaddr);
-
-	iwpm_remove_mapinfo(&local_sockaddr, &mapped_sockaddr);
-	return iwpm_remove_mapping(&local_sockaddr, RDMA_NL_NES);
-}
-
-/*
- * nes_form_pm_msg - Form a port mapper message with mapping info
- */
-static void nes_form_pm_msg(struct nes_cm_info *cm_info,
-				struct iwpm_sa_data *pm_msg)
-{
-	nes_create_sockaddr(htonl(cm_info->loc_addr), htons(cm_info->loc_port),
-				&pm_msg->loc_addr);
-	nes_create_sockaddr(htonl(cm_info->rem_addr), htons(cm_info->rem_port),
-				&pm_msg->rem_addr);
-}
-
-/*
- * nes_form_reg_msg - Form a port mapper message with dev info
- */
-static void nes_form_reg_msg(struct nes_vnic *nesvnic,
-			struct iwpm_dev_data *pm_msg)
-{
-	memcpy(pm_msg->dev_name, nesvnic->nesibdev->ibdev.name,
-				IWPM_DEVNAME_SIZE);
-	memcpy(pm_msg->if_name, nesvnic->netdev->name, IWPM_IFNAME_SIZE);
-}
-
-static void record_sockaddr_info(struct sockaddr_storage *addr_info,
-					nes_addr_t *ip_addr, u16 *port_num)
-{
-	struct sockaddr_in *in_addr = (struct sockaddr_in *)addr_info;
-
-	if (in_addr->sin_family == AF_INET) {
-		*ip_addr = ntohl(in_addr->sin_addr.s_addr);
-		*port_num = ntohs(in_addr->sin_port);
-	}
-}
-
-/*
- * nes_record_pm_msg - Save the received mapping info
- */
-static void nes_record_pm_msg(struct nes_cm_info *cm_info,
-			struct iwpm_sa_data *pm_msg)
-{
-	record_sockaddr_info(&pm_msg->mapped_loc_addr,
-		&cm_info->mapped_loc_addr, &cm_info->mapped_loc_port);
-
-	record_sockaddr_info(&pm_msg->mapped_rem_addr,
-		&cm_info->mapped_rem_addr, &cm_info->mapped_rem_port);
-}
-
-/*
- * nes_get_reminfo - Get the address info of the remote connecting peer
- */
-static int nes_get_remote_addr(struct nes_cm_node *cm_node)
-{
-	struct sockaddr_storage mapped_loc_addr, mapped_rem_addr;
-	struct sockaddr_storage remote_addr;
-	int ret;
-
-	nes_create_sockaddr(htonl(cm_node->mapped_loc_addr),
-			htons(cm_node->mapped_loc_port), &mapped_loc_addr);
-	nes_create_sockaddr(htonl(cm_node->mapped_rem_addr),
-			htons(cm_node->mapped_rem_port), &mapped_rem_addr);
-
-	ret = iwpm_get_remote_info(&mapped_loc_addr, &mapped_rem_addr,
-				&remote_addr, RDMA_NL_NES);
-	if (ret)
-		nes_debug(NES_DBG_CM, "Unable to find remote peer address info\n");
-	else
-		record_sockaddr_info(&remote_addr, &cm_node->rem_addr,
-				&cm_node->rem_port);
-	return ret;
-}
-
 /**
 /**
  * print_core - dump a cm core
  * print_core - dump a cm core
  */
  */
@@ -1266,11 +1147,10 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
 			  loc_addr, loc_port,
 			  loc_addr, loc_port,
 			  cm_node->rem_addr, cm_node->rem_port,
 			  cm_node->rem_addr, cm_node->rem_port,
 			  rem_addr, rem_port);
 			  rem_addr, rem_port);
-		if ((cm_node->mapped_loc_addr == loc_addr) &&
-			(cm_node->mapped_loc_port == loc_port) &&
-			(cm_node->mapped_rem_addr == rem_addr) &&
-			(cm_node->mapped_rem_port == rem_port)) {
-
+		if ((cm_node->loc_addr == loc_addr) &&
+		    (cm_node->loc_port == loc_port) &&
+		    (cm_node->rem_addr == rem_addr) &&
+		    (cm_node->rem_port == rem_port)) {
 			add_ref_cm_node(cm_node);
 			add_ref_cm_node(cm_node);
 			spin_unlock_irqrestore(&cm_core->ht_lock, flags);
 			spin_unlock_irqrestore(&cm_core->ht_lock, flags);
 			return cm_node;
 			return cm_node;
@@ -1287,8 +1167,8 @@ static struct nes_cm_node *find_node(struct nes_cm_core *cm_core,
  * find_listener - find a cm node listening on this addr-port pair
  * find_listener - find a cm node listening on this addr-port pair
  */
  */
 static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
 static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
-					nes_addr_t dst_addr, u16 dst_port,
-					enum nes_cm_listener_state listener_state, int local)
+					     nes_addr_t dst_addr, u16 dst_port,
+					     enum nes_cm_listener_state listener_state)
 {
 {
 	unsigned long flags;
 	unsigned long flags;
 	struct nes_cm_listener *listen_node;
 	struct nes_cm_listener *listen_node;
@@ -1298,13 +1178,9 @@ static struct nes_cm_listener *find_listener(struct nes_cm_core *cm_core,
 	/* walk list and find cm_node associated with this session ID */
 	/* walk list and find cm_node associated with this session ID */
 	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
 	spin_lock_irqsave(&cm_core->listen_list_lock, flags);
 	list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
 	list_for_each_entry(listen_node, &cm_core->listen_list.list, list) {
-		if (local) {
-			listen_addr = listen_node->loc_addr;
-			listen_port = listen_node->loc_port;
-		} else {
-			listen_addr = listen_node->mapped_loc_addr;
-			listen_port = listen_node->mapped_loc_port;
-		}
+		listen_addr = listen_node->loc_addr;
+		listen_port = listen_node->loc_port;
+
 		/* compare node pair, return node handle if a match */
 		/* compare node pair, return node handle if a match */
 		if (((listen_addr == dst_addr) ||
 		if (((listen_addr == dst_addr) ||
 		     listen_addr == 0x00000000) &&
 		     listen_addr == 0x00000000) &&
@@ -1443,17 +1319,13 @@ static int mini_cm_dec_refcnt_listen(struct nes_cm_core *cm_core,
 
 
 		if (listener->nesvnic) {
 		if (listener->nesvnic) {
 			nes_manage_apbvt(listener->nesvnic,
 			nes_manage_apbvt(listener->nesvnic,
-				listener->mapped_loc_port,
+				listener->loc_port,
 				PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn),
 				PCI_FUNC(listener->nesvnic->nesdev->pcidev->devfn),
 				NES_MANAGE_APBVT_DEL);
 				NES_MANAGE_APBVT_DEL);
 
 
-			nes_remove_mapinfo(listener->loc_addr,
-					listener->loc_port,
-					listener->mapped_loc_addr,
-					listener->mapped_loc_port);
 			nes_debug(NES_DBG_NLMSG,
 			nes_debug(NES_DBG_NLMSG,
-					"Delete APBVT mapped_loc_port = %04X\n",
-					listener->mapped_loc_port);
+					"Delete APBVT loc_port = %04X\n",
+					listener->loc_port);
 		}
 		}
 
 
 		nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
 		nes_debug(NES_DBG_CM, "destroying listener (%p)\n", listener);
@@ -1602,11 +1474,6 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
 	cm_node->rem_addr = cm_info->rem_addr;
 	cm_node->rem_addr = cm_info->rem_addr;
 	cm_node->rem_port = cm_info->rem_port;
 	cm_node->rem_port = cm_info->rem_port;
 
 
-	cm_node->mapped_loc_addr = cm_info->mapped_loc_addr;
-	cm_node->mapped_rem_addr = cm_info->mapped_rem_addr;
-	cm_node->mapped_loc_port = cm_info->mapped_loc_port;
-	cm_node->mapped_rem_port = cm_info->mapped_rem_port;
-
 	cm_node->mpa_frame_rev = mpa_version;
 	cm_node->mpa_frame_rev = mpa_version;
 	cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
 	cm_node->send_rdma0_op = SEND_RDMA_READ_ZERO;
 	cm_node->mpav2_ird_ord = 0;
 	cm_node->mpav2_ird_ord = 0;
@@ -1655,10 +1522,10 @@ static struct nes_cm_node *make_cm_node(struct nes_cm_core *cm_core,
 	cm_node->loopbackpartner = NULL;
 	cm_node->loopbackpartner = NULL;
 
 
 	/* get the mac addr for the remote node */
 	/* get the mac addr for the remote node */
-	oldarpindex = nes_arp_table(nesdev, cm_node->mapped_rem_addr,
-				NULL, NES_ARP_RESOLVE);
-	arpindex = nes_addr_resolve_neigh(nesvnic,
-				cm_node->mapped_rem_addr, oldarpindex);
+	oldarpindex = nes_arp_table(nesdev, cm_node->rem_addr,
+				    NULL, NES_ARP_RESOLVE);
+	arpindex = nes_addr_resolve_neigh(nesvnic, cm_node->rem_addr,
+					  oldarpindex);
 	if (arpindex < 0) {
 	if (arpindex < 0) {
 		kfree(cm_node);
 		kfree(cm_node);
 		return NULL;
 		return NULL;
@@ -1720,14 +1587,12 @@ static int rem_ref_cm_node(struct nes_cm_core *cm_core,
 		mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0);
 		mini_cm_dec_refcnt_listen(cm_core, cm_node->listener, 0);
 	} else {
 	} else {
 		if (cm_node->apbvt_set && cm_node->nesvnic) {
 		if (cm_node->apbvt_set && cm_node->nesvnic) {
-			nes_manage_apbvt(cm_node->nesvnic, cm_node->mapped_loc_port,
+			nes_manage_apbvt(cm_node->nesvnic, cm_node->loc_port,
 					 PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
 					 PCI_FUNC(cm_node->nesvnic->nesdev->pcidev->devfn),
 					 NES_MANAGE_APBVT_DEL);
 					 NES_MANAGE_APBVT_DEL);
 		}
 		}
-		nes_debug(NES_DBG_NLMSG, "Delete APBVT mapped_loc_port = %04X\n",
-					cm_node->mapped_loc_port);
-		nes_remove_mapinfo(cm_node->loc_addr, cm_node->loc_port,
-			cm_node->mapped_loc_addr, cm_node->mapped_loc_port);
+		nes_debug(NES_DBG_NLMSG, "Delete APBVT loc_port = %04X\n",
+			  cm_node->loc_port);
 	}
 	}
 
 
 	atomic_dec(&cm_core->node_cnt);
 	atomic_dec(&cm_core->node_cnt);
@@ -2184,7 +2049,6 @@ static int handle_ack_pkt(struct nes_cm_node *cm_node, struct sk_buff *skb,
 		cm_node->state = NES_CM_STATE_ESTABLISHED;
 		cm_node->state = NES_CM_STATE_ESTABLISHED;
 		if (datasize) {
 		if (datasize) {
 			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
 			cm_node->tcp_cntxt.rcv_nxt = inc_sequence + datasize;
-			nes_get_remote_addr(cm_node);
 			handle_rcv_mpa(cm_node, skb);
 			handle_rcv_mpa(cm_node, skb);
 		} else { /* rcvd ACK only */
 		} else { /* rcvd ACK only */
 			dev_kfree_skb_any(skb);
 			dev_kfree_skb_any(skb);
@@ -2399,17 +2263,14 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
 			struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
 			struct nes_vnic *nesvnic, struct nes_cm_info *cm_info)
 {
 {
 	struct nes_cm_listener *listener;
 	struct nes_cm_listener *listener;
-	struct iwpm_dev_data pm_reg_msg;
-	struct iwpm_sa_data pm_msg;
 	unsigned long flags;
 	unsigned long flags;
-	int iwpm_err = 0;
 
 
 	nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
 	nes_debug(NES_DBG_CM, "Search for 0x%08x : 0x%04x\n",
 		  cm_info->loc_addr, cm_info->loc_port);
 		  cm_info->loc_addr, cm_info->loc_port);
 
 
 	/* cannot have multiple matching listeners */
 	/* cannot have multiple matching listeners */
 	listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port,
 	listener = find_listener(cm_core, cm_info->loc_addr, cm_info->loc_port,
-				NES_CM_LISTENER_EITHER_STATE, 1);
+				NES_CM_LISTENER_EITHER_STATE);
 
 
 	if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
 	if (listener && listener->listener_state == NES_CM_LISTENER_ACTIVE_STATE) {
 		/* find automatically incs ref count ??? */
 		/* find automatically incs ref count ??? */
@@ -2419,22 +2280,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
 	}
 	}
 
 
 	if (!listener) {
 	if (!listener) {
-		nes_form_reg_msg(nesvnic, &pm_reg_msg);
-		iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
-		if (iwpm_err) {
-			nes_debug(NES_DBG_NLMSG,
-			"Port Mapper reg pid fail (err = %d).\n", iwpm_err);
-		}
-		if (iwpm_valid_pid() && !iwpm_err) {
-			nes_form_pm_msg(cm_info, &pm_msg);
-			iwpm_err = iwpm_add_mapping(&pm_msg, RDMA_NL_NES);
-			if (iwpm_err)
-				nes_debug(NES_DBG_NLMSG,
-				"Port Mapper query fail (err = %d).\n", iwpm_err);
-			else
-				nes_record_pm_msg(cm_info, &pm_msg);
-		}
-
 		/* create a CM listen node (1/2 node to compare incoming traffic to) */
 		/* create a CM listen node (1/2 node to compare incoming traffic to) */
 		listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
 		listener = kzalloc(sizeof(*listener), GFP_ATOMIC);
 		if (!listener) {
 		if (!listener) {
@@ -2444,8 +2289,6 @@ static struct nes_cm_listener *mini_cm_listen(struct nes_cm_core *cm_core,
 
 
 		listener->loc_addr = cm_info->loc_addr;
 		listener->loc_addr = cm_info->loc_addr;
 		listener->loc_port = cm_info->loc_port;
 		listener->loc_port = cm_info->loc_port;
-		listener->mapped_loc_addr = cm_info->mapped_loc_addr;
-		listener->mapped_loc_port = cm_info->mapped_loc_port;
 		listener->reused_node = 0;
 		listener->reused_node = 0;
 
 
 		atomic_set(&listener->ref_count, 1);
 		atomic_set(&listener->ref_count, 1);
@@ -2507,18 +2350,18 @@ static struct nes_cm_node *mini_cm_connect(struct nes_cm_core *cm_core,
 
 
 	if (cm_info->loc_addr == cm_info->rem_addr) {
 	if (cm_info->loc_addr == cm_info->rem_addr) {
 		loopbackremotelistener = find_listener(cm_core,
 		loopbackremotelistener = find_listener(cm_core,
-			cm_node->mapped_loc_addr, cm_node->mapped_rem_port,
-			NES_CM_LISTENER_ACTIVE_STATE, 0);
+			cm_node->loc_addr, cm_node->rem_port,
+			NES_CM_LISTENER_ACTIVE_STATE);
 		if (loopbackremotelistener == NULL) {
 		if (loopbackremotelistener == NULL) {
 			create_event(cm_node, NES_CM_EVENT_ABORTED);
 			create_event(cm_node, NES_CM_EVENT_ABORTED);
 		} else {
 		} else {
 			loopback_cm_info = *cm_info;
 			loopback_cm_info = *cm_info;
 			loopback_cm_info.loc_port = cm_info->rem_port;
 			loopback_cm_info.loc_port = cm_info->rem_port;
 			loopback_cm_info.rem_port = cm_info->loc_port;
 			loopback_cm_info.rem_port = cm_info->loc_port;
-			loopback_cm_info.mapped_loc_port =
-				cm_info->mapped_rem_port;
-			loopback_cm_info.mapped_rem_port =
-				cm_info->mapped_loc_port;
+			loopback_cm_info.loc_port =
+				cm_info->rem_port;
+			loopback_cm_info.rem_port =
+				cm_info->loc_port;
 			loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
 			loopback_cm_info.cm_id = loopbackremotelistener->cm_id;
 			loopbackremotenode = make_cm_node(cm_core, nesvnic,
 			loopbackremotenode = make_cm_node(cm_core, nesvnic,
 							  &loopback_cm_info, loopbackremotelistener);
 							  &loopback_cm_info, loopbackremotelistener);
@@ -2747,12 +2590,6 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
 	nfo.rem_addr = ntohl(iph->saddr);
 	nfo.rem_addr = ntohl(iph->saddr);
 	nfo.rem_port = ntohs(tcph->source);
 	nfo.rem_port = ntohs(tcph->source);
 
 
-	/* If port mapper is available these should be mapped address info */
-	nfo.mapped_loc_addr = ntohl(iph->daddr);
-	nfo.mapped_loc_port = ntohs(tcph->dest);
-	nfo.mapped_rem_addr = ntohl(iph->saddr);
-	nfo.mapped_rem_port = ntohs(tcph->source);
-
 	tmp_daddr = cpu_to_be32(iph->daddr);
 	tmp_daddr = cpu_to_be32(iph->daddr);
 	tmp_saddr = cpu_to_be32(iph->saddr);
 	tmp_saddr = cpu_to_be32(iph->saddr);
 
 
@@ -2761,8 +2598,8 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
 
 
 	do {
 	do {
 		cm_node = find_node(cm_core,
 		cm_node = find_node(cm_core,
-				    nfo.mapped_rem_port, nfo.mapped_rem_addr,
-				    nfo.mapped_loc_port, nfo.mapped_loc_addr);
+				    nfo.rem_port, nfo.rem_addr,
+				    nfo.loc_port, nfo.loc_addr);
 
 
 		if (!cm_node) {
 		if (!cm_node) {
 			/* Only type of packet accepted are for */
 			/* Only type of packet accepted are for */
@@ -2771,9 +2608,9 @@ static int mini_cm_recv_pkt(struct nes_cm_core *cm_core,
 				skb_handled = 0;
 				skb_handled = 0;
 				break;
 				break;
 			}
 			}
-			listener = find_listener(cm_core, nfo.mapped_loc_addr,
-					nfo.mapped_loc_port,
-					NES_CM_LISTENER_ACTIVE_STATE, 0);
+			listener = find_listener(cm_core, nfo.loc_addr,
+						 nfo.loc_port,
+						 NES_CM_LISTENER_ACTIVE_STATE);
 			if (!listener) {
 			if (!listener) {
 				nfo.cm_id = NULL;
 				nfo.cm_id = NULL;
 				nfo.conn_type = 0;
 				nfo.conn_type = 0;
@@ -2856,12 +2693,22 @@ static struct nes_cm_core *nes_cm_alloc_core(void)
 
 
 	nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n");
 	nes_debug(NES_DBG_CM, "Enable QUEUE EVENTS\n");
 	cm_core->event_wq = create_singlethread_workqueue("nesewq");
 	cm_core->event_wq = create_singlethread_workqueue("nesewq");
+	if (!cm_core->event_wq)
+		goto out_free_cmcore;
 	cm_core->post_event = nes_cm_post_event;
 	cm_core->post_event = nes_cm_post_event;
 	nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n");
 	nes_debug(NES_DBG_CM, "Enable QUEUE DISCONNECTS\n");
 	cm_core->disconn_wq = create_singlethread_workqueue("nesdwq");
 	cm_core->disconn_wq = create_singlethread_workqueue("nesdwq");
+	if (!cm_core->disconn_wq)
+		goto out_free_wq;
 
 
 	print_core(cm_core);
 	print_core(cm_core);
 	return cm_core;
 	return cm_core;
+
+out_free_wq:
+	destroy_workqueue(cm_core->event_wq);
+out_free_cmcore:
+	kfree(cm_core);
+	return NULL;
 }
 }
 
 
 
 
@@ -3121,8 +2968,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
 			atomic_inc(&cm_disconnects);
 			atomic_inc(&cm_disconnects);
 			cm_event.event = IW_CM_EVENT_DISCONNECT;
 			cm_event.event = IW_CM_EVENT_DISCONNECT;
 			cm_event.status = disconn_status;
 			cm_event.status = disconn_status;
-			cm_event.local_addr = cm_id->local_addr;
-			cm_event.remote_addr = cm_id->remote_addr;
+			cm_event.local_addr = cm_id->m_local_addr;
+			cm_event.remote_addr = cm_id->m_remote_addr;
 			cm_event.private_data = NULL;
 			cm_event.private_data = NULL;
 			cm_event.private_data_len = 0;
 			cm_event.private_data_len = 0;
 
 
@@ -3148,8 +2995,8 @@ static int nes_cm_disconn_true(struct nes_qp *nesqp)
 			cm_event.event = IW_CM_EVENT_CLOSE;
 			cm_event.event = IW_CM_EVENT_CLOSE;
 			cm_event.status = 0;
 			cm_event.status = 0;
 			cm_event.provider_data = cm_id->provider_data;
 			cm_event.provider_data = cm_id->provider_data;
-			cm_event.local_addr = cm_id->local_addr;
-			cm_event.remote_addr = cm_id->remote_addr;
+			cm_event.local_addr = cm_id->m_local_addr;
+			cm_event.remote_addr = cm_id->m_remote_addr;
 			cm_event.private_data = NULL;
 			cm_event.private_data = NULL;
 			cm_event.private_data_len = 0;
 			cm_event.private_data_len = 0;
 
 
@@ -3240,8 +3087,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	u8 *start_ptr = &start_addr;
 	u8 **start_buff = &start_ptr;
 	u16 buff_len = 0;
-	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
-	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
 
 	ibqp = nes_get_qp(cm_id->device, conn_param->qpn);
 	if (!ibqp)
@@ -3378,11 +3225,11 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	nes_cm_init_tsa_conn(nesqp, cm_node);
 
 	nesqp->nesqp_context->tcpPorts[0] =
-				cpu_to_le16(cm_node->mapped_loc_port);
+				cpu_to_le16(cm_node->loc_port);
 	nesqp->nesqp_context->tcpPorts[1] =
-				cpu_to_le16(cm_node->mapped_rem_port);
+				cpu_to_le16(cm_node->rem_port);
 
-	nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr);
+	nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr);
 
 	nesqp->nesqp_context->misc2 |= cpu_to_le32(
 		(u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3406,9 +3253,9 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	memset(&nes_quad, 0, sizeof(nes_quad));
 	nes_quad.DstIpAdrIndex =
 		cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
-	nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr);
-	nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port);
-	nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port);
+	nes_quad.SrcIpadr = htonl(cm_node->rem_addr);
+	nes_quad.TcpPorts[0] = htons(cm_node->rem_port);
+	nes_quad.TcpPorts[1] = htons(cm_node->loc_port);
 
 	/* Produce hash key */
 	crc_value = get_crc_value(&nes_quad);
@@ -3437,8 +3284,8 @@ int nes_accept(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	cm_event.event = IW_CM_EVENT_ESTABLISHED;
 	cm_event.status = 0;
 	cm_event.provider_data = (void *)nesqp;
-	cm_event.local_addr = cm_id->local_addr;
-	cm_event.remote_addr = cm_id->remote_addr;
+	cm_event.local_addr = cm_id->m_local_addr;
+	cm_event.remote_addr = cm_id->m_remote_addr;
 	cm_event.private_data = NULL;
 	cm_event.private_data_len = 0;
 	cm_event.ird = cm_node->ird_size;
@@ -3508,11 +3355,8 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	struct nes_cm_node *cm_node;
 	struct nes_cm_info cm_info;
 	int apbvt_set = 0;
-	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
-	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->remote_addr;
-	struct iwpm_dev_data pm_reg_msg;
-	struct iwpm_sa_data pm_msg;
-	int iwpm_err = 0;
+	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
 
 	if (cm_id->remote_addr.ss_family != AF_INET)
 		return -ENOSYS;
@@ -3558,37 +3402,13 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	cm_info.cm_id = cm_id;
 	cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
 
-	/* No port mapper available, go with the specified peer information */
-	cm_info.mapped_loc_addr = cm_info.loc_addr;
-	cm_info.mapped_loc_port = cm_info.loc_port;
-	cm_info.mapped_rem_addr = cm_info.rem_addr;
-	cm_info.mapped_rem_port = cm_info.rem_port;
-
-	nes_form_reg_msg(nesvnic, &pm_reg_msg);
-	iwpm_err = iwpm_register_pid(&pm_reg_msg, RDMA_NL_NES);
-	if (iwpm_err) {
-		nes_debug(NES_DBG_NLMSG,
-			"Port Mapper reg pid fail (err = %d).\n", iwpm_err);
-	}
-	if (iwpm_valid_pid() && !iwpm_err) {
-		nes_form_pm_msg(&cm_info, &pm_msg);
-		iwpm_err = iwpm_add_and_query_mapping(&pm_msg, RDMA_NL_NES);
-		if (iwpm_err)
-			nes_debug(NES_DBG_NLMSG,
-			"Port Mapper query fail (err = %d).\n", iwpm_err);
-		else
-			nes_record_pm_msg(&cm_info, &pm_msg);
-	}
-
 	if (laddr->sin_addr.s_addr != raddr->sin_addr.s_addr) {
-		nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port,
-			PCI_FUNC(nesdev->pcidev->devfn), NES_MANAGE_APBVT_ADD);
+		nes_manage_apbvt(nesvnic, cm_info.loc_port,
+				 PCI_FUNC(nesdev->pcidev->devfn),
+				 NES_MANAGE_APBVT_ADD);
 		apbvt_set = 1;
 	}
 
-	if (nes_create_mapinfo(&cm_info))
-		return -ENOMEM;
-
 	cm_id->add_ref(cm_id);
 
 	/* create a connect CM node connection */
@@ -3597,14 +3417,12 @@ int nes_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 					  &cm_info);
 	if (!cm_node) {
 		if (apbvt_set)
-			nes_manage_apbvt(nesvnic, cm_info.mapped_loc_port,
+			nes_manage_apbvt(nesvnic, cm_info.loc_port,
 					 PCI_FUNC(nesdev->pcidev->devfn),
 					 NES_MANAGE_APBVT_DEL);
 
-		nes_debug(NES_DBG_NLMSG, "Delete mapped_loc_port = %04X\n",
-				cm_info.mapped_loc_port);
-		nes_remove_mapinfo(cm_info.loc_addr, cm_info.loc_port,
-			cm_info.mapped_loc_addr, cm_info.mapped_loc_port);
+		nes_debug(NES_DBG_NLMSG, "Delete loc_port = %04X\n",
+			  cm_info.loc_port);
 		cm_id->rem_ref(cm_id);
 		return -ENOMEM;
 	}
@@ -3633,12 +3451,12 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
 	struct nes_cm_listener *cm_node;
 	struct nes_cm_info cm_info;
 	int err;
-	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->local_addr;
+	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
 
 	nes_debug(NES_DBG_CM, "cm_id = %p, local port = 0x%04X.\n",
 		  cm_id, ntohs(laddr->sin_port));
 
-	if (cm_id->local_addr.ss_family != AF_INET)
+	if (cm_id->m_local_addr.ss_family != AF_INET)
 		return -ENOSYS;
 	nesvnic = to_nesvnic(cm_id->device);
 	if (!nesvnic)
@@ -3658,10 +3476,6 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
 
 	cm_info.conn_type = NES_CM_IWARP_CONN_TYPE;
 
-	/* No port mapper available, go with the specified info */
-	cm_info.mapped_loc_addr = cm_info.loc_addr;
-	cm_info.mapped_loc_port = cm_info.loc_port;
-
 	cm_node = g_cm_core->api->listen(g_cm_core, nesvnic, &cm_info);
 	if (!cm_node) {
 		printk(KERN_ERR "%s[%u] Error returned from listen API call\n",
@@ -3673,10 +3487,7 @@ int nes_create_listen(struct iw_cm_id *cm_id, int backlog)
 	cm_node->tos = cm_id->tos;
 
 	if (!cm_node->reused_node) {
-		if (nes_create_mapinfo(&cm_info))
-			return -ENOMEM;
-
-		err = nes_manage_apbvt(nesvnic, cm_node->mapped_loc_port,
+		err = nes_manage_apbvt(nesvnic, cm_node->loc_port,
 				       PCI_FUNC(nesvnic->nesdev->pcidev->devfn),
 				       NES_MANAGE_APBVT_ADD);
 		if (err) {
@@ -3786,8 +3597,8 @@ static void cm_event_connected(struct nes_cm_event *event)
 	nesvnic = to_nesvnic(nesqp->ibqp.device);
 	nesdev = nesvnic->nesdev;
 	nesadapter = nesdev->nesadapter;
-	laddr = (struct sockaddr_in *)&cm_id->local_addr;
-	raddr = (struct sockaddr_in *)&cm_id->remote_addr;
+	laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
+	raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
 	cm_event_laddr = (struct sockaddr_in *)&cm_event.local_addr;
 
 	if (nesqp->destroyed)
@@ -3802,10 +3613,10 @@ static void cm_event_connected(struct nes_cm_event *event)
 
 	/* set the QP tsa context */
 	nesqp->nesqp_context->tcpPorts[0] =
-			cpu_to_le16(cm_node->mapped_loc_port);
+			cpu_to_le16(cm_node->loc_port);
 	nesqp->nesqp_context->tcpPorts[1] =
-			cpu_to_le16(cm_node->mapped_rem_port);
-	nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->mapped_rem_addr);
+			cpu_to_le16(cm_node->rem_port);
+	nesqp->nesqp_context->ip0 = cpu_to_le32(cm_node->rem_addr);
 
 	nesqp->nesqp_context->misc2 |= cpu_to_le32(
 			(u32)PCI_FUNC(nesdev->pcidev->devfn) <<
@@ -3835,9 +3646,9 @@ static void cm_event_connected(struct nes_cm_event *event)
 
 	nes_quad.DstIpAdrIndex =
 		cpu_to_le32((u32)PCI_FUNC(nesdev->pcidev->devfn) << 24);
-	nes_quad.SrcIpadr = htonl(cm_node->mapped_rem_addr);
-	nes_quad.TcpPorts[0] = htons(cm_node->mapped_rem_port);
-	nes_quad.TcpPorts[1] = htons(cm_node->mapped_loc_port);
+	nes_quad.SrcIpadr = htonl(cm_node->rem_addr);
+	nes_quad.TcpPorts[0] = htons(cm_node->rem_port);
+	nes_quad.TcpPorts[1] = htons(cm_node->loc_port);
 
 	/* Produce hash key */
 	crc_value = get_crc_value(&nes_quad);
@@ -3858,14 +3669,14 @@ static void cm_event_connected(struct nes_cm_event *event)
 	cm_event.provider_data = cm_id->provider_data;
 	cm_event_laddr->sin_family = AF_INET;
 	cm_event_laddr->sin_port = laddr->sin_port;
-	cm_event.remote_addr = cm_id->remote_addr;
+	cm_event.remote_addr = cm_id->m_remote_addr;
 
 	cm_event.private_data = (void *)event->cm_node->mpa_frame_buf;
 	cm_event.private_data_len = (u8)event->cm_node->mpa_frame_size;
 	cm_event.ird = cm_node->ird_size;
 	cm_event.ord = cm_node->ord_size;
 
-	cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.rem_addr);
+	cm_event_laddr->sin_addr.s_addr = htonl(event->cm_info.loc_addr);
 	ret = cm_id->event_handler(cm_id, &cm_event);
 	nes_debug(NES_DBG_CM, "OFA CM event_handler returned, ret=%d\n", ret);
 
@@ -3913,8 +3724,8 @@ static void cm_event_connect_error(struct nes_cm_event *event)
 	cm_event.event = IW_CM_EVENT_CONNECT_REPLY;
 	cm_event.status = -ECONNRESET;
 	cm_event.provider_data = cm_id->provider_data;
-	cm_event.local_addr = cm_id->local_addr;
-	cm_event.remote_addr = cm_id->remote_addr;
+	cm_event.local_addr = cm_id->m_local_addr;
+	cm_event.remote_addr = cm_id->m_remote_addr;
 	cm_event.private_data = NULL;
 	cm_event.private_data_len = 0;
 
@@ -3970,8 +3781,8 @@ static void cm_event_reset(struct nes_cm_event *event)
 	cm_event.event = IW_CM_EVENT_DISCONNECT;
 	cm_event.status = -ECONNRESET;
 	cm_event.provider_data = cm_id->provider_data;
-	cm_event.local_addr = cm_id->local_addr;
-	cm_event.remote_addr = cm_id->remote_addr;
+	cm_event.local_addr = cm_id->m_local_addr;
+	cm_event.remote_addr = cm_id->m_remote_addr;
 	cm_event.private_data = NULL;
 	cm_event.private_data_len = 0;
 
@@ -3981,8 +3792,8 @@ static void cm_event_reset(struct nes_cm_event *event)
 	cm_event.event = IW_CM_EVENT_CLOSE;
 	cm_event.status = 0;
 	cm_event.provider_data = cm_id->provider_data;
-	cm_event.local_addr = cm_id->local_addr;
-	cm_event.remote_addr = cm_id->remote_addr;
+	cm_event.local_addr = cm_id->m_local_addr;
+	cm_event.remote_addr = cm_id->m_remote_addr;
 	cm_event.private_data = NULL;
 	cm_event.private_data_len = 0;
 	nes_debug(NES_DBG_CM, "NODE %p Generating CLOSE\n", event->cm_node);
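
The nes_cm_alloc_core() hunk above adds NULL checks for both workqueues and unwinds in reverse order of allocation on failure. As a hedged illustration of that pattern outside this driver (the struct and names below are hypothetical, not part of nes), a minimal sketch:

#include <linux/slab.h>
#include <linux/workqueue.h>

/* Hypothetical core object mirroring the two-workqueue layout used above. */
struct demo_core {
	struct workqueue_struct *event_wq;
	struct workqueue_struct *disconn_wq;
};

static struct demo_core *demo_core_alloc(void)
{
	struct demo_core *core;

	core = kzalloc(sizeof(*core), GFP_KERNEL);
	if (!core)
		return NULL;

	core->event_wq = create_singlethread_workqueue("demo_ewq");
	if (!core->event_wq)
		goto out_free_core;

	core->disconn_wq = create_singlethread_workqueue("demo_dwq");
	if (!core->disconn_wq)
		goto out_free_event_wq;

	return core;

out_free_event_wq:
	/* Unwind in reverse order of allocation. */
	destroy_workqueue(core->event_wq);
out_free_core:
	kfree(core);
	return NULL;
}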

+ 2 - 9
drivers/infiniband/hw/nes/nes_cm.h

@@ -293,8 +293,8 @@ struct nes_cm_listener {
 	struct list_head           list;
 	struct nes_cm_core         *cm_core;
 	u8                         loc_mac[ETH_ALEN];
-	nes_addr_t                 loc_addr, mapped_loc_addr;
-	u16                        loc_port, mapped_loc_port;
+	nes_addr_t                 loc_addr;
+	u16                        loc_port;
 	struct iw_cm_id            *cm_id;
 	enum nes_cm_conn_type      conn_type;
 	atomic_t                   ref_count;
@@ -309,9 +309,7 @@ struct nes_cm_listener {
 /* per connection node and node state information */
 struct nes_cm_node {
 	nes_addr_t                loc_addr, rem_addr;
-	nes_addr_t                mapped_loc_addr, mapped_rem_addr;
 	u16                       loc_port, rem_port;
-	u16                       mapped_loc_port, mapped_rem_port;
 
 	u8                        loc_mac[ETH_ALEN];
 	u8                        rem_mac[ETH_ALEN];
@@ -368,11 +366,6 @@ struct nes_cm_info {
 	u16 rem_port;
 	nes_addr_t loc_addr;
 	nes_addr_t rem_addr;
-	u16 mapped_loc_port;
-	u16 mapped_rem_port;
-	nes_addr_t mapped_loc_addr;
-	nes_addr_t mapped_rem_addr;
-
 	enum nes_cm_conn_type  conn_type;
 	int backlog;
 };

+ 1 - 43
drivers/infiniband/hw/nes/nes_hw.c

@@ -35,18 +35,11 @@
 #include <linux/moduleparam.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
 #include <linux/if_vlan.h>
-#include <linux/inet_lro.h>
 #include <linux/slab.h>
 
 #include "nes.h"
 
-static unsigned int nes_lro_max_aggr = NES_LRO_MAX_AGGR;
-module_param(nes_lro_max_aggr, uint, 0444);
-MODULE_PARM_DESC(nes_lro_max_aggr, "NIC LRO max packet aggregation");
-
 static int wide_ppm_offset;
 module_param(wide_ppm_offset, int, 0644);
 MODULE_PARM_DESC(wide_ppm_offset, "Increase CX4 interface clock ppm offset, 0=100ppm (default), 1=300ppm");
@@ -1642,25 +1635,6 @@ static void nes_rq_wqes_timeout(unsigned long parm)
 }
 
 
-static int nes_lro_get_skb_hdr(struct sk_buff *skb, void **iphdr,
-			       void **tcph, u64 *hdr_flags, void *priv)
-{
-	unsigned int ip_len;
-	struct iphdr *iph;
-	skb_reset_network_header(skb);
-	iph = ip_hdr(skb);
-	if (iph->protocol != IPPROTO_TCP)
-		return -1;
-	ip_len = ip_hdrlen(skb);
-	skb_set_transport_header(skb, ip_len);
-	*tcph = tcp_hdr(skb);
-
-	*hdr_flags = LRO_IPV4 | LRO_TCP;
-	*iphdr = iph;
-	return 0;
-}
-
-
 /**
  * nes_init_nic_qp
  */
@@ -1895,14 +1869,6 @@ int nes_init_nic_qp(struct nes_device *nesdev, struct net_device *netdev)
 		return -ENOMEM;
 	}
 
-	nesvnic->lro_mgr.max_aggr       = nes_lro_max_aggr;
-	nesvnic->lro_mgr.max_desc       = NES_MAX_LRO_DESCRIPTORS;
-	nesvnic->lro_mgr.lro_arr        = nesvnic->lro_desc;
-	nesvnic->lro_mgr.get_skb_header = nes_lro_get_skb_hdr;
-	nesvnic->lro_mgr.features       = LRO_F_NAPI | LRO_F_EXTRACT_VLAN_ID;
-	nesvnic->lro_mgr.dev            = netdev;
-	nesvnic->lro_mgr.ip_summed      = CHECKSUM_UNNECESSARY;
-	nesvnic->lro_mgr.ip_summed_aggr = CHECKSUM_UNNECESSARY;
 	return 0;
 }
 
@@ -2809,13 +2775,10 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
 	u16 pkt_type;
 	u16 rqes_processed = 0;
 	u8 sq_cqes = 0;
-	u8 nes_use_lro = 0;
 
 	head = cq->cq_head;
 	cq_size = cq->cq_size;
 	cq->cqes_pending = 1;
-	if (nesvnic->netdev->features & NETIF_F_LRO)
-		nes_use_lro = 1;
 	do {
 		if (le32_to_cpu(cq->cq_vbase[head].cqe_words[NES_NIC_CQE_MISC_IDX]) &
 				NES_NIC_CQE_VALID) {
@@ -2950,10 +2913,7 @@ void nes_nic_ce_handler(struct nes_device *nesdev, struct nes_hw_nic_cq *cq)
 
 					__vlan_hwaccel_put_tag(rx_skb, htons(ETH_P_8021Q), vlan_tag);
 				}
-				if (nes_use_lro)
-					lro_receive_skb(&nesvnic->lro_mgr, rx_skb, NULL);
-				else
-					netif_receive_skb(rx_skb);
+				napi_gro_receive(&nesvnic->napi, rx_skb);
 
 skip_rx_indicate0:
 				;
@@ -2984,8 +2944,6 @@ skip_rx_indicate0:
 
 	} while (1);
 
-	if (nes_use_lro)
-		lro_flush_all(&nesvnic->lro_mgr);
 	if (sq_cqes) {
 		barrier();
 		/* restart the queue if it had been stopped */
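
The nes_hw.c hunks above drop the old inet_lro machinery and hand received skbs to GRO instead. A hedged sketch of that receive-side pattern, assuming a hypothetical driver that keeps a struct napi_struct next to its netdev (not the actual nes receive path):

#include <linux/etherdevice.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

/* Hypothetical per-interface context; nesvnic keeps its napi member similarly. */
struct demo_vnic {
	struct net_device *netdev;
	struct napi_struct napi;
};

/*
 * Called for each completed receive descriptor from the NAPI poll handler.
 * GRO aggregates TCP segments internally, so the driver no longer needs
 * net_lro_mgr state, lro_receive_skb() or lro_flush_all().
 */
static void demo_rx_one(struct demo_vnic *vnic, struct sk_buff *skb)
{
	skb->protocol = eth_type_trans(skb, vnic->netdev);
	napi_gro_receive(&vnic->napi, skb);
}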

+ 0 - 7
drivers/infiniband/hw/nes/nes_hw.h

@@ -33,8 +33,6 @@
 #ifndef __NES_HW_H
 #define __NES_HW_H
 
-#include <linux/inet_lro.h>
-
 #define NES_PHY_TYPE_CX4       1
 #define NES_PHY_TYPE_1G        2
 #define NES_PHY_TYPE_ARGUS     4
@@ -1049,8 +1047,6 @@ struct nes_hw_tune_timer {
 #define NES_TIMER_ENABLE_LIMIT      4
 #define NES_MAX_LINK_INTERRUPTS     128
 #define NES_MAX_LINK_CHECK          200
-#define NES_MAX_LRO_DESCRIPTORS     32
-#define NES_LRO_MAX_AGGR            64
 
 struct nes_adapter {
 	u64              fw_ver;
@@ -1263,9 +1259,6 @@ struct nes_vnic {
 	u8  next_qp_nic_index;
 	u8  of_device_registered;
 	u8  rdma_enabled;
-	u32 lro_max_aggr;
-	struct net_lro_mgr lro_mgr;
-	struct net_lro_desc lro_desc[NES_MAX_LRO_DESCRIPTORS];
 	struct timer_list event_timer;
 	enum ib_event_type delayed_event;
 	enum ib_event_type last_dispatched_event;

+ 0 - 7
drivers/infiniband/hw/nes/nes_nic.c

@@ -1085,9 +1085,6 @@ static const char nes_ethtool_stringset[][ETH_GSTRING_LEN] = {
 	"Free 4Kpbls",
 	"Free 256pbls",
 	"Timer Inits",
-	"LRO aggregated",
-	"LRO flushed",
-	"LRO no_desc",
 	"PAU CreateQPs",
 	"PAU DestroyQPs",
 };
@@ -1302,9 +1299,6 @@ static void nes_netdev_get_ethtool_stats(struct net_device *netdev,
 	target_stat_values[++index] = nesadapter->free_4kpbl;
 	target_stat_values[++index] = nesadapter->free_256pbl;
 	target_stat_values[++index] = int_mod_timer_init;
-	target_stat_values[++index] = nesvnic->lro_mgr.stats.aggregated;
-	target_stat_values[++index] = nesvnic->lro_mgr.stats.flushed;
-	target_stat_values[++index] = nesvnic->lro_mgr.stats.no_desc;
 	target_stat_values[++index] = atomic_read(&pau_qps_created);
 	target_stat_values[++index] = atomic_read(&pau_qps_destroyed);
 }
@@ -1709,7 +1703,6 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
 		netdev->hw_features |= NETIF_F_TSO;
 
 	netdev->features = netdev->hw_features | NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_CTAG_TX;
-	netdev->hw_features |= NETIF_F_LRO;
 
 	nes_debug(NES_DBG_INIT, "nesvnic = %p, reported features = 0x%lX, QPid = %d,"
 			" nic_index = %d, logical_port = %d, mac_index = %d.\n",

+ 4 - 1
drivers/infiniband/hw/nes/nes_verbs.c

@@ -56,7 +56,8 @@ static int nes_dereg_mr(struct ib_mr *ib_mr);
 /**
  * nes_alloc_mw
  */
-static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type)
+static struct ib_mw *nes_alloc_mw(struct ib_pd *ibpd, enum ib_mw_type type,
+				  struct ib_udata *udata)
 {
 	struct nes_pd *nespd = to_nespd(ibpd);
 	struct nes_vnic *nesvnic = to_nesvnic(ibpd->device);
@@ -3768,6 +3769,8 @@ struct nes_ib_device *nes_init_ofa_device(struct net_device *netdev)
 	nesibdev->ibdev.iwcm->create_listen = nes_create_listen;
 	nesibdev->ibdev.iwcm->destroy_listen = nes_destroy_listen;
 	nesibdev->ibdev.get_port_immutable   = nes_port_immutable;
+	memcpy(nesibdev->ibdev.iwcm->ifname, netdev->name,
+	       sizeof(nesibdev->ibdev.iwcm->ifname));
 
 	return nesibdev;
 }

+ 8 - 0
drivers/infiniband/hw/ocrdma/ocrdma.h

@@ -114,6 +114,7 @@ struct ocrdma_dev_attr {
 	u8 local_ca_ack_delay;
 	u8 ird;
 	u8 num_ird_pages;
+	u8 udp_encap;
 };
 
 struct ocrdma_dma_mem {
@@ -356,6 +357,7 @@ struct ocrdma_ah {
 	struct ocrdma_av *av;
 	u16 sgid_index;
 	u32 id;
+	u8 hdr_type;
 };
 
 struct ocrdma_qp_hwq_info {
@@ -598,4 +600,10 @@ static inline u8 ocrdma_get_ae_link_state(u32 ae_state)
 	return ((ae_state & OCRDMA_AE_LSC_LS_MASK) >> OCRDMA_AE_LSC_LS_SHIFT);
 }
 
+static inline bool ocrdma_is_udp_encap_supported(struct ocrdma_dev *dev)
+{
+	return (dev->attr.udp_encap & OCRDMA_L3_TYPE_IPV4) ||
+	       (dev->attr.udp_encap & OCRDMA_L3_TYPE_IPV6);
+}
+
 #endif

+ 64 - 13
drivers/infiniband/hw/ocrdma/ocrdma_ah.c

@@ -55,18 +55,46 @@
 
 #define OCRDMA_VID_PCP_SHIFT	0xD
 
+static u16 ocrdma_hdr_type_to_proto_num(int devid, u8 hdr_type)
+{
+	switch (hdr_type) {
+	case OCRDMA_L3_TYPE_IB_GRH:
+		return (u16)0x8915;
+	case OCRDMA_L3_TYPE_IPV4:
+		return (u16)0x0800;
+	case OCRDMA_L3_TYPE_IPV6:
+		return (u16)0x86dd;
+	default:
+		pr_err("ocrdma%d: Invalid network header\n", devid);
+		return 0;
+	}
+}
+
 static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
 			struct ib_ah_attr *attr, union ib_gid *sgid,
 			int pdid, bool *isvlan, u16 vlan_tag)
 {
-	int status = 0;
+	int status;
 	struct ocrdma_eth_vlan eth;
 	struct ocrdma_grh grh;
 	int eth_sz;
+	u16 proto_num = 0;
+	u8 nxthdr = 0x11;
+	struct iphdr ipv4;
+	union {
+		struct sockaddr     _sockaddr;
+		struct sockaddr_in  _sockaddr_in;
+		struct sockaddr_in6 _sockaddr_in6;
+	} sgid_addr, dgid_addr;
 
 	memset(&eth, 0, sizeof(eth));
 	memset(&grh, 0, sizeof(grh));
 
+	/* Protocol Number */
+	proto_num = ocrdma_hdr_type_to_proto_num(dev->id, ah->hdr_type);
+	if (!proto_num)
+		return -EINVAL;
+	nxthdr = (proto_num == 0x8915) ? 0x1b : 0x11;
 	/* VLAN */
 	if (!vlan_tag || (vlan_tag > 0xFFF))
 		vlan_tag = dev->pvid;
@@ -78,13 +106,13 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
 				dev->id);
 		}
 		eth.eth_type = cpu_to_be16(0x8100);
-		eth.roce_eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
+		eth.roce_eth_type = cpu_to_be16(proto_num);
 		vlan_tag |= (dev->sl & 0x07) << OCRDMA_VID_PCP_SHIFT;
 		eth.vlan_tag = cpu_to_be16(vlan_tag);
 		eth_sz = sizeof(struct ocrdma_eth_vlan);
 		*isvlan = true;
 	} else {
-		eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
+		eth.eth_type = cpu_to_be16(proto_num);
 		eth_sz = sizeof(struct ocrdma_eth_basic);
 	}
 	/* MAC */
@@ -93,18 +121,33 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
 	if (status)
 		return status;
 	ah->sgid_index = attr->grh.sgid_index;
-	memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid));
-	memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw));
-
-	grh.tclass_flow = cpu_to_be32((6 << 28) |
-			(attr->grh.traffic_class << 24) |
-			attr->grh.flow_label);
-	/* 0x1b is next header value in GRH */
-	grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
-			(0x1b << 8) | attr->grh.hop_limit);
 	/* Eth HDR */
 	memcpy(&ah->av->eth_hdr, &eth, eth_sz);
-	memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
+	if (ah->hdr_type == RDMA_NETWORK_IPV4) {
+		*((__be16 *)&ipv4) = htons((4 << 12) | (5 << 8) |
+					   attr->grh.traffic_class);
+		ipv4.id = cpu_to_be16(pdid);
+		ipv4.frag_off = htons(IP_DF);
+		ipv4.tot_len = htons(0);
+		ipv4.ttl = attr->grh.hop_limit;
+		ipv4.protocol = nxthdr;
+		rdma_gid2ip(&sgid_addr._sockaddr, sgid);
+		ipv4.saddr = sgid_addr._sockaddr_in.sin_addr.s_addr;
+		rdma_gid2ip(&dgid_addr._sockaddr, &attr->grh.dgid);
+		ipv4.daddr = dgid_addr._sockaddr_in.sin_addr.s_addr;
+		memcpy((u8 *)ah->av + eth_sz, &ipv4, sizeof(struct iphdr));
+	} else {
+		memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid));
+		grh.tclass_flow = cpu_to_be32((6 << 28) |
+					      (attr->grh.traffic_class << 24) |
+					      attr->grh.flow_label);
+		memcpy(&grh.dgid[0], attr->grh.dgid.raw,
+		       sizeof(attr->grh.dgid.raw));
+		grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
+						(nxthdr << 8) |
+						attr->grh.hop_limit);
+		memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
+	}
 	if (*isvlan)
 		ah->av->valid |= OCRDMA_AV_VLAN_VALID;
 	ah->av->valid = cpu_to_le32(ah->av->valid);
@@ -128,6 +171,7 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
 
 	if (atomic_cmpxchg(&dev->update_sl, 1, 0))
 		ocrdma_init_service_level(dev);
+
 	ah = kzalloc(sizeof(*ah), GFP_ATOMIC);
 	if (!ah)
 		return ERR_PTR(-ENOMEM);
@@ -148,6 +192,8 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
 			vlan_tag = vlan_dev_vlan_id(sgid_attr.ndev);
 		dev_put(sgid_attr.ndev);
 	}
+	/* Get network header type for this GID */
+	ah->hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
 
 	if ((pd->uctx) &&
 	    (!rdma_is_multicast_addr((struct in6_addr *)attr->grh.dgid.raw)) &&
@@ -172,6 +218,11 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
 		ahid_addr = pd->uctx->ah_tbl.va + attr->dlid;
 		*ahid_addr = 0;
 		*ahid_addr |= ah->id & OCRDMA_AH_ID_MASK;
+		if (ocrdma_is_udp_encap_supported(dev)) {
+			*ahid_addr |= ((u32)ah->hdr_type &
+				       OCRDMA_AH_L3_TYPE_MASK) <<
+				       OCRDMA_AH_L3_TYPE_SHIFT;
+		}
 		if (isvlan)
 			*ahid_addr |= (OCRDMA_AH_VLAN_VALID_MASK <<
 				       OCRDMA_AH_VLAN_VALID_SHIFT);

+ 3 - 2
drivers/infiniband/hw/ocrdma/ocrdma_ah.h

@@ -46,9 +46,10 @@
 enum {
 	OCRDMA_AH_ID_MASK		= 0x3FF,
 	OCRDMA_AH_VLAN_VALID_MASK	= 0x01,
-	OCRDMA_AH_VLAN_VALID_SHIFT	= 0x1F
+	OCRDMA_AH_VLAN_VALID_SHIFT	= 0x1F,
+	OCRDMA_AH_L3_TYPE_MASK		= 0x03,
+	OCRDMA_AH_L3_TYPE_SHIFT		= 0x1D /* 29 bits */
 };
-
 struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *);
 int ocrdma_destroy_ah(struct ib_ah *);
 int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *);

+ 27 - 6
drivers/infiniband/hw/ocrdma/ocrdma_hw.c

@@ -1113,7 +1113,7 @@ mbx_err:
 static int ocrdma_nonemb_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe,
 				 void *payload_va)
 {
-	int status = 0;
+	int status;
 	struct ocrdma_mbx_rsp *rsp = payload_va;
 
 	if ((mqe->hdr.spcl_sge_cnt_emb & OCRDMA_MQE_HDR_EMB_MASK) >>
@@ -1144,6 +1144,9 @@ static void ocrdma_get_attr(struct ocrdma_dev *dev,
 	attr->max_pd =
 	    (rsp->max_pd_ca_ack_delay & OCRDMA_MBX_QUERY_CFG_MAX_PD_MASK) >>
 	    OCRDMA_MBX_QUERY_CFG_MAX_PD_SHIFT;
+	attr->udp_encap = (rsp->max_pd_ca_ack_delay &
+			   OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK) >>
+			   OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT;
 	attr->max_dpp_pds =
 	   (rsp->max_dpp_pds_credits & OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_MASK) >>
 	    OCRDMA_MBX_QUERY_CFG_MAX_DPP_PDS_OFFSET;
@@ -2138,7 +2141,6 @@ int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
 			   enum ib_qp_state *old_ib_state)
 {
 	unsigned long flags;
-	int status = 0;
 	enum ocrdma_qp_state new_state;
 	new_state = get_ocrdma_qp_state(new_ib_state);
 
@@ -2163,7 +2165,7 @@ int ocrdma_qp_state_change(struct ocrdma_qp *qp, enum ib_qp_state new_ib_state,
 	qp->state = new_state;
 
 	spin_unlock_irqrestore(&qp->q_lock, flags);
-	return status;
+	return 0;
 }
 
 static u32 ocrdma_set_create_qp_mbx_access_flags(struct ocrdma_qp *qp)
@@ -2501,7 +2503,12 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 	union ib_gid sgid, zgid;
 	struct ib_gid_attr sgid_attr;
 	u32 vlan_id = 0xFFFF;
-	u8 mac_addr[6];
+	u8 mac_addr[6], hdr_type;
+	union {
+		struct sockaddr     _sockaddr;
+		struct sockaddr_in  _sockaddr_in;
+		struct sockaddr_in6 _sockaddr_in6;
+	} sgid_addr, dgid_addr;
 	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
 
 	if ((ah_attr->ah_flags & IB_AH_GRH) == 0)
@@ -2516,6 +2523,8 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 	cmd->params.hop_lmt_rq_psn |=
 	    (ah_attr->grh.hop_limit << OCRDMA_QP_PARAMS_HOP_LMT_SHIFT);
 	cmd->flags |= OCRDMA_QP_PARA_FLOW_LBL_VALID;
+
+	/* GIDs */
 	memcpy(&cmd->params.dgid[0], &ah_attr->grh.dgid.raw[0],
 	       sizeof(cmd->params.dgid));
 
@@ -2538,6 +2547,16 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 		return status;
 	cmd->params.dmac_b0_to_b3 = mac_addr[0] | (mac_addr[1] << 8) |
 				(mac_addr[2] << 16) | (mac_addr[3] << 24);
+
+	hdr_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+	if (hdr_type == RDMA_NETWORK_IPV4) {
+		rdma_gid2ip(&sgid_addr._sockaddr, &sgid);
+		rdma_gid2ip(&dgid_addr._sockaddr, &ah_attr->grh.dgid);
+		memcpy(&cmd->params.dgid[0],
+		       &dgid_addr._sockaddr_in.sin_addr.s_addr, 4);
+		memcpy(&cmd->params.sgid[0],
+		       &sgid_addr._sockaddr_in.sin_addr.s_addr, 4);
+	}
 	/* convert them to LE format. */
 	ocrdma_cpu_to_le32(&cmd->params.dgid[0], sizeof(cmd->params.dgid));
 	ocrdma_cpu_to_le32(&cmd->params.sgid[0], sizeof(cmd->params.sgid));
@@ -2558,7 +2577,9 @@ static int ocrdma_set_av_params(struct ocrdma_qp *qp,
 		cmd->params.rnt_rc_sl_fl |=
 			(dev->sl & 0x07) << OCRDMA_QP_PARAMS_SL_SHIFT;
 	}
-
+	cmd->params.max_sge_recv_flags |= ((hdr_type <<
+					OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_SHIFT) &
+					OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_MASK);
 	return 0;
 }
 
@@ -2871,7 +2892,7 @@ int ocrdma_mbx_destroy_srq(struct ocrdma_dev *dev, struct ocrdma_srq *srq)
 static int ocrdma_mbx_get_dcbx_config(struct ocrdma_dev *dev, u32 ptype,
 				      struct ocrdma_dcbx_cfg *dcbxcfg)
 {
-	int status = 0;
+	int status;
 	dma_addr_t pa;
 	struct ocrdma_mqe cmd;
 

+ 4 - 0
drivers/infiniband/hw/ocrdma/ocrdma_main.c

@@ -89,8 +89,10 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
 			         struct ib_port_immutable *immutable)
 {
 	struct ib_port_attr attr;
+	struct ocrdma_dev *dev;
 	int err;
 
+	dev = get_ocrdma_dev(ibdev);
 	err = ocrdma_query_port(ibdev, port_num, &attr);
 	if (err)
 		return err;
@@ -98,6 +100,8 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
 	immutable->pkey_tbl_len = attr.pkey_tbl_len;
 	immutable->gid_tbl_len = attr.gid_tbl_len;
 	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+	if (ocrdma_is_udp_encap_supported(dev))
+		immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
 	return 0;

+ 13 - 3
drivers/infiniband/hw/ocrdma/ocrdma_sli.h

@@ -140,7 +140,11 @@ enum {
 	OCRDMA_DB_RQ_SHIFT		= 24
 };
 
-#define OCRDMA_ROUDP_FLAGS_SHIFT	0x03
+enum {
+	OCRDMA_L3_TYPE_IB_GRH   = 0x00,
+	OCRDMA_L3_TYPE_IPV4     = 0x01,
+	OCRDMA_L3_TYPE_IPV6     = 0x02
+};
 
 #define OCRDMA_DB_CQ_RING_ID_MASK       0x3FF	/* bits 0 - 9 */
 #define OCRDMA_DB_CQ_RING_ID_EXT_MASK  0x0C00	/* bits 10-11 of qid at 12-11 */
@@ -546,7 +550,8 @@ enum {
 	OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT		= 8,
 	OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_MASK		= 0xFF <<
 				OCRDMA_MBX_QUERY_CFG_CA_ACK_DELAY_SHIFT,
-
+	OCRDMA_MBX_QUERY_CFG_L3_TYPE_SHIFT		= 3,
+	OCRDMA_MBX_QUERY_CFG_L3_TYPE_MASK		= 0x18,
 	OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_SHIFT		= 0,
 	OCRDMA_MBX_QUERY_CFG_MAX_SEND_SGE_MASK		= 0xFFFF,
 	OCRDMA_MBX_QUERY_CFG_MAX_WRITE_SGE_SHIFT	= 16,
@@ -1107,6 +1112,8 @@ enum {
 	OCRDMA_QP_PARAMS_STATE_MASK		= BIT(5) | BIT(6) | BIT(7),
 	OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC	= BIT(8),
 	OCRDMA_QP_PARAMS_FLAGS_INB_ATEN		= BIT(9),
+	OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_SHIFT    = 11,
+	OCRDMA_QP_PARAMS_FLAGS_L3_TYPE_MASK     = BIT(11) | BIT(12) | BIT(13),
 	OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT	= 16,
 	OCRDMA_QP_PARAMS_MAX_SGE_RECV_MASK	= 0xFFFF <<
 					OCRDMA_QP_PARAMS_MAX_SGE_RECV_SHIFT,
@@ -1735,8 +1742,11 @@ enum {
 
 	/* w1 */
 	OCRDMA_CQE_UD_XFER_LEN_SHIFT	= 16,
+	OCRDMA_CQE_UD_XFER_LEN_MASK     = 0x1FFF,
 	OCRDMA_CQE_PKEY_SHIFT		= 0,
 	OCRDMA_CQE_PKEY_MASK		= 0xFFFF,
+	OCRDMA_CQE_UD_L3TYPE_SHIFT      = 29,
+	OCRDMA_CQE_UD_L3TYPE_MASK       = 0x07,
 
 	/* w2 */
 	OCRDMA_CQE_QPN_SHIFT		= 0,
@@ -1861,7 +1871,7 @@ struct ocrdma_ewqe_ud_hdr {
 	u32 rsvd_dest_qpn;
 	u32 qkey;
 	u32 rsvd_ahid;
-	u32 rsvd;
+	u32 hdr_type;
 };
 
 /* extended wqe followed by hdr_wqe for Fast Memory register */

+ 2 - 2
drivers/infiniband/hw/ocrdma/ocrdma_stats.c

@@ -610,7 +610,7 @@ static char *ocrdma_driver_dbg_stats(struct ocrdma_dev *dev)
 static void ocrdma_update_stats(struct ocrdma_dev *dev)
 {
 	ulong now = jiffies, secs;
-	int status = 0;
+	int status;
 	struct ocrdma_rdma_stats_resp *rdma_stats =
 		      (struct ocrdma_rdma_stats_resp *)dev->stats_mem.va;
 	struct ocrdma_rsrc_stats *rsrc_stats = &rdma_stats->act_rsrc_stats;
@@ -641,7 +641,7 @@ static ssize_t ocrdma_dbgfs_ops_write(struct file *filp,
 {
 	char tmp_str[32];
 	long reset;
-	int status = 0;
+	int status;
 	struct ocrdma_stats *pstats = filp->private_data;
 	struct ocrdma_dev *dev = pstats->dev;
 

+ 26 - 12
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

@@ -419,7 +419,7 @@ static struct ocrdma_pd *_ocrdma_alloc_pd(struct ocrdma_dev *dev,
 					  struct ib_udata *udata)
 {
 	struct ocrdma_pd *pd = NULL;
-	int status = 0;
+	int status;
 
 	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
 	if (!pd)
@@ -468,7 +468,7 @@ static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
 static int _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
 			      struct ocrdma_pd *pd)
 {
-	int status = 0;
+	int status;
 
 	if (dev->pd_mgr->pd_prealloc_valid)
 		status = ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
@@ -596,7 +596,7 @@ map_err:
 
 int ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
 {
-	int status = 0;
+	int status;
 	struct ocrdma_mm *mm, *tmp;
 	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
 	struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
@@ -623,7 +623,7 @@ int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
 	u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
 	unsigned long len = (vma->vm_end - vma->vm_start);
-	int status = 0;
+	int status;
 	bool found;
 
 	if (vma->vm_start & (PAGE_SIZE - 1))
@@ -1285,7 +1285,7 @@ static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
 				struct ib_udata *udata, int dpp_offset,
 				int dpp_credit_lmt, int srq)
 {
-	int status = 0;
+	int status;
 	u64 usr_db;
 	struct ocrdma_create_qp_uresp uresp;
 	struct ocrdma_pd *pd = qp->pd;
@@ -1494,9 +1494,7 @@ int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 	 */
 	if (status < 0)
 		return status;
-	status = ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
-
-	return status;
+	return ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
 }
 
 int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -1949,7 +1947,7 @@ int ocrdma_modify_srq(struct ib_srq *ibsrq,
 		      enum ib_srq_attr_mask srq_attr_mask,
 		      struct ib_udata *udata)
 {
-	int status = 0;
+	int status;
 	struct ocrdma_srq *srq;
 
 	srq = get_ocrdma_srq(ibsrq);
@@ -2005,6 +2003,7 @@ static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
 	else
 		ud_hdr->qkey = ud_wr(wr)->remote_qkey;
 	ud_hdr->rsvd_ahid = ah->id;
+	ud_hdr->hdr_type = ah->hdr_type;
 	if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
 		hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
 }
@@ -2717,9 +2716,11 @@ static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
 	return expand;
 }
 
-static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
+static int ocrdma_update_ud_rcqe(struct ocrdma_dev *dev, struct ib_wc *ibwc,
+				 struct ocrdma_cqe *cqe)
 {
 	int status;
+	u16 hdr_type = 0;
 
 	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
 		OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
@@ -2728,7 +2729,17 @@ static int ocrdma_update_ud_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe)
 	ibwc->pkey_index = 0;
 	ibwc->wc_flags = IB_WC_GRH;
 	ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
-					OCRDMA_CQE_UD_XFER_LEN_SHIFT);
+			  OCRDMA_CQE_UD_XFER_LEN_SHIFT) &
+			  OCRDMA_CQE_UD_XFER_LEN_MASK;
+
+	if (ocrdma_is_udp_encap_supported(dev)) {
+		hdr_type = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
+			    OCRDMA_CQE_UD_L3TYPE_SHIFT) &
+			    OCRDMA_CQE_UD_L3TYPE_MASK;
+		ibwc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
+		ibwc->network_hdr_type = hdr_type;
+	}
+
 	return status;
 }
 
@@ -2791,12 +2802,15 @@ static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
 static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
 				     struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
 {
+	struct ocrdma_dev *dev;
+
+	dev = get_ocrdma_dev(qp->ibqp.device);
 	ibwc->opcode = IB_WC_RECV;
 	ibwc->qp = &qp->ibqp;
 	ibwc->status = IB_WC_SUCCESS;
 
 	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
-		ocrdma_update_ud_rcqe(ibwc, cqe);
+		ocrdma_update_ud_rcqe(dev, ibwc, cqe);
 	else
 		ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
 
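
ocrdma_update_ud_rcqe() above now reports the L3 header type of UD completions through the ib_wc when the device supports RoCEv2 (UDP encapsulation). A hedged sketch of how a kernel consumer of such completions might read that field; the function name and poll context are illustrative, not taken from this patch set:

#include <linux/printk.h>
#include <rdma/ib_verbs.h>

/* Inspect one polled work completion and note its network header type. */
static void demo_handle_recv_wc(struct ib_wc *wc)
{
	if (wc->status != IB_WC_SUCCESS)
		return;

	if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE) {
		/* RDMA_NETWORK_IPV4/IPV6 indicate a RoCEv2 (UDP) packet. */
		if (wc->network_hdr_type == RDMA_NETWORK_IPV4)
			pr_debug("UD recv arrived over RoCEv2/IPv4\n");
	}
}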

+ 1 - 1
drivers/infiniband/hw/qib/Kconfig

@@ -1,6 +1,6 @@
 config INFINIBAND_QIB
 	tristate "Intel PCIe HCA support"
-	depends on 64BIT
+	depends on 64BIT && INFINIBAND_RDMAVT
 	---help---
 	This is a low-level driver for Intel PCIe QLE InfiniBand host
 	channel adapters.  This driver does not support the Intel

+ 5 - 5
drivers/infiniband/hw/qib/Makefile

@@ -1,11 +1,11 @@
 obj-$(CONFIG_INFINIBAND_QIB) += ib_qib.o
 
-ib_qib-y := qib_cq.o qib_diag.o qib_dma.o qib_driver.o qib_eeprom.o \
-	qib_file_ops.o qib_fs.o qib_init.o qib_intr.o qib_keys.o \
-	qib_mad.o qib_mmap.o qib_mr.o qib_pcie.o qib_pio_copy.o \
-	qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o qib_srq.o \
+ib_qib-y := qib_diag.o qib_driver.o qib_eeprom.o \
+	qib_file_ops.o qib_fs.o qib_init.o qib_intr.o \
+	qib_mad.o qib_pcie.o qib_pio_copy.o \
+	qib_qp.o qib_qsfp.o qib_rc.o qib_ruc.o qib_sdma.o \
 	qib_sysfs.o qib_twsi.o qib_tx.o qib_uc.o qib_ud.o \
-	qib_user_pages.o qib_user_sdma.o qib_verbs_mcast.o qib_iba7220.o \
+	qib_user_pages.o qib_user_sdma.o qib_iba7220.o \
 	qib_sd7220.o qib_iba7322.o qib_verbs.o
 
 # 6120 has no fallback if no MSI interrupts, others can do INTx

+ 21 - 12
drivers/infiniband/hw/qib/qib.h

@@ -52,6 +52,7 @@
 #include <linux/kref.h>
 #include <linux/sched.h>
 #include <linux/kthread.h>
+#include <rdma/rdma_vt.h>
 
 #include "qib_common.h"
 #include "qib_verbs.h"
@@ -229,9 +230,6 @@ struct qib_ctxtdata {
 	u8 redirect_seq_cnt;
 	/* ctxt rcvhdrq head offset */
 	u32 head;
-	/* lookaside fields */
-	struct qib_qp *lookaside_qp;
-	u32 lookaside_qpn;
 	/* QPs waiting for context processing */
 	struct list_head qp_wait_list;
 #ifdef CONFIG_DEBUG_FS
@@ -240,7 +238,7 @@ struct qib_ctxtdata {
 #endif
 };
 
-struct qib_sge_state;
+struct rvt_sge_state;
 
 struct qib_sdma_txreq {
 	int                 flags;
@@ -258,14 +256,14 @@ struct qib_sdma_desc {
 
 struct qib_verbs_txreq {
 	struct qib_sdma_txreq   txreq;
-	struct qib_qp           *qp;
-	struct qib_swqe         *wqe;
+	struct rvt_qp           *qp;
+	struct rvt_swqe         *wqe;
 	u32                     dwords;
 	u16                     hdr_dwords;
 	u16                     hdr_inx;
 	struct qib_pio_header	*align_buf;
-	struct qib_mregion	*mr;
-	struct qib_sge_state    *ss;
+	struct rvt_mregion	*mr;
+	struct rvt_sge_state    *ss;
 };
 
 #define QIB_SDMA_TXREQ_F_USELARGEBUF  0x1
@@ -1096,8 +1094,6 @@ struct qib_devdata {
 	u16 psxmitwait_check_rate;
 	/* high volume overflow errors defered to tasklet */
 	struct tasklet_struct error_tasklet;
-	/* per device cq worker */
-	struct kthread_worker *worker;
 
 	int assigned_node_id; /* NUMA node closest to HCA */
 };
@@ -1135,8 +1131,9 @@ extern spinlock_t qib_devs_lock;
 extern struct qib_devdata *qib_lookup(int unit);
 extern u32 qib_cpulist_count;
 extern unsigned long *qib_cpulist;
-
+extern u16 qpt_mask;
 extern unsigned qib_cc_table_size;
+
 int qib_init(struct qib_devdata *, int);
 int init_chip_wc_pat(struct qib_devdata *dd, u32);
 int qib_enable_wc(struct qib_devdata *dd);
@@ -1323,7 +1320,7 @@ void __qib_sdma_intr(struct qib_pportdata *);
 void qib_sdma_intr(struct qib_pportdata *);
 void qib_user_sdma_send_desc(struct qib_pportdata *dd,
 			struct list_head *pktlist);
-int qib_sdma_verbs_send(struct qib_pportdata *, struct qib_sge_state *,
+int qib_sdma_verbs_send(struct qib_pportdata *, struct rvt_sge_state *,
 			u32, struct qib_verbs_txreq *);
 /* ppd->sdma_lock should be locked before calling this. */
 int qib_sdma_make_progress(struct qib_pportdata *dd);
@@ -1454,6 +1451,8 @@ u64 qib_sps_ints(void);
 dma_addr_t qib_map_page(struct pci_dev *, struct page *, unsigned long,
 			  size_t, int);
 const char *qib_get_unit_name(int unit);
+const char *qib_get_card_name(struct rvt_dev_info *rdi);
+struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi);
 
 /*
  * Flush write combining store buffers (if present) and perform a write
@@ -1540,4 +1539,14 @@ struct qib_hwerror_msgs {
 void qib_format_hwerrors(u64 hwerrs,
			 const struct qib_hwerror_msgs *hwerrmsgs,
			 size_t nhwerrmsgs, char *msg, size_t lmsg);
+
+void qib_stop_send_queue(struct rvt_qp *qp);
+void qib_quiesce_qp(struct rvt_qp *qp);
+void qib_flush_qp_waiters(struct rvt_qp *qp);
+int qib_mtu_to_path_mtu(u32 mtu);
+u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu);
+void qib_notify_error_qp(struct rvt_qp *qp);
+int qib_get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+			   struct ib_qp_attr *attr);
+
 #endif                          /* _QIB_KERNEL_H */

+ 0 - 3
drivers/infiniband/hw/qib/qib_common.h

@@ -742,14 +742,11 @@ struct qib_tid_session_member {
 #define SIZE_OF_CRC 1
 
 #define QIB_DEFAULT_P_KEY 0xFFFF
-#define QIB_PERMISSIVE_LID 0xFFFF
 #define QIB_AETH_CREDIT_SHIFT 24
 #define QIB_AETH_CREDIT_MASK 0x1F
 #define QIB_AETH_CREDIT_INVAL 0x1F
 #define QIB_PSN_MASK 0xFFFFFF
 #define QIB_MSN_MASK 0xFFFFFF
-#define QIB_QPN_MASK 0xFFFFFF
-#define QIB_MULTICAST_LID_BASE 0xC000
 #define QIB_EAGER_TID_ID QLOGIC_IB_I_TID_MASK
 #define QIB_MULTICAST_QPN 0xFFFFFF
 

+ 0 - 545
drivers/infiniband/hw/qib/qib_cq.c

@@ -1,545 +0,0 @@
-/*
- * Copyright (c) 2013 Intel Corporation.  All rights reserved.
- * Copyright (c) 2006, 2007, 2008, 2010 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/kthread.h>
-
-#include "qib_verbs.h"
-#include "qib.h"
-
-/**
- * qib_cq_enter - add a new entry to the completion queue
- * @cq: completion queue
- * @entry: work completion entry to add
- * @sig: true if @entry is a solicitated entry
- *
- * This may be called with qp->s_lock held.
- */
-void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int solicited)
-{
-	struct qib_cq_wc *wc;
-	unsigned long flags;
-	u32 head;
-	u32 next;
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	/*
-	 * Note that the head pointer might be writable by user processes.
-	 * Take care to verify it is a sane value.
-	 */
-	wc = cq->queue;
-	head = wc->head;
-	if (head >= (unsigned) cq->ibcq.cqe) {
-		head = cq->ibcq.cqe;
-		next = 0;
-	} else
-		next = head + 1;
-	if (unlikely(next == wc->tail)) {
-		spin_unlock_irqrestore(&cq->lock, flags);
-		if (cq->ibcq.event_handler) {
-			struct ib_event ev;
-
-			ev.device = cq->ibcq.device;
-			ev.element.cq = &cq->ibcq;
-			ev.event = IB_EVENT_CQ_ERR;
-			cq->ibcq.event_handler(&ev, cq->ibcq.cq_context);
-		}
-		return;
-	}
-	if (cq->ip) {
-		wc->uqueue[head].wr_id = entry->wr_id;
-		wc->uqueue[head].status = entry->status;
-		wc->uqueue[head].opcode = entry->opcode;
-		wc->uqueue[head].vendor_err = entry->vendor_err;
-		wc->uqueue[head].byte_len = entry->byte_len;
-		wc->uqueue[head].ex.imm_data =
-			(__u32 __force)entry->ex.imm_data;
-		wc->uqueue[head].qp_num = entry->qp->qp_num;
-		wc->uqueue[head].src_qp = entry->src_qp;
-		wc->uqueue[head].wc_flags = entry->wc_flags;
-		wc->uqueue[head].pkey_index = entry->pkey_index;
-		wc->uqueue[head].slid = entry->slid;
-		wc->uqueue[head].sl = entry->sl;
-		wc->uqueue[head].dlid_path_bits = entry->dlid_path_bits;
-		wc->uqueue[head].port_num = entry->port_num;
-		/* Make sure entry is written before the head index. */
-		smp_wmb();
-	} else
-		wc->kqueue[head] = *entry;
-	wc->head = next;
-
-	if (cq->notify == IB_CQ_NEXT_COMP ||
-	    (cq->notify == IB_CQ_SOLICITED &&
-	     (solicited || entry->status != IB_WC_SUCCESS))) {
-		struct kthread_worker *worker;
-		/*
-		 * This will cause send_complete() to be called in
-		 * another thread.
-		 */
-		smp_rmb();
-		worker = cq->dd->worker;
-		if (likely(worker)) {
-			cq->notify = IB_CQ_NONE;
-			cq->triggered++;
-			queue_kthread_work(worker, &cq->comptask);
-		}
-	}
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-}
-
-/**
- * qib_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.  Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-	struct qib_cq *cq = to_icq(ibcq);
-	struct qib_cq_wc *wc;
-	unsigned long flags;
-	int npolled;
-	u32 tail;
-
-	/* The kernel can only poll a kernel completion queue */
-	if (cq->ip) {
-		npolled = -EINVAL;
-		goto bail;
-	}
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	wc = cq->queue;
-	tail = wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (tail == wc->head)
-			break;
-		/* The kernel doesn't need a RMB since it has the lock. */
-		*entry = wc->kqueue[tail];
-		if (tail >= cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	wc->tail = tail;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
-	return npolled;
-}
-
-static void send_complete(struct kthread_work *work)
-{
-	struct qib_cq *cq = container_of(work, struct qib_cq, comptask);
-
-	/*
-	 * The completion handler will most likely rearm the notification
-	 * and poll for all pending entries.  If a new completion entry
-	 * is added while we are in this routine, queue_work()
-	 * won't call us again until we return so we check triggered to
-	 * see if we need to call the handler again.
-	 */
-	for (;;) {
-		u8 triggered = cq->triggered;
-
-		/*
-		 * IPoIB connected mode assumes the callback is from a
-		 * soft IRQ. We simulate this by blocking "bottom halves".
-		 * See the implementation for ipoib_cm_handle_tx_wc(),
-		 * netif_tx_lock_bh() and netif_tx_lock().
-		 */
-		local_bh_disable();
-		cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
-		local_bh_enable();
-
-		if (cq->triggered == triggered)
-			return;
-	}
-}
-
-/**
- * qib_create_cq - create a completion queue
- * @ibdev: the device this completion queue is attached to
- * @attr: creation attributes
- * @context: unused by the QLogic_IB driver
- * @udata: user data for libibverbs.so
- *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
- * Called by ib_create_cq() in the generic verbs code.
- */
-struct ib_cq *qib_create_cq(struct ib_device *ibdev,
-			    const struct ib_cq_init_attr *attr,
-			    struct ib_ucontext *context,
-			    struct ib_udata *udata)
-{
-	int entries = attr->cqe;
-	struct qib_ibdev *dev = to_idev(ibdev);
-	struct qib_cq *cq;
-	struct qib_cq_wc *wc;
-	struct ib_cq *ret;
-	u32 sz;
-
-	if (attr->flags)
-		return ERR_PTR(-EINVAL);
-
-	if (entries < 1 || entries > ib_qib_max_cqes) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	/* Allocate the completion queue structure. */
-	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
-	if (!cq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
-	/*
-	 * Allocate the completion queue entries and head/tail pointers.
-	 * This is allocated separately so that it can be resized and
-	 * also mapped into user space.
-	 * We need to use vmalloc() in order to support mmap and large
-	 * numbers of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (entries + 1);
-	else
-		sz += sizeof(struct ib_wc) * (entries + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_cq;
-	}
-
-	/*
-	 * Return the address of the WC as the offset to mmap.
-	 * See qib_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-
-		cq->ip = qib_create_mmap_info(dev, sz, context, wc);
-		if (!cq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wc;
-		}
-
-		err = ib_copy_to_udata(udata, &cq->ip->offset,
-				       sizeof(cq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		cq->ip = NULL;
-
-	spin_lock(&dev->n_cqs_lock);
-	if (dev->n_cqs_allocated == ib_qib_max_cqs) {
-		spin_unlock(&dev->n_cqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_cqs_allocated++;
-	spin_unlock(&dev->n_cqs_lock);
-
-	if (cq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	/*
-	 * ib_create_cq() will initialize cq->ibcq except for cq->ibcq.cqe.
-	 * The number of entries should be >= the number requested or return
-	 * an error.
-	 */
-	cq->dd = dd_from_dev(dev);
-	cq->ibcq.cqe = entries;
-	cq->notify = IB_CQ_NONE;
-	cq->triggered = 0;
-	spin_lock_init(&cq->lock);
-	init_kthread_work(&cq->comptask, send_complete);
-	wc->head = 0;
-	wc->tail = 0;
-	cq->queue = wc;
-
-	ret = &cq->ibcq;
-
-	goto done;
-
-bail_ip:
-	kfree(cq->ip);
-bail_wc:
-	vfree(wc);
-bail_cq:
-	kfree(cq);
-done:
-	return ret;
-}
-
-/**
- * qib_destroy_cq - destroy a completion queue
- * @ibcq: the completion queue to destroy.
- *
- * Returns 0 for success.
- *
- * Called by ib_destroy_cq() in the generic verbs code.
- */
-int qib_destroy_cq(struct ib_cq *ibcq)
-{
-	struct qib_ibdev *dev = to_idev(ibcq->device);
-	struct qib_cq *cq = to_icq(ibcq);
-
-	flush_kthread_work(&cq->comptask);
-	spin_lock(&dev->n_cqs_lock);
-	dev->n_cqs_allocated--;
-	spin_unlock(&dev->n_cqs_lock);
-	if (cq->ip)
-		kref_put(&cq->ip->ref, qib_release_mmap_info);
-	else
-		vfree(cq->queue);
-	kfree(cq);
-
-	return 0;
-}
-
-/**
- * qib_req_notify_cq - change the notification type for a completion queue
- * @ibcq: the completion queue
- * @notify_flags: the type of notification to request
- *
- * Returns 0 for success.
- *
- * This may be called from interrupt context.  Also called by
- * ib_req_notify_cq() in the generic verbs code.
- */
-int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
-{
-	struct qib_cq *cq = to_icq(ibcq);
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&cq->lock, flags);
-	/*
-	 * Don't change IB_CQ_NEXT_COMP to IB_CQ_SOLICITED but allow
-	 * any other transitions (see C11-31 and C11-32 in ch. 11.4.2.2).
-	 */
-	if (cq->notify != IB_CQ_NEXT_COMP)
-		cq->notify = notify_flags & IB_CQ_SOLICITED_MASK;
-
-	if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) &&
-	    cq->queue->head != cq->queue->tail)
-		ret = 1;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-	return ret;
-}
-
-/**
- * qib_resize_cq - change the size of the CQ
- * @ibcq: the completion queue
- *
- * Returns 0 for success.
- */
-int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
-{
-	struct qib_cq *cq = to_icq(ibcq);
-	struct qib_cq_wc *old_wc;
-	struct qib_cq_wc *wc;
-	u32 head, tail, n;
-	int ret;
-	u32 sz;
-
-	if (cqe < 1 || cqe > ib_qib_max_cqes) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	sz = sizeof(*wc);
-	if (udata && udata->outlen >= sizeof(__u64))
-		sz += sizeof(struct ib_uverbs_wc) * (cqe + 1);
-	else
-		sz += sizeof(struct ib_wc) * (cqe + 1);
-	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	/* Check that we can write the offset to mmap. */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		__u64 offset = 0;
-
-		ret = ib_copy_to_udata(udata, &offset, sizeof(offset));
-		if (ret)
-			goto bail_free;
-	}
-
-	spin_lock_irq(&cq->lock);
-	/*
-	 * Make sure head and tail are sane since they
-	 * might be user writable.
-	 */
-	old_wc = cq->queue;
-	head = old_wc->head;
-	if (head > (u32) cq->ibcq.cqe)
-		head = (u32) cq->ibcq.cqe;
-	tail = old_wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	if (head < tail)
-		n = cq->ibcq.cqe + 1 + head - tail;
-	else
-		n = head - tail;
-	if (unlikely((u32)cqe < n)) {
-		ret = -EINVAL;
-		goto bail_unlock;
-	}
-	for (n = 0; tail != head; n++) {
-		if (cq->ip)
-			wc->uqueue[n] = old_wc->uqueue[tail];
-		else
-			wc->kqueue[n] = old_wc->kqueue[tail];
-		if (tail == (u32) cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	cq->ibcq.cqe = cqe;
-	wc->head = n;
-	wc->tail = 0;
-	cq->queue = wc;
-	spin_unlock_irq(&cq->lock);
-
-	vfree(old_wc);
-
-	if (cq->ip) {
-		struct qib_ibdev *dev = to_idev(ibcq->device);
-		struct qib_mmap_info *ip = cq->ip;
-
-		qib_update_mmap_info(dev, ip, sz, wc);
-
-		/*
-		 * Return the offset to mmap.
-		 * See qib_mmap() for details.
-		 */
-		if (udata && udata->outlen >= sizeof(__u64)) {
-			ret = ib_copy_to_udata(udata, &ip->offset,
-					       sizeof(ip->offset));
-			if (ret)
-				goto bail;
-		}
-
-		spin_lock_irq(&dev->pending_lock);
-		if (list_empty(&ip->pending_mmaps))
-			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = 0;
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&cq->lock);
-bail_free:
-	vfree(wc);
-bail:
-	return ret;
-}
-
-int qib_cq_init(struct qib_devdata *dd)
-{
-	int ret = 0;
-	int cpu;
-	struct task_struct *task;
-
-	if (dd->worker)
-		return 0;
-	dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
-	if (!dd->worker)
-		return -ENOMEM;
-	init_kthread_worker(dd->worker);
-	task = kthread_create_on_node(
-		kthread_worker_fn,
-		dd->worker,
-		dd->assigned_node_id,
-		"qib_cq%d", dd->unit);
-	if (IS_ERR(task))
-		goto task_fail;
-	cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
-	kthread_bind(task, cpu);
-	wake_up_process(task);
-out:
-	return ret;
-task_fail:
-	ret = PTR_ERR(task);
-	kfree(dd->worker);
-	dd->worker = NULL;
-	goto out;
-}
-
-void qib_cq_exit(struct qib_devdata *dd)
-{
-	struct kthread_worker *worker;
-
-	worker = dd->worker;
-	if (!worker)
-		return;
-	/* blocks future queuing from send_complete() */
-	dd->worker = NULL;
-	smp_wmb();
-	flush_kthread_worker(worker);
-	kthread_stop(worker->task);
-	kfree(worker);
-}

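A note on the file removed above: qib_cq_enter() implements the classic one-slot-open ring buffer (head == tail means empty; advancing head onto tail means full), which is why a queue sized for cqe entries is allocated with cqe + 1 slots. A minimal, driver-independent C sketch of that wraparound rule, using hypothetical names rather than the rdmavt code that takes over this functionality:

/* Illustrative sketch only -- not the rdmavt replacement. */
#include <stdbool.h>

struct cq_ring {
	unsigned int head;	/* next slot the producer writes */
	unsigned int tail;	/* next slot the consumer reads  */
	unsigned int cqe;	/* last valid index; capacity is cqe + 1 */
};

/* Same wraparound rule qib_cq_enter() used for the head index. */
static unsigned int ring_next(const struct cq_ring *r, unsigned int idx)
{
	return (idx >= r->cqe) ? 0 : idx + 1;
}

/* Full when one more entry would make head catch up with tail;
 * qib_cq_enter() reported that case as IB_EVENT_CQ_ERR. */
static bool ring_full(const struct cq_ring *r)
{
	return ring_next(r, r->head) == r->tail;
}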
+ 39 - 32
drivers/infiniband/hw/qib/qib_driver.c

@@ -90,6 +90,22 @@ const char *qib_get_unit_name(int unit)
 	return iname;
 }
 
+const char *qib_get_card_name(struct rvt_dev_info *rdi)
+{
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(ibdev,
+					      struct qib_devdata, verbs_dev);
+	return qib_get_unit_name(dd->unit);
+}
+
+struct pci_dev *qib_get_pci_dev(struct rvt_dev_info *rdi)
+{
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(ibdev,
+					      struct qib_devdata, verbs_dev);
+	return dd->pcidev;
+}
+
 /*
  * Return count of units with at least one port ACTIVE.
  */
@@ -306,7 +322,9 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 		struct qib_ib_header *hdr = (struct qib_ib_header *) rhdr;
 		struct qib_other_headers *ohdr = NULL;
 		struct qib_ibport *ibp = &ppd->ibport_data;
-		struct qib_qp *qp = NULL;
+		struct qib_devdata *dd = ppd->dd;
+		struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+		struct rvt_qp *qp = NULL;
 		u32 tlen = qib_hdrget_length_in_bytes(rhf_addr);
 		u16 lid  = be16_to_cpu(hdr->lrh[1]);
 		int lnh = be16_to_cpu(hdr->lrh[0]) & 3;
@@ -319,7 +337,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 		if (tlen < 24)
 			goto drop;
 
-		if (lid < QIB_MULTICAST_LID_BASE) {
+		if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 			lid &= ~((1 << ppd->lmc) - 1);
 			if (unlikely(lid != ppd->lid))
 				goto drop;
@@ -346,13 +364,16 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 		psn = be32_to_cpu(ohdr->bth[2]);
 
 		/* Get the destination QP number. */
-		qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+		qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 		if (qp_num != QIB_MULTICAST_QPN) {
 			int ruc_res;
 
-			qp = qib_lookup_qpn(ibp, qp_num);
-			if (!qp)
+			rcu_read_lock();
+			qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+			if (!qp) {
+				rcu_read_unlock();
 				goto drop;
+			}
 
 			/*
 			 * Handle only RC QPs - for other QP types drop error
@@ -361,9 +382,9 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 			spin_lock(&qp->r_lock);
 
 			/* Check for valid receive state. */
-			if (!(ib_qib_state_ops[qp->state] &
-			      QIB_PROCESS_RECV_OK)) {
-				ibp->n_pkt_drops++;
+			if (!(ib_rvt_state_ops[qp->state] &
+			      RVT_PROCESS_RECV_OK)) {
+				ibp->rvp.n_pkt_drops++;
 				goto unlock;
 			}
 
@@ -383,7 +404,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 				    IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST) {
 					diff = qib_cmp24(psn, qp->r_psn);
 					if (!qp->r_nak_state && diff >= 0) {
-						ibp->n_rc_seqnak++;
+						ibp->rvp.n_rc_seqnak++;
 						qp->r_nak_state =
 							IB_NAK_PSN_ERROR;
 						/* Use the expected PSN. */
@@ -398,7 +419,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 						 */
 						if (list_empty(&qp->rspwait)) {
 							qp->r_flags |=
-								QIB_R_RSP_NAK;
+								RVT_R_RSP_NAK;
 							atomic_inc(
 								&qp->refcount);
 							list_add_tail(
@@ -419,12 +440,7 @@ static u32 qib_rcv_hdrerr(struct qib_ctxtdata *rcd, struct qib_pportdata *ppd,
 
 unlock:
 			spin_unlock(&qp->r_lock);
-			/*
-			 * Notify qib_destroy_qp() if it is waiting
-			 * for us to finish.
-			 */
-			if (atomic_dec_and_test(&qp->refcount))
-				wake_up(&qp->wait);
+			rcu_read_unlock();
 		} /* Unicast QP */
 	} /* Valid packet with TIDErr */
 
@@ -456,7 +472,7 @@ u32 qib_kreceive(struct qib_ctxtdata *rcd, u32 *llic, u32 *npkts)
 	u32 eflags, etype, tlen, i = 0, updegr = 0, crcs = 0;
 	int last;
 	u64 lval;
-	struct qib_qp *qp, *nqp;
+	struct rvt_qp *qp, *nqp;
 
 	l = rcd->head;
 	rhf_addr = (__le32 *) rcd->rcvhdrq + l + dd->rhf_offset;
@@ -549,15 +565,6 @@ move_along:
 			updegr = 0;
 		}
 	}
-	/*
-	 * Notify qib_destroy_qp() if it is waiting
-	 * for lookaside_qp to finish.
-	 */
-	if (rcd->lookaside_qp) {
-		if (atomic_dec_and_test(&rcd->lookaside_qp->refcount))
-			wake_up(&rcd->lookaside_qp->wait);
-		rcd->lookaside_qp = NULL;
-	}
 
 	rcd->head = l;
 
@@ -567,17 +574,17 @@ move_along:
 	 */
 	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
 		list_del_init(&qp->rspwait);
-		if (qp->r_flags & QIB_R_RSP_NAK) {
-			qp->r_flags &= ~QIB_R_RSP_NAK;
+		if (qp->r_flags & RVT_R_RSP_NAK) {
+			qp->r_flags &= ~RVT_R_RSP_NAK;
 			qib_send_rc_ack(qp);
 		}
-		if (qp->r_flags & QIB_R_RSP_SEND) {
+		if (qp->r_flags & RVT_R_RSP_SEND) {
 			unsigned long flags;
 
-			qp->r_flags &= ~QIB_R_RSP_SEND;
+			qp->r_flags &= ~RVT_R_RSP_SEND;
 			spin_lock_irqsave(&qp->s_lock, flags);
-			if (ib_qib_state_ops[qp->state] &
-					QIB_PROCESS_OR_FLUSH_SEND)
+			if (ib_rvt_state_ops[qp->state] &
+					RVT_PROCESS_OR_FLUSH_SEND)
 				qib_schedule_send(qp);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 		}

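The qib_rcv_hdrerr() hunks above replace the driver's refcounted qib_lookup_qpn() with rdmavt's rvt_lookup_qpn(), whose result is only guaranteed to stay valid inside an RCU read-side critical section. A hedged sketch of that calling convention, with a hypothetical handler body standing in for the real error processing (signature assumed from the added lines above):

/* Sketch of the new lookup contract; error handling is elided. */
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <rdma/rdma_vt.h>
#include <rdma/rdmavt_qp.h>

static void handle_pkt_err(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
			   u32 qp_num)
{
	struct rvt_qp *qp;

	rcu_read_lock();		/* pins the QP instead of a refcount */
	qp = rvt_lookup_qpn(rdi, rvp, qp_num);
	if (!qp) {
		rcu_read_unlock();
		return;			/* the old code used "goto drop" here */
	}
	spin_lock(&qp->r_lock);		/* per-QP receive state still needs its lock */
	/* ... inspect qp->state, PSNs, NAK state, as in the hunk above ... */
	spin_unlock(&qp->r_lock);
	rcu_read_unlock();		/* replaces atomic_dec_and_test()/wake_up() */
}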
+ 4 - 4
drivers/infiniband/hw/qib/qib_iba6120.c

@@ -2956,13 +2956,13 @@ static void pma_6120_timer(unsigned long data)
 	struct qib_ibport *ibp = &ppd->ibport_data;
 	unsigned long flags;
 
-	spin_lock_irqsave(&ibp->lock, flags);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
 	if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED) {
 		cs->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
 		qib_snapshot_counters(ppd, &cs->sword, &cs->rword,
 				      &cs->spkts, &cs->rpkts, &cs->xmit_wait);
 		mod_timer(&cs->pma_timer,
-			  jiffies + usecs_to_jiffies(ibp->pma_sample_interval));
+		      jiffies + usecs_to_jiffies(ibp->rvp.pma_sample_interval));
 	} else if (cs->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
 		u64 ta, tb, tc, td, te;
 
@@ -2975,11 +2975,11 @@ static void pma_6120_timer(unsigned long data)
 		cs->rpkts = td - cs->rpkts;
 		cs->xmit_wait = te - cs->xmit_wait;
 	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 }
 
 /*
- * Note that the caller has the ibp->lock held.
+ * Note that the caller has the ibp->rvp.lock held.
  */
 static void qib_set_cntr_6120_sample(struct qib_pportdata *ppd, u32 intv,
 				     u32 start)

+ 2 - 4
drivers/infiniband/hw/qib/qib_iba7322.c

@@ -2910,8 +2910,6 @@ static void qib_setup_7322_cleanup(struct qib_devdata *dd)
 			spin_unlock_irqrestore(&dd->cspec->gpio_lock, flags);
 			qib_qsfp_deinit(&dd->pport[i].cpspec->qsfp_data);
 		}
-		if (dd->pport[i].ibport_data.smi_ah)
-			ib_destroy_ah(&dd->pport[i].ibport_data.smi_ah->ibah);
 	}
 }
 
@@ -5497,7 +5495,7 @@ static void try_7322_ipg(struct qib_pportdata *ppd)
 	unsigned delay;
 	int ret;
 
-	agent = ibp->send_agent;
+	agent = ibp->rvp.send_agent;
 	if (!agent)
 		goto retry;
 
@@ -5515,7 +5513,7 @@ static void try_7322_ipg(struct qib_pportdata *ppd)
 			ret = PTR_ERR(ah);
 		else {
 			send_buf->ah = ah;
-			ibp->smi_ah = to_iah(ah);
+			ibp->smi_ah = ibah_to_rvtah(ah);
 			ret = 0;
 		}
 	} else {

+ 18 - 7
drivers/infiniband/hw/qib/qib_init.c

@@ -42,6 +42,7 @@
 #ifdef CONFIG_INFINIBAND_QIB_DCA
 #include <linux/dca.h>
 #endif
+#include <rdma/rdma_vt.h>
 
 #include "qib.h"
 #include "qib_common.h"
@@ -244,6 +245,13 @@ int qib_init_pportdata(struct qib_pportdata *ppd, struct qib_devdata *dd,
 		alloc_percpu(struct qib_pma_counters);
 	if (!ppd->ibport_data.pmastats)
 		return -ENOMEM;
+	ppd->ibport_data.rvp.rc_acks = alloc_percpu(u64);
+	ppd->ibport_data.rvp.rc_qacks = alloc_percpu(u64);
+	ppd->ibport_data.rvp.rc_delayed_comp = alloc_percpu(u64);
+	if (!(ppd->ibport_data.rvp.rc_acks) ||
+	    !(ppd->ibport_data.rvp.rc_qacks) ||
+	    !(ppd->ibport_data.rvp.rc_delayed_comp))
+		return -ENOMEM;
 
 	if (qib_cc_table_size < IB_CCT_MIN_ENTRIES)
 		goto bail;
@@ -449,8 +457,6 @@ static int loadtime_init(struct qib_devdata *dd)
 	init_timer(&dd->intrchk_timer);
 	dd->intrchk_timer.function = verify_interrupt;
 	dd->intrchk_timer.data = (unsigned long) dd;
-
-	ret = qib_cq_init(dd);
 done:
 	return ret;
 }
@@ -631,6 +637,9 @@ wq_error:
 static void qib_free_pportdata(struct qib_pportdata *ppd)
 {
 	free_percpu(ppd->ibport_data.pmastats);
+	free_percpu(ppd->ibport_data.rvp.rc_acks);
+	free_percpu(ppd->ibport_data.rvp.rc_qacks);
+	free_percpu(ppd->ibport_data.rvp.rc_delayed_comp);
 	ppd->ibport_data.pmastats = NULL;
 }
 
@@ -1081,7 +1090,7 @@ void qib_free_devdata(struct qib_devdata *dd)
 	qib_dbg_ibdev_exit(&dd->verbs_dev);
 #endif
 	free_percpu(dd->int_counter);
-	ib_dealloc_device(&dd->verbs_dev.ibdev);
+	ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
 }
 
 u64 qib_int_counter(struct qib_devdata *dd)
@@ -1120,9 +1129,12 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
 {
 	unsigned long flags;
 	struct qib_devdata *dd;
-	int ret;
+	int ret, nports;
 
-	dd = (struct qib_devdata *) ib_alloc_device(sizeof(*dd) + extra);
+	/* extra is * number of ports */
+	nports = extra / sizeof(struct qib_pportdata);
+	dd = (struct qib_devdata *)rvt_alloc_device(sizeof(*dd) + extra,
+						    nports);
 	if (!dd)
 		return ERR_PTR(-ENOMEM);
 
@@ -1171,7 +1183,7 @@ struct qib_devdata *qib_alloc_devdata(struct pci_dev *pdev, size_t extra)
 bail:
 	if (!list_empty(&dd->list))
 		list_del_init(&dd->list);
-	ib_dealloc_device(&dd->verbs_dev.ibdev);
+	ib_dealloc_device(&dd->verbs_dev.rdi.ibdev);
 	return ERR_PTR(ret);
 }
 
@@ -1421,7 +1433,6 @@ static void cleanup_device_data(struct qib_devdata *dd)
 	}
 	kfree(tmp);
 	kfree(dd->boardname);
-	qib_cq_exit(dd);
 }
 
 /*

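The qib_init.c hunks above add three per-CPU RC counters (rc_acks, rc_qacks, rc_delayed_comp) next to the existing pmastats allocation and free them in qib_free_pportdata(). For readers unfamiliar with the pattern, a small hedged sketch of how such a per-CPU u64 counter is typically allocated, bumped, summed and released (generic percpu API, not qib-specific code):

#include <linux/types.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>

/* Allocate one u64 slot per possible CPU; returns NULL on failure. */
static u64 __percpu *counter_alloc(void)
{
	return alloc_percpu(u64);
}

/* Fast path: lockless increment of this CPU's slot. */
static void counter_bump(u64 __percpu *ctr)
{
	this_cpu_inc(*ctr);
}

/* Slow path (stats read): fold all per-CPU slots into one total. */
static u64 counter_read(u64 __percpu *ctr)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += *per_cpu_ptr(ctr, cpu);
	return sum;
}

/* Teardown, mirroring qib_free_pportdata(). */
static void counter_free(u64 __percpu *ctr)
{
	free_percpu(ctr);
}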
+ 1 - 1
drivers/infiniband/hw/qib/qib_intr.c

@@ -74,7 +74,7 @@ static void signal_ib_event(struct qib_pportdata *ppd, enum ib_event_type ev)
 	struct ib_event event;
 	struct qib_devdata *dd = ppd->dd;
 
-	event.device = &dd->verbs_dev.ibdev;
+	event.device = &dd->verbs_dev.rdi.ibdev;
 	event.element.port_num = ppd->port;
 	event.event = ev;
 	ib_dispatch_event(&event);

+ 16 - 170
drivers/infiniband/hw/qib/qib_keys.c

@@ -46,20 +46,20 @@
  *
  */
 
-int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
+int qib_alloc_lkey(struct rvt_mregion *mr, int dma_region)
 {
 	unsigned long flags;
 	u32 r;
 	u32 n;
 	int ret = 0;
 	struct qib_ibdev *dev = to_idev(mr->pd->device);
-	struct qib_lkey_table *rkt = &dev->lk_table;
+	struct rvt_lkey_table *rkt = &dev->lk_table;
 
 	spin_lock_irqsave(&rkt->lock, flags);
 
 	/* special case for dma_mr lkey == 0 */
 	if (dma_region) {
-		struct qib_mregion *tmr;
+		struct rvt_mregion *tmr;
 
 		tmr = rcu_access_pointer(dev->dma_mr);
 		if (!tmr) {
@@ -90,8 +90,8 @@ int qib_alloc_lkey(struct qib_mregion *mr, int dma_region)
 	 * bits are capped in qib_verbs.c to insure enough bits
 	 * for generation number
 	 */
-	mr->lkey = (r << (32 - ib_qib_lkey_table_size)) |
-		((((1 << (24 - ib_qib_lkey_table_size)) - 1) & rkt->gen)
+	mr->lkey = (r << (32 - ib_rvt_lkey_table_size)) |
+		((((1 << (24 - ib_rvt_lkey_table_size)) - 1) & rkt->gen)
 		 << 8);
 	if (mr->lkey == 0) {
 		mr->lkey |= 1 << 8;
@@ -114,13 +114,13 @@ bail:
  * qib_free_lkey - free an lkey
  * @mr: mr to free from tables
  */
-void qib_free_lkey(struct qib_mregion *mr)
+void qib_free_lkey(struct rvt_mregion *mr)
 {
 	unsigned long flags;
 	u32 lkey = mr->lkey;
 	u32 r;
 	struct qib_ibdev *dev = to_idev(mr->pd->device);
-	struct qib_lkey_table *rkt = &dev->lk_table;
+	struct rvt_lkey_table *rkt = &dev->lk_table;
 
 	spin_lock_irqsave(&rkt->lock, flags);
 	if (!mr->lkey_published)
@@ -128,7 +128,7 @@ void qib_free_lkey(struct qib_mregion *mr)
 	if (lkey == 0)
 		RCU_INIT_POINTER(dev->dma_mr, NULL);
 	else {
-		r = lkey >> (32 - ib_qib_lkey_table_size);
+		r = lkey >> (32 - ib_rvt_lkey_table_size);
 		RCU_INIT_POINTER(rkt->table[r], NULL);
 	}
 	qib_put_mr(mr);
@@ -137,105 +137,6 @@ out:
 	spin_unlock_irqrestore(&rkt->lock, flags);
 }
 
-/**
- * qib_lkey_ok - check IB SGE for validity and initialize
- * @rkt: table containing lkey to check SGE against
- * @pd: protection domain
- * @isge: outgoing internal SGE
- * @sge: SGE to check
- * @acc: access flags
- *
- * Return 1 if valid and successful, otherwise returns 0.
- *
- * increments the reference count upon success
- *
- * Check the IB SGE for validity and initialize our internal version
- * of it.
- */
-int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
-		struct qib_sge *isge, struct ib_sge *sge, int acc)
-{
-	struct qib_mregion *mr;
-	unsigned n, m;
-	size_t off;
-
-	/*
-	 * We use LKEY == zero for kernel virtual addresses
-	 * (see qib_get_dma_mr and qib_dma.c).
-	 */
-	rcu_read_lock();
-	if (sge->lkey == 0) {
-		struct qib_ibdev *dev = to_idev(pd->ibpd.device);
-
-		if (pd->user)
-			goto bail;
-		mr = rcu_dereference(dev->dma_mr);
-		if (!mr)
-			goto bail;
-		if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
-			goto bail;
-		rcu_read_unlock();
-
-		isge->mr = mr;
-		isge->vaddr = (void *) sge->addr;
-		isge->length = sge->length;
-		isge->sge_length = sge->length;
-		isge->m = 0;
-		isge->n = 0;
-		goto ok;
-	}
-	mr = rcu_dereference(
-		rkt->table[(sge->lkey >> (32 - ib_qib_lkey_table_size))]);
-	if (unlikely(!mr || mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
-		goto bail;
-
-	off = sge->addr - mr->user_base;
-	if (unlikely(sge->addr < mr->user_base ||
-		     off + sge->length > mr->length ||
-		     (mr->access_flags & acc) != acc))
-		goto bail;
-	if (unlikely(!atomic_inc_not_zero(&mr->refcount)))
-		goto bail;
-	rcu_read_unlock();
-
-	off += mr->offset;
-	if (mr->page_shift) {
-		/*
-		page sizes are uniform power of 2 so no loop is necessary
-		entries_spanned_by_off is the number of times the loop below
-		would have executed.
-		*/
-		size_t entries_spanned_by_off;
-
-		entries_spanned_by_off = off >> mr->page_shift;
-		off -= (entries_spanned_by_off << mr->page_shift);
-		m = entries_spanned_by_off/QIB_SEGSZ;
-		n = entries_spanned_by_off%QIB_SEGSZ;
-	} else {
-		m = 0;
-		n = 0;
-		while (off >= mr->map[m]->segs[n].length) {
-			off -= mr->map[m]->segs[n].length;
-			n++;
-			if (n >= QIB_SEGSZ) {
-				m++;
-				n = 0;
-			}
-		}
-	}
-	isge->mr = mr;
-	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
-	isge->length = mr->map[m]->segs[n].length - off;
-	isge->sge_length = sge->length;
-	isge->m = m;
-	isge->n = n;
-ok:
-	return 1;
-bail:
-	rcu_read_unlock();
-	return 0;
-}
-
 /**
  * qib_rkey_ok - check the IB virtual address, length, and RKEY
  * @qp: qp for validation
@@ -249,11 +150,11 @@ bail:
  *
  * increments the reference count upon success
  */
-int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
+int qib_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
 		u32 len, u64 vaddr, u32 rkey, int acc)
 {
-	struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-	struct qib_mregion *mr;
+	struct rvt_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
+	struct rvt_mregion *mr;
 	unsigned n, m;
 	size_t off;
 
@@ -263,7 +164,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 	 */
 	rcu_read_lock();
 	if (rkey == 0) {
-		struct qib_pd *pd = to_ipd(qp->ibqp.pd);
+		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
 		struct qib_ibdev *dev = to_idev(pd->ibpd.device);
 
 		if (pd->user)
@@ -285,7 +186,7 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 	}
 
 	mr = rcu_dereference(
-		rkt->table[(rkey >> (32 - ib_qib_lkey_table_size))]);
+		rkt->table[(rkey >> (32 - ib_rvt_lkey_table_size))]);
 	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
 		goto bail;
 
@@ -308,15 +209,15 @@ int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
 
 		entries_spanned_by_off = off >> mr->page_shift;
 		off -= (entries_spanned_by_off << mr->page_shift);
-		m = entries_spanned_by_off/QIB_SEGSZ;
-		n = entries_spanned_by_off%QIB_SEGSZ;
+		m = entries_spanned_by_off / RVT_SEGSZ;
+		n = entries_spanned_by_off % RVT_SEGSZ;
 	} else {
 		m = 0;
 		n = 0;
 		while (off >= mr->map[m]->segs[n].length) {
 			off -= mr->map[m]->segs[n].length;
 			n++;
-			if (n >= QIB_SEGSZ) {
+			if (n >= RVT_SEGSZ) {
 				m++;
 				n = 0;
 			}
@@ -335,58 +236,3 @@ bail:
 	return 0;
 }
 
-/*
- * Initialize the memory region specified by the work request.
- */
-int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr)
-{
-	struct qib_lkey_table *rkt = &to_idev(qp->ibqp.device)->lk_table;
-	struct qib_pd *pd = to_ipd(qp->ibqp.pd);
-	struct qib_mr *mr = to_imr(wr->mr);
-	struct qib_mregion *mrg;
-	u32 key = wr->key;
-	unsigned i, n, m;
-	int ret = -EINVAL;
-	unsigned long flags;
-	u64 *page_list;
-	size_t ps;
-
-	spin_lock_irqsave(&rkt->lock, flags);
-	if (pd->user || key == 0)
-		goto bail;
-
-	mrg = rcu_dereference_protected(
-		rkt->table[(key >> (32 - ib_qib_lkey_table_size))],
-		lockdep_is_held(&rkt->lock));
-	if (unlikely(mrg == NULL || qp->ibqp.pd != mrg->pd))
-		goto bail;
-
-	if (mr->npages > mrg->max_segs)
-		goto bail;
-
-	ps = mr->ibmr.page_size;
-	if (mr->ibmr.length > ps * mr->npages)
-		goto bail;
-
-	mrg->user_base = mr->ibmr.iova;
-	mrg->iova = mr->ibmr.iova;
-	mrg->lkey = key;
-	mrg->length = mr->ibmr.length;
-	mrg->access_flags = wr->access;
-	page_list = mr->pages;
-	m = 0;
-	n = 0;
-	for (i = 0; i < mr->npages; i++) {
-		mrg->map[m]->segs[n].vaddr = (void *) page_list[i];
-		mrg->map[m]->segs[n].length = ps;
-		if (++n == QIB_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-
-	ret = 0;
-bail:
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	return ret;
-}

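Two pieces of arithmetic in the qib_keys.c hunks are worth spelling out: the lkey-to-table-slot mapping (slot = lkey >> (32 - table_size_bits)) and the conversion of a byte offset into an (m, n) position in the fixed-size segment array when the MR uses a uniform page size. A standalone worked sketch; SEGSZ and the sample numbers are illustrative values, not the driver's constants:

#include <stdint.h>
#include <stdio.h>

#define SEGSZ 16u	/* illustrative segments per map block, not RVT_SEGSZ */

int main(void)
{
	unsigned table_bits = 16;			/* example lkey table size in bits */
	uint32_t lkey = 0x12345678;
	uint32_t slot = lkey >> (32 - table_bits);	/* top bits select the table slot */

	unsigned page_shift = 12;			/* 4 KiB pages */
	uint64_t off = 5 * 4096 + 100;			/* byte offset into the region */
	uint64_t entries = off >> page_shift;		/* whole pages spanned: 5 */
	off -= entries << page_shift;			/* remainder inside the page: 100 */
	uint64_t m = entries / SEGSZ;			/* which map block */
	uint64_t n = entries % SEGSZ;			/* which segment inside it */

	printf("slot=%u m=%llu n=%llu off=%llu\n", slot,
	       (unsigned long long)m, (unsigned long long)n,
	       (unsigned long long)off);
	return 0;
}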
+ 149 - 189
drivers/infiniband/hw/qib/qib_mad.c

@@ -70,7 +70,7 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
 	unsigned long flags;
 	unsigned long timeout;
 
-	agent = ibp->send_agent;
+	agent = ibp->rvp.send_agent;
 	if (!agent)
 		return;
 
@@ -79,7 +79,8 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
 		return;
 
 	/* o14-2 */
-	if (ibp->trap_timeout && time_before(jiffies, ibp->trap_timeout))
+	if (ibp->rvp.trap_timeout &&
+	    time_before(jiffies, ibp->rvp.trap_timeout))
 		return;
 
 	send_buf = ib_create_send_mad(agent, 0, 0, 0, IB_MGMT_MAD_HDR,
@@ -93,42 +94,42 @@ static void qib_send_trap(struct qib_ibport *ibp, void *data, unsigned len)
 	smp->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
 	smp->class_version = 1;
 	smp->method = IB_MGMT_METHOD_TRAP;
-	ibp->tid++;
-	smp->tid = cpu_to_be64(ibp->tid);
+	ibp->rvp.tid++;
+	smp->tid = cpu_to_be64(ibp->rvp.tid);
 	smp->attr_id = IB_SMP_ATTR_NOTICE;
 	/* o14-1: smp->mkey = 0; */
 	memcpy(smp->data, data, len);
 
-	spin_lock_irqsave(&ibp->lock, flags);
-	if (!ibp->sm_ah) {
-		if (ibp->sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
+	if (!ibp->rvp.sm_ah) {
+		if (ibp->rvp.sm_lid != be16_to_cpu(IB_LID_PERMISSIVE)) {
 			struct ib_ah *ah;
 
-			ah = qib_create_qp0_ah(ibp, ibp->sm_lid);
+			ah = qib_create_qp0_ah(ibp, ibp->rvp.sm_lid);
 			if (IS_ERR(ah))
 				ret = PTR_ERR(ah);
 			else {
 				send_buf->ah = ah;
-				ibp->sm_ah = to_iah(ah);
+				ibp->rvp.sm_ah = ibah_to_rvtah(ah);
 				ret = 0;
 			}
 		} else
 			ret = -EINVAL;
 	} else {
-		send_buf->ah = &ibp->sm_ah->ibah;
+		send_buf->ah = &ibp->rvp.sm_ah->ibah;
 		ret = 0;
 	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 	if (!ret)
 		ret = ib_post_send_mad(send_buf, NULL);
 	if (!ret) {
 		/* 4.096 usec. */
-		timeout = (4096 * (1UL << ibp->subnet_timeout)) / 1000;
-		ibp->trap_timeout = jiffies + usecs_to_jiffies(timeout);
+		timeout = (4096 * (1UL << ibp->rvp.subnet_timeout)) / 1000;
+		ibp->rvp.trap_timeout = jiffies + usecs_to_jiffies(timeout);
 	} else {
 		ib_free_send_mad(send_buf);
-		ibp->trap_timeout = 0;
+		ibp->rvp.trap_timeout = 0;
 	}
 }
 
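The throttle at the end of qib_send_trap() deserves a worked example: after a successful send, the port stays quiet for 4.096 us * 2^subnet_timeout, converted to jiffies before being stored in trap_timeout. A small sketch of that arithmetic with an assumed subnet_timeout of 18 (an example value only, not a qib default):

#include <stdio.h>

int main(void)
{
	unsigned long subnet_timeout = 18;	/* example value only */
	/* Same formula as the hunk above; the result is in microseconds. */
	unsigned long timeout_us = (4096 * (1UL << subnet_timeout)) / 1000;

	/* 4.096 us * 2^18 is roughly 1.07 seconds between traps. */
	printf("trap throttle: %lu us (~%lu ms)\n",
	       timeout_us, timeout_us / 1000);
	return 0;
}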
@@ -141,10 +142,10 @@ void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
 	struct ib_mad_notice_attr data;
 
 	if (trap_num == IB_NOTICE_TRAP_BAD_PKEY)
-		ibp->pkey_violations++;
+		ibp->rvp.pkey_violations++;
 	else
-		ibp->qkey_violations++;
-	ibp->n_pkt_drops++;
+		ibp->rvp.qkey_violations++;
+	ibp->rvp.n_pkt_drops++;
 
 	/* Send violation trap */
 	data.generic_type = IB_NOTICE_TYPE_SECURITY;
@@ -205,8 +206,11 @@ static void qib_bad_mkey(struct qib_ibport *ibp, struct ib_smp *smp)
 /*
  * Send a Port Capability Mask Changed trap (ch. 14.3.11).
  */
-void qib_cap_mask_chg(struct qib_ibport *ibp)
+void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num)
 {
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = dd_from_dev(ibdev);
+	struct qib_ibport *ibp = &dd->pport[port_num - 1].ibport_data;
 	struct ib_mad_notice_attr data;
 
 	data.generic_type = IB_NOTICE_TYPE_INFO;
@@ -217,8 +221,8 @@ void qib_cap_mask_chg(struct qib_ibport *ibp)
 	data.toggle_count = 0;
 	memset(&data.details, 0, sizeof(data.details));
 	data.details.ntc_144.lid = data.issuer_lid;
-	data.details.ntc_144.new_cap_mask = cpu_to_be32(ibp->port_cap_flags);
-
+	data.details.ntc_144.new_cap_mask =
+					cpu_to_be32(ibp->rvp.port_cap_flags);
 	qib_send_trap(ibp, &data, sizeof(data));
 }
 
@@ -409,37 +413,38 @@ static int check_mkey(struct qib_ibport *ibp, struct ib_smp *smp, int mad_flags)
 	int ret = 0;
 
 	/* Is the mkey in the process of expiring? */
-	if (ibp->mkey_lease_timeout &&
-	    time_after_eq(jiffies, ibp->mkey_lease_timeout)) {
+	if (ibp->rvp.mkey_lease_timeout &&
+	    time_after_eq(jiffies, ibp->rvp.mkey_lease_timeout)) {
 		/* Clear timeout and mkey protection field. */
-		ibp->mkey_lease_timeout = 0;
-		ibp->mkeyprot = 0;
+		ibp->rvp.mkey_lease_timeout = 0;
+		ibp->rvp.mkeyprot = 0;
 	}
 
-	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->mkey == 0 ||
-	    ibp->mkey == smp->mkey)
+	if ((mad_flags & IB_MAD_IGNORE_MKEY) ||  ibp->rvp.mkey == 0 ||
+	    ibp->rvp.mkey == smp->mkey)
 		valid_mkey = 1;
 
 	/* Unset lease timeout on any valid Get/Set/TrapRepress */
-	if (valid_mkey && ibp->mkey_lease_timeout &&
+	if (valid_mkey && ibp->rvp.mkey_lease_timeout &&
 	    (smp->method == IB_MGMT_METHOD_GET ||
 	     smp->method == IB_MGMT_METHOD_SET ||
 	     smp->method == IB_MGMT_METHOD_TRAP_REPRESS))
-		ibp->mkey_lease_timeout = 0;
+		ibp->rvp.mkey_lease_timeout = 0;
 
 	if (!valid_mkey) {
 		switch (smp->method) {
 		case IB_MGMT_METHOD_GET:
 			/* Bad mkey not a violation below level 2 */
-			if (ibp->mkeyprot < 2)
+			if (ibp->rvp.mkeyprot < 2)
 				break;
 		case IB_MGMT_METHOD_SET:
 		case IB_MGMT_METHOD_TRAP_REPRESS:
-			if (ibp->mkey_violations != 0xFFFF)
-				++ibp->mkey_violations;
-			if (!ibp->mkey_lease_timeout && ibp->mkey_lease_period)
-				ibp->mkey_lease_timeout = jiffies +
-					ibp->mkey_lease_period * HZ;
+			if (ibp->rvp.mkey_violations != 0xFFFF)
+				++ibp->rvp.mkey_violations;
+			if (!ibp->rvp.mkey_lease_timeout &&
+			    ibp->rvp.mkey_lease_period)
+				ibp->rvp.mkey_lease_timeout = jiffies +
+					ibp->rvp.mkey_lease_period * HZ;
 			/* Generate a trap notice. */
 			qib_bad_mkey(ibp, smp);
 			ret = 1;
@@ -489,15 +494,15 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 
 	/* Only return the mkey if the protection field allows it. */
 	if (!(smp->method == IB_MGMT_METHOD_GET &&
-	      ibp->mkey != smp->mkey &&
-	      ibp->mkeyprot == 1))
-		pip->mkey = ibp->mkey;
-	pip->gid_prefix = ibp->gid_prefix;
+	      ibp->rvp.mkey != smp->mkey &&
+	      ibp->rvp.mkeyprot == 1))
+		pip->mkey = ibp->rvp.mkey;
+	pip->gid_prefix = ibp->rvp.gid_prefix;
 	pip->lid = cpu_to_be16(ppd->lid);
-	pip->sm_lid = cpu_to_be16(ibp->sm_lid);
-	pip->cap_mask = cpu_to_be32(ibp->port_cap_flags);
+	pip->sm_lid = cpu_to_be16(ibp->rvp.sm_lid);
+	pip->cap_mask = cpu_to_be32(ibp->rvp.port_cap_flags);
 	/* pip->diag_code; */
-	pip->mkey_lease_period = cpu_to_be16(ibp->mkey_lease_period);
+	pip->mkey_lease_period = cpu_to_be16(ibp->rvp.mkey_lease_period);
 	pip->local_port_num = port;
 	pip->link_width_enabled = ppd->link_width_enabled;
 	pip->link_width_supported = ppd->link_width_supported;
@@ -508,7 +513,7 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	pip->portphysstate_linkdown =
 		(dd->f_ibphys_portstate(ppd->lastibcstat) << 4) |
 		(get_linkdowndefaultstate(ppd) ? 1 : 2);
-	pip->mkeyprot_resv_lmc = (ibp->mkeyprot << 6) | ppd->lmc;
+	pip->mkeyprot_resv_lmc = (ibp->rvp.mkeyprot << 6) | ppd->lmc;
 	pip->linkspeedactive_enabled = (ppd->link_speed_active << 4) |
 		ppd->link_speed_enabled;
 	switch (ppd->ibmtu) {
@@ -529,9 +534,9 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 		mtu = IB_MTU_256;
 		break;
 	}
-	pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->sm_sl;
+	pip->neighbormtu_mastersmsl = (mtu << 4) | ibp->rvp.sm_sl;
 	pip->vlcap_inittype = ppd->vls_supported << 4;  /* InitType = 0 */
-	pip->vl_high_limit = ibp->vl_high_limit;
+	pip->vl_high_limit = ibp->rvp.vl_high_limit;
 	pip->vl_arb_high_cap =
 		dd->f_get_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_CAP);
 	pip->vl_arb_low_cap =
@@ -542,20 +547,20 @@ static int subn_get_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	/* pip->vlstallcnt_hoqlife; */
 	pip->operationalvl_pei_peo_fpi_fpo =
 		dd->f_get_ib_cfg(ppd, QIB_IB_CFG_OP_VLS) << 4;
-	pip->mkey_violations = cpu_to_be16(ibp->mkey_violations);
+	pip->mkey_violations = cpu_to_be16(ibp->rvp.mkey_violations);
 	/* P_KeyViolations are counted by hardware. */
-	pip->pkey_violations = cpu_to_be16(ibp->pkey_violations);
-	pip->qkey_violations = cpu_to_be16(ibp->qkey_violations);
+	pip->pkey_violations = cpu_to_be16(ibp->rvp.pkey_violations);
+	pip->qkey_violations = cpu_to_be16(ibp->rvp.qkey_violations);
 	/* Only the hardware GUID is supported for now */
 	pip->guid_cap = QIB_GUIDS_PER_PORT;
-	pip->clientrereg_resv_subnetto = ibp->subnet_timeout;
+	pip->clientrereg_resv_subnetto = ibp->rvp.subnet_timeout;
 	/* 32.768 usec. response time (guessing) */
 	pip->resv_resptimevalue = 3;
 	pip->localphyerrors_overrunerrors =
 		(get_phyerrthreshold(ppd) << 4) |
 		get_overrunthreshold(ppd);
 	/* pip->max_credit_hint; */
-	if (ibp->port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
+	if (ibp->rvp.port_cap_flags & IB_PORT_LINK_LATENCY_SUP) {
 		u32 v;
 
 		v = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_LINKLATENCY);
@@ -685,13 +690,13 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	event.device = ibdev;
 	event.element.port_num = port;
 
-	ibp->mkey = pip->mkey;
-	ibp->gid_prefix = pip->gid_prefix;
-	ibp->mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
+	ibp->rvp.mkey = pip->mkey;
+	ibp->rvp.gid_prefix = pip->gid_prefix;
+	ibp->rvp.mkey_lease_period = be16_to_cpu(pip->mkey_lease_period);
 
 	lid = be16_to_cpu(pip->lid);
 	/* Must be a valid unicast LID address. */
-	if (lid == 0 || lid >= QIB_MULTICAST_LID_BASE)
+	if (lid == 0 || lid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
 		smp->status |= IB_SMP_INVALID_FIELD;
 	else if (ppd->lid != lid || ppd->lmc != (pip->mkeyprot_resv_lmc & 7)) {
 		if (ppd->lid != lid)
@@ -706,21 +711,21 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	smlid = be16_to_cpu(pip->sm_lid);
 	msl = pip->neighbormtu_mastersmsl & 0xF;
 	/* Must be a valid unicast LID address. */
-	if (smlid == 0 || smlid >= QIB_MULTICAST_LID_BASE)
+	if (smlid == 0 || smlid >= be16_to_cpu(IB_MULTICAST_LID_BASE))
 		smp->status |= IB_SMP_INVALID_FIELD;
-	else if (smlid != ibp->sm_lid || msl != ibp->sm_sl) {
-		spin_lock_irqsave(&ibp->lock, flags);
-		if (ibp->sm_ah) {
-			if (smlid != ibp->sm_lid)
-				ibp->sm_ah->attr.dlid = smlid;
-			if (msl != ibp->sm_sl)
-				ibp->sm_ah->attr.sl = msl;
+	else if (smlid != ibp->rvp.sm_lid || msl != ibp->rvp.sm_sl) {
+		spin_lock_irqsave(&ibp->rvp.lock, flags);
+		if (ibp->rvp.sm_ah) {
+			if (smlid != ibp->rvp.sm_lid)
+				ibp->rvp.sm_ah->attr.dlid = smlid;
+			if (msl != ibp->rvp.sm_sl)
+				ibp->rvp.sm_ah->attr.sl = msl;
 		}
-		spin_unlock_irqrestore(&ibp->lock, flags);
-		if (smlid != ibp->sm_lid)
-			ibp->sm_lid = smlid;
-		if (msl != ibp->sm_sl)
-			ibp->sm_sl = msl;
+		spin_unlock_irqrestore(&ibp->rvp.lock, flags);
+		if (smlid != ibp->rvp.sm_lid)
+			ibp->rvp.sm_lid = smlid;
+		if (msl != ibp->rvp.sm_sl)
+			ibp->rvp.sm_sl = msl;
 		event.event = IB_EVENT_SM_CHANGE;
 		ib_dispatch_event(&event);
 	}
@@ -768,10 +773,10 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 		smp->status |= IB_SMP_INVALID_FIELD;
 	}
 
-	ibp->mkeyprot = pip->mkeyprot_resv_lmc >> 6;
-	ibp->vl_high_limit = pip->vl_high_limit;
+	ibp->rvp.mkeyprot = pip->mkeyprot_resv_lmc >> 6;
+	ibp->rvp.vl_high_limit = pip->vl_high_limit;
 	(void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_VL_HIGH_LIMIT,
-				    ibp->vl_high_limit);
+				    ibp->rvp.vl_high_limit);
 
 	mtu = ib_mtu_enum_to_int((pip->neighbormtu_mastersmsl >> 4) & 0xF);
 	if (mtu == -1)
@@ -789,13 +794,13 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	}
 
 	if (pip->mkey_violations == 0)
-		ibp->mkey_violations = 0;
+		ibp->rvp.mkey_violations = 0;
 
 	if (pip->pkey_violations == 0)
-		ibp->pkey_violations = 0;
+		ibp->rvp.pkey_violations = 0;
 
 	if (pip->qkey_violations == 0)
-		ibp->qkey_violations = 0;
+		ibp->rvp.qkey_violations = 0;
 
 	ore = pip->localphyerrors_overrunerrors;
 	if (set_phyerrthreshold(ppd, (ore >> 4) & 0xF))
@@ -804,7 +809,7 @@ static int subn_set_portinfo(struct ib_smp *smp, struct ib_device *ibdev,
 	if (set_overrunthreshold(ppd, (ore & 0xF)))
 		smp->status |= IB_SMP_INVALID_FIELD;
 
-	ibp->subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
+	ibp->rvp.subnet_timeout = pip->clientrereg_resv_subnetto & 0x1F;
 
 	/*
 	 * Do the port state change now that the other link parameters
@@ -1028,7 +1033,7 @@ static int set_pkeys(struct qib_devdata *dd, u8 port, u16 *pkeys)
 		(void) dd->f_set_ib_cfg(ppd, QIB_IB_CFG_PKEYS, 0);
 
 		event.event = IB_EVENT_PKEY_CHANGE;
-		event.device = &dd->verbs_dev.ibdev;
+		event.device = &dd->verbs_dev.rdi.ibdev;
 		event.element.port_num = port;
 		ib_dispatch_event(&event);
 	}
@@ -1062,7 +1067,7 @@ static int subn_get_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev,
 
 	memset(smp->data, 0, sizeof(smp->data));
 
-	if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP))
+	if (!(ibp->rvp.port_cap_flags & IB_PORT_SL_MAP_SUP))
 		smp->status |= IB_SMP_UNSUP_METHOD;
 	else
 		for (i = 0; i < ARRAY_SIZE(ibp->sl_to_vl); i += 2)
@@ -1078,7 +1083,7 @@ static int subn_set_sl_to_vl(struct ib_smp *smp, struct ib_device *ibdev,
 	u8 *p = (u8 *) smp->data;
 	unsigned i;
 
-	if (!(ibp->port_cap_flags & IB_PORT_SL_MAP_SUP)) {
+	if (!(ibp->rvp.port_cap_flags & IB_PORT_SL_MAP_SUP)) {
 		smp->status |= IB_SMP_UNSUP_METHOD;
 		return reply(smp);
 	}
@@ -1195,20 +1200,20 @@ static int pma_get_portsamplescontrol(struct ib_pma_mad *pmp,
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 		goto bail;
 	}
-	spin_lock_irqsave(&ibp->lock, flags);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
 	p->tick = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_PMA_TICKS);
 	p->sample_status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
 	p->counter_width = 4;   /* 32 bit counters */
 	p->counter_mask0_9 = COUNTER_MASK0_9;
-	p->sample_start = cpu_to_be32(ibp->pma_sample_start);
-	p->sample_interval = cpu_to_be32(ibp->pma_sample_interval);
-	p->tag = cpu_to_be16(ibp->pma_tag);
-	p->counter_select[0] = ibp->pma_counter_select[0];
-	p->counter_select[1] = ibp->pma_counter_select[1];
-	p->counter_select[2] = ibp->pma_counter_select[2];
-	p->counter_select[3] = ibp->pma_counter_select[3];
-	p->counter_select[4] = ibp->pma_counter_select[4];
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	p->sample_start = cpu_to_be32(ibp->rvp.pma_sample_start);
+	p->sample_interval = cpu_to_be32(ibp->rvp.pma_sample_interval);
+	p->tag = cpu_to_be16(ibp->rvp.pma_tag);
+	p->counter_select[0] = ibp->rvp.pma_counter_select[0];
+	p->counter_select[1] = ibp->rvp.pma_counter_select[1];
+	p->counter_select[2] = ibp->rvp.pma_counter_select[2];
+	p->counter_select[3] = ibp->rvp.pma_counter_select[3];
+	p->counter_select[4] = ibp->rvp.pma_counter_select[4];
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 bail:
 	return reply((struct ib_smp *) pmp);
@@ -1233,7 +1238,7 @@ static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
 		goto bail;
 	}
 
-	spin_lock_irqsave(&ibp->lock, flags);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
 
 	/* Port Sampling code owns the PS* HW counters */
 	xmit_flags = ppd->cong_stats.flags;
@@ -1242,18 +1247,18 @@ static int pma_set_portsamplescontrol(struct ib_pma_mad *pmp,
 	if (status == IB_PMA_SAMPLE_STATUS_DONE ||
 	    (status == IB_PMA_SAMPLE_STATUS_RUNNING &&
 	     xmit_flags == IB_PMA_CONG_HW_CONTROL_TIMER)) {
-		ibp->pma_sample_start = be32_to_cpu(p->sample_start);
-		ibp->pma_sample_interval = be32_to_cpu(p->sample_interval);
-		ibp->pma_tag = be16_to_cpu(p->tag);
-		ibp->pma_counter_select[0] = p->counter_select[0];
-		ibp->pma_counter_select[1] = p->counter_select[1];
-		ibp->pma_counter_select[2] = p->counter_select[2];
-		ibp->pma_counter_select[3] = p->counter_select[3];
-		ibp->pma_counter_select[4] = p->counter_select[4];
-		dd->f_set_cntr_sample(ppd, ibp->pma_sample_interval,
-				      ibp->pma_sample_start);
+		ibp->rvp.pma_sample_start = be32_to_cpu(p->sample_start);
+		ibp->rvp.pma_sample_interval = be32_to_cpu(p->sample_interval);
+		ibp->rvp.pma_tag = be16_to_cpu(p->tag);
+		ibp->rvp.pma_counter_select[0] = p->counter_select[0];
+		ibp->rvp.pma_counter_select[1] = p->counter_select[1];
+		ibp->rvp.pma_counter_select[2] = p->counter_select[2];
+		ibp->rvp.pma_counter_select[3] = p->counter_select[3];
+		ibp->rvp.pma_counter_select[4] = p->counter_select[4];
+		dd->f_set_cntr_sample(ppd, ibp->rvp.pma_sample_interval,
+				      ibp->rvp.pma_sample_start);
 	}
 	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 
 	ret = pma_get_portsamplescontrol(pmp, ibdev, port);
 	ret = pma_get_portsamplescontrol(pmp, ibdev, port);
 
 
@@ -1357,8 +1362,8 @@ static int pma_get_portsamplesresult(struct ib_pma_mad *pmp,
 	int i;
 	int i;
 
 
 	memset(pmp->data, 0, sizeof(pmp->data));
 	memset(pmp->data, 0, sizeof(pmp->data));
-	spin_lock_irqsave(&ibp->lock, flags);
-	p->tag = cpu_to_be16(ibp->pma_tag);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
+	p->tag = cpu_to_be16(ibp->rvp.pma_tag);
 	if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER)
 	if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER)
 		p->sample_status = IB_PMA_SAMPLE_STATUS_DONE;
 		p->sample_status = IB_PMA_SAMPLE_STATUS_DONE;
 	else {
 	else {
@@ -1373,11 +1378,11 @@ static int pma_get_portsamplesresult(struct ib_pma_mad *pmp,
 			ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER;
 			ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER;
 		}
 		}
 	}
 	}
-	for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++)
+	for (i = 0; i < ARRAY_SIZE(ibp->rvp.pma_counter_select); i++)
 		p->counter[i] = cpu_to_be32(
 		p->counter[i] = cpu_to_be32(
 			get_cache_hw_sample_counters(
 			get_cache_hw_sample_counters(
-				ppd, ibp->pma_counter_select[i]));
-	spin_unlock_irqrestore(&ibp->lock, flags);
+				ppd, ibp->rvp.pma_counter_select[i]));
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 
 	return reply((struct ib_smp *) pmp);
 }
@@ -1397,8 +1402,8 @@ static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
 
 
 	/* Port Sampling code owns the PS* HW counters */
 	memset(pmp->data, 0, sizeof(pmp->data));
-	spin_lock_irqsave(&ibp->lock, flags);
-	p->tag = cpu_to_be16(ibp->pma_tag);
+	spin_lock_irqsave(&ibp->rvp.lock, flags);
+	p->tag = cpu_to_be16(ibp->rvp.pma_tag);
 	if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_TIMER)
 		p->sample_status = IB_PMA_SAMPLE_STATUS_DONE;
 	else {
@@ -1415,11 +1420,11 @@ static int pma_get_portsamplesresult_ext(struct ib_pma_mad *pmp,
 			ppd->cong_stats.flags = IB_PMA_CONG_HW_CONTROL_TIMER;
 		}
 	}
-	for (i = 0; i < ARRAY_SIZE(ibp->pma_counter_select); i++)
+	for (i = 0; i < ARRAY_SIZE(ibp->rvp.pma_counter_select); i++)
 		p->counter[i] = cpu_to_be64(
 			get_cache_hw_sample_counters(
-				ppd, ibp->pma_counter_select[i]));
-	spin_unlock_irqrestore(&ibp->lock, flags);
+				ppd, ibp->rvp.pma_counter_select[i]));
+	spin_unlock_irqrestore(&ibp->rvp.lock, flags);
 
 
 	return reply((struct ib_smp *) pmp);
 }
@@ -1453,7 +1458,7 @@ static int pma_get_portcounters(struct ib_pma_mad *pmp,
 	cntrs.excessive_buffer_overrun_errors -=
 		ibp->z_excessive_buffer_overrun_errors;
 	cntrs.vl15_dropped -= ibp->z_vl15_dropped;
-	cntrs.vl15_dropped += ibp->n_vl15_dropped;
+	cntrs.vl15_dropped += ibp->rvp.n_vl15_dropped;
 
 
 	memset(pmp->data, 0, sizeof(pmp->data));
 
 
@@ -1546,9 +1551,9 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
 
 	qib_get_counters(ppd, &cntrs);
-	spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+	spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags);
 	xmit_wait_counter = xmit_wait_get_value_delta(ppd);
-	spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+	spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags);
 
 
 	/* Adjust counters for any resets done. */
 	cntrs.symbol_error_counter -= ibp->z_symbol_error_counter;
@@ -1564,7 +1569,7 @@ static int pma_get_portcounters_cong(struct ib_pma_mad *pmp,
 	cntrs.excessive_buffer_overrun_errors -=
 		ibp->z_excessive_buffer_overrun_errors;
 	cntrs.vl15_dropped -= ibp->z_vl15_dropped;
-	cntrs.vl15_dropped += ibp->n_vl15_dropped;
+	cntrs.vl15_dropped += ibp->rvp.n_vl15_dropped;
 	cntrs.port_xmit_data -= ibp->z_port_xmit_data;
 	cntrs.port_rcv_data -= ibp->z_port_rcv_data;
 	cntrs.port_xmit_packets -= ibp->z_port_xmit_packets;
@@ -1743,7 +1748,7 @@ static int pma_set_portcounters(struct ib_pma_mad *pmp,
 			cntrs.excessive_buffer_overrun_errors;
 
 	if (p->counter_select & IB_PMA_SEL_PORT_VL15_DROPPED) {
-		ibp->n_vl15_dropped = 0;
+		ibp->rvp.n_vl15_dropped = 0;
 		ibp->z_vl15_dropped = cntrs.vl15_dropped;
 	}
 
 
@@ -1778,11 +1783,11 @@ static int pma_set_portcounters_cong(struct ib_pma_mad *pmp,
 	ret = pma_get_portcounters_cong(pmp, ibdev, port);
 
 	if (counter_select & IB_PMA_SEL_CONG_XMIT) {
-		spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+		spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags);
 		ppd->cong_stats.counter = 0;
 		dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL,
 				      0x0);
-		spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+		spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags);
 	}
 	if (counter_select & IB_PMA_SEL_CONG_PORT_DATA) {
 		ibp->z_port_xmit_data = cntrs.port_xmit_data;
@@ -1806,7 +1811,7 @@ static int pma_set_portcounters_cong(struct ib_pma_mad *pmp,
 			cntrs.local_link_integrity_errors;
 		ibp->z_excessive_buffer_overrun_errors =
 			cntrs.excessive_buffer_overrun_errors;
-		ibp->n_vl15_dropped = 0;
+		ibp->rvp.n_vl15_dropped = 0;
 		ibp->z_vl15_dropped = cntrs.vl15_dropped;
 	}
 
 
@@ -1916,12 +1921,12 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
 			ret = subn_get_vl_arb(smp, ibdev, port);
 			goto bail;
 		case IB_SMP_ATTR_SM_INFO:
-			if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) {
+			if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) {
 				ret = IB_MAD_RESULT_SUCCESS |
 					IB_MAD_RESULT_CONSUMED;
 				goto bail;
 			}
-			if (ibp->port_cap_flags & IB_PORT_SM) {
+			if (ibp->rvp.port_cap_flags & IB_PORT_SM) {
 				ret = IB_MAD_RESULT_SUCCESS;
 				goto bail;
 			}
@@ -1950,12 +1955,12 @@ static int process_subn(struct ib_device *ibdev, int mad_flags,
 			ret = subn_set_vl_arb(smp, ibdev, port);
 			goto bail;
 		case IB_SMP_ATTR_SM_INFO:
-			if (ibp->port_cap_flags & IB_PORT_SM_DISABLED) {
+			if (ibp->rvp.port_cap_flags & IB_PORT_SM_DISABLED) {
 				ret = IB_MAD_RESULT_SUCCESS |
 					IB_MAD_RESULT_CONSUMED;
 				goto bail;
 			}
-			if (ibp->port_cap_flags & IB_PORT_SM) {
+			if (ibp->rvp.port_cap_flags & IB_PORT_SM) {
 				ret = IB_MAD_RESULT_SUCCESS;
 				goto bail;
 			}
@@ -2443,12 +2448,6 @@ bail:
 	return ret;
 }
 
 
-static void send_handler(struct ib_mad_agent *agent,
-			 struct ib_mad_send_wc *mad_send_wc)
-{
-	ib_free_send_mad(mad_send_wc->send_buf);
-}
-
 static void xmit_wait_timer_func(unsigned long opaque)
 {
 	struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
@@ -2456,7 +2455,7 @@ static void xmit_wait_timer_func(unsigned long opaque)
 	unsigned long flags;
 	u8 status;
 
 
-	spin_lock_irqsave(&ppd->ibport_data.lock, flags);
+	spin_lock_irqsave(&ppd->ibport_data.rvp.lock, flags);
 	if (ppd->cong_stats.flags == IB_PMA_CONG_HW_CONTROL_SAMPLE) {
 		status = dd->f_portcntr(ppd, QIBPORTCNTR_PSSTAT);
 		if (status == IB_PMA_SAMPLE_STATUS_DONE) {
@@ -2469,74 +2468,35 @@ static void xmit_wait_timer_func(unsigned long opaque)
 	ppd->cong_stats.counter = xmit_wait_get_value_delta(ppd);
 	dd->f_set_cntr_sample(ppd, QIB_CONG_TIMER_PSINTERVAL, 0x0);
 done:
-	spin_unlock_irqrestore(&ppd->ibport_data.lock, flags);
+	spin_unlock_irqrestore(&ppd->ibport_data.rvp.lock, flags);
 	mod_timer(&ppd->cong_stats.timer, jiffies + HZ);
 }
 
 
-int qib_create_agents(struct qib_ibdev *dev)
+void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx)
 {
-	struct qib_devdata *dd = dd_from_dev(dev);
-	struct ib_mad_agent *agent;
-	struct qib_ibport *ibp;
-	int p;
-	int ret;
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(ibdev,
+					      struct qib_devdata, verbs_dev);
 
 
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		agent = ib_register_mad_agent(&dev->ibdev, p + 1, IB_QPT_SMI,
-					      NULL, 0, send_handler,
-					      NULL, NULL, 0);
-		if (IS_ERR(agent)) {
-			ret = PTR_ERR(agent);
-			goto err;
-		}
-
-		/* Initialize xmit_wait structure */
-		dd->pport[p].cong_stats.counter = 0;
-		init_timer(&dd->pport[p].cong_stats.timer);
-		dd->pport[p].cong_stats.timer.function = xmit_wait_timer_func;
-		dd->pport[p].cong_stats.timer.data =
-			(unsigned long)(&dd->pport[p]);
-		dd->pport[p].cong_stats.timer.expires = 0;
-		add_timer(&dd->pport[p].cong_stats.timer);
-
-		ibp->send_agent = agent;
-	}
-
-	return 0;
-
-err:
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		if (ibp->send_agent) {
-			agent = ibp->send_agent;
-			ibp->send_agent = NULL;
-			ib_unregister_mad_agent(agent);
-		}
-	}
-
-	return ret;
+	/* Initialize xmit_wait structure */
+	dd->pport[port_idx].cong_stats.counter = 0;
+	init_timer(&dd->pport[port_idx].cong_stats.timer);
+	dd->pport[port_idx].cong_stats.timer.function = xmit_wait_timer_func;
+	dd->pport[port_idx].cong_stats.timer.data =
+		(unsigned long)(&dd->pport[port_idx]);
+	dd->pport[port_idx].cong_stats.timer.expires = 0;
+	add_timer(&dd->pport[port_idx].cong_stats.timer);
 }
 
 
-void qib_free_agents(struct qib_ibdev *dev)
+void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx)
 {
-	struct qib_devdata *dd = dd_from_dev(dev);
-	struct ib_mad_agent *agent;
-	struct qib_ibport *ibp;
-	int p;
-
-	for (p = 0; p < dd->num_pports; p++) {
-		ibp = &dd->pport[p].ibport_data;
-		if (ibp->send_agent) {
-			agent = ibp->send_agent;
-			ibp->send_agent = NULL;
-			ib_unregister_mad_agent(agent);
-		}
-		if (ibp->sm_ah) {
-			ib_destroy_ah(&ibp->sm_ah->ibah);
-			ibp->sm_ah = NULL;
-		}
-		if (dd->pport[p].cong_stats.timer.data)
-			del_timer_sync(&dd->pport[p].cong_stats.timer);
-	}
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(ibdev,
+					      struct qib_devdata, verbs_dev);
+
+	if (dd->pport[port_idx].cong_stats.timer.data)
+		del_timer_sync(&dd->pport[port_idx].cong_stats.timer);
+
+	if (dd->pport[port_idx].ibport_data.smi_ah)
+		ib_destroy_ah(&dd->pport[port_idx].ibport_data.smi_ah->ibah);
 }
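
The two rdmavt notification hooks above recover the driver-private structures purely through nested container_of() chains (rvt_dev_info -> qib_ibdev -> qib_devdata). A minimal standalone sketch of that pattern, using invented stand-in structures and offsetof() in place of the kernel macro:

/* Illustrative only: mimics the container_of() walk done by the notify
 * hooks above, with made-up structure names and no kernel dependencies. */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct core_dev  { int id; };                               /* stands in for rvt_dev_info */
struct verbs_dev { struct core_dev rdi; };                  /* stands in for qib_ibdev    */
struct hw_dev    { int unit; struct verbs_dev verbs_dev; }; /* stands in for qib_devdata  */

static void notify(struct core_dev *rdi)
{
	/* Walk outward twice, like the hooks above do. */
	struct verbs_dev *vd = container_of(rdi, struct verbs_dev, rdi);
	struct hw_dev *hd = container_of(vd, struct hw_dev, verbs_dev);

	printf("unit %d\n", hd->unit);
}

int main(void)
{
	struct hw_dev dev = { .unit = 3 };

	notify(&dev.verbs_dev.rdi);	/* prints "unit 3" */
	return 0;
}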

+ 0 - 174
drivers/infiniband/hw/qib/qib_mmap.c

@@ -1,174 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <asm/pgtable.h>
-
-#include "qib_verbs.h"
-
-/**
- * qib_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct qib_mmap_info
- */
-void qib_release_mmap_info(struct kref *ref)
-{
-	struct qib_mmap_info *ip =
-		container_of(ref, struct qib_mmap_info, ref);
-	struct qib_ibdev *dev = to_idev(ip->context->device);
-
-	spin_lock_irq(&dev->pending_lock);
-	list_del(&ip->pending_mmaps);
-	spin_unlock_irq(&dev->pending_lock);
-
-	vfree(ip->obj);
-	kfree(ip);
-}
-
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void qib_vma_open(struct vm_area_struct *vma)
-{
-	struct qib_mmap_info *ip = vma->vm_private_data;
-
-	kref_get(&ip->ref);
-}
-
-static void qib_vma_close(struct vm_area_struct *vma)
-{
-	struct qib_mmap_info *ip = vma->vm_private_data;
-
-	kref_put(&ip->ref, qib_release_mmap_info);
-}
-
-static const struct vm_operations_struct qib_vm_ops = {
-	.open =     qib_vma_open,
-	.close =    qib_vma_close,
-};
-
-/**
- * qib_mmap - create a new mmap region
- * @context: the IB user context of the process making the mmap() call
- * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
- */
-int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
-{
-	struct qib_ibdev *dev = to_idev(context->device);
-	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-	unsigned long size = vma->vm_end - vma->vm_start;
-	struct qib_mmap_info *ip, *pp;
-	int ret = -EINVAL;
-
-	/*
-	 * Search the device's list of objects waiting for a mmap call.
-	 * Normally, this list is very short since a call to create a
-	 * CQ, QP, or SRQ is soon followed by a call to mmap().
-	 */
-	spin_lock_irq(&dev->pending_lock);
-	list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
-				 pending_mmaps) {
-		/* Only the creator is allowed to mmap the object */
-		if (context != ip->context || (__u64) offset != ip->offset)
-			continue;
-		/* Don't allow a mmap larger than the object. */
-		if (size > ip->size)
-			break;
-
-		list_del_init(&ip->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-
-		ret = remap_vmalloc_range(vma, ip->obj, 0);
-		if (ret)
-			goto done;
-		vma->vm_ops = &qib_vm_ops;
-		vma->vm_private_data = ip;
-		qib_vma_open(vma);
-		goto done;
-	}
-	spin_unlock_irq(&dev->pending_lock);
-done:
-	return ret;
-}
-
-/*
- * Allocate information for qib_mmap
- */
-struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev,
-					   u32 size,
-					   struct ib_ucontext *context,
-					   void *obj) {
-	struct qib_mmap_info *ip;
-
-	ip = kmalloc(sizeof(*ip), GFP_KERNEL);
-	if (!ip)
-		goto bail;
-
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	INIT_LIST_HEAD(&ip->pending_mmaps);
-	ip->size = size;
-	ip->context = context;
-	ip->obj = obj;
-	kref_init(&ip->ref);
-
-bail:
-	return ip;
-}
-
-void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
-			  u32 size, void *obj)
-{
-	size = PAGE_ALIGN(size);
-
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
-
-	ip->size = size;
-	ip->obj = obj;
-}
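
The qib_mmap.c code removed above kept each mmap bookkeeping object alive with a kref: qib_vma_open() takes a reference, qib_vma_close() drops it, and the object is freed when the count reaches zero (rdmavt now provides the equivalent). A rough userspace sketch of that lifecycle, using a plain non-atomic counter and invented names instead of struct kref; not thread-safe:

#include <stdio.h>
#include <stdlib.h>

struct mmap_info {
	int refs;	/* starts at 1, like kref_init() */
	void *obj;
};

static struct mmap_info *mmap_info_create(void *obj)
{
	struct mmap_info *ip = malloc(sizeof(*ip));

	if (!ip)
		return NULL;
	ip->refs = 1;
	ip->obj = obj;
	return ip;
}

static void mmap_info_get(struct mmap_info *ip)	/* cf. qib_vma_open()  */
{
	ip->refs++;
}

static void mmap_info_put(struct mmap_info *ip)	/* cf. qib_vma_close() */
{
	if (--ip->refs == 0) {
		free(ip->obj);		/* vfree(ip->obj) in the removed code */
		free(ip);
	}
}

int main(void)
{
	struct mmap_info *ip = mmap_info_create(malloc(64));

	mmap_info_get(ip);	/* second mapping of the same object */
	mmap_info_put(ip);	/* first unmap: object survives      */
	mmap_info_put(ip);	/* last unmap: object is freed       */
	printf("done\n");
	return 0;
}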

+ 0 - 490
drivers/infiniband/hw/qib/qib_mr.c

@@ -1,490 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <rdma/ib_umem.h>
-#include <rdma/ib_smi.h>
-
-#include "qib.h"
-
-/* Fast memory region */
-struct qib_fmr {
-	struct ib_fmr ibfmr;
-	struct qib_mregion mr;        /* must be last */
-};
-
-static inline struct qib_fmr *to_ifmr(struct ib_fmr *ibfmr)
-{
-	return container_of(ibfmr, struct qib_fmr, ibfmr);
-}
-
-static int init_qib_mregion(struct qib_mregion *mr, struct ib_pd *pd,
-	int count)
-{
-	int m, i = 0;
-	int rval = 0;
-
-	m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
-	for (; i < m; i++) {
-		mr->map[i] = kzalloc(sizeof(*mr->map[0]), GFP_KERNEL);
-		if (!mr->map[i])
-			goto bail;
-	}
-	mr->mapsz = m;
-	init_completion(&mr->comp);
-	/* count returning the ptr to user */
-	atomic_set(&mr->refcount, 1);
-	mr->pd = pd;
-	mr->max_segs = count;
-out:
-	return rval;
-bail:
-	while (i)
-		kfree(mr->map[--i]);
-	rval = -ENOMEM;
-	goto out;
-}
-
-static void deinit_qib_mregion(struct qib_mregion *mr)
-{
-	int i = mr->mapsz;
-
-	mr->mapsz = 0;
-	while (i)
-		kfree(mr->map[--i]);
-}
-
-
-/**
- * qib_get_dma_mr - get a DMA memory region
- * @pd: protection domain for this memory region
- * @acc: access flags
- *
- * Returns the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the
- * struct ib_dma_mapping_ops functions (see qib_dma.c).
- */
-struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc)
-{
-	struct qib_mr *mr = NULL;
-	struct ib_mr *ret;
-	int rval;
-
-	if (to_ipd(pd)->user) {
-		ret = ERR_PTR(-EPERM);
-		goto bail;
-	}
-
-	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
-	if (!mr) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	rval = init_qib_mregion(&mr->mr, pd, 0);
-	if (rval) {
-		ret = ERR_PTR(rval);
-		goto bail;
-	}
-
-
-	rval = qib_alloc_lkey(&mr->mr, 1);
-	if (rval) {
-		ret = ERR_PTR(rval);
-		goto bail_mregion;
-	}
-
-	mr->mr.access_flags = acc;
-	ret = &mr->ibmr;
-done:
-	return ret;
-
-bail_mregion:
-	deinit_qib_mregion(&mr->mr);
-bail:
-	kfree(mr);
-	goto done;
-}
-
-static struct qib_mr *alloc_mr(int count, struct ib_pd *pd)
-{
-	struct qib_mr *mr;
-	int rval = -ENOMEM;
-	int m;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ;
-	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
-	if (!mr)
-		goto bail;
-
-	rval = init_qib_mregion(&mr->mr, pd, count);
-	if (rval)
-		goto bail;
-
-	rval = qib_alloc_lkey(&mr->mr, 0);
-	if (rval)
-		goto bail_mregion;
-	mr->ibmr.lkey = mr->mr.lkey;
-	mr->ibmr.rkey = mr->mr.lkey;
-done:
-	return mr;
-
-bail_mregion:
-	deinit_qib_mregion(&mr->mr);
-bail:
-	kfree(mr);
-	mr = ERR_PTR(rval);
-	goto done;
-}
-
-/**
- * qib_reg_user_mr - register a userspace memory region
- * @pd: protection domain for this memory region
- * @start: starting userspace address
- * @length: length of region to register
- * @mr_access_flags: access flags for this memory region
- * @udata: unused by the QLogic_IB driver
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			      u64 virt_addr, int mr_access_flags,
-			      struct ib_udata *udata)
-{
-	struct qib_mr *mr;
-	struct ib_umem *umem;
-	struct scatterlist *sg;
-	int n, m, entry;
-	struct ib_mr *ret;
-
-	if (length == 0) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	umem = ib_umem_get(pd->uobject->context, start, length,
-			   mr_access_flags, 0);
-	if (IS_ERR(umem))
-		return (void *) umem;
-
-	n = umem->nmap;
-
-	mr = alloc_mr(n, pd);
-	if (IS_ERR(mr)) {
-		ret = (struct ib_mr *)mr;
-		ib_umem_release(umem);
-		goto bail;
-	}
-
-	mr->mr.user_base = start;
-	mr->mr.iova = virt_addr;
-	mr->mr.length = length;
-	mr->mr.offset = ib_umem_offset(umem);
-	mr->mr.access_flags = mr_access_flags;
-	mr->umem = umem;
-
-	if (is_power_of_2(umem->page_size))
-		mr->mr.page_shift = ilog2(umem->page_size);
-	m = 0;
-	n = 0;
-	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
-			void *vaddr;
-
-			vaddr = page_address(sg_page(sg));
-			if (!vaddr) {
-				ret = ERR_PTR(-EINVAL);
-				goto bail;
-			}
-			mr->mr.map[m]->segs[n].vaddr = vaddr;
-			mr->mr.map[m]->segs[n].length = umem->page_size;
-			n++;
-			if (n == QIB_SEGSZ) {
-				m++;
-				n = 0;
-			}
-	}
-	ret = &mr->ibmr;
-
-bail:
-	return ret;
-}
-
-/**
- * qib_dereg_mr - unregister and free a memory region
- * @ibmr: the memory region to free
- *
- * Returns 0 on success.
- *
- * Note that this is called to free MRs created by qib_get_dma_mr()
- * or qib_reg_user_mr().
- */
-int qib_dereg_mr(struct ib_mr *ibmr)
-{
-	struct qib_mr *mr = to_imr(ibmr);
-	int ret = 0;
-	unsigned long timeout;
-
-	kfree(mr->pages);
-	qib_free_lkey(&mr->mr);
-
-	qib_put_mr(&mr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&mr->mr.comp,
-		5 * HZ);
-	if (!timeout) {
-		qib_get_mr(&mr->mr);
-		ret = -EBUSY;
-		goto out;
-	}
-	deinit_qib_mregion(&mr->mr);
-	if (mr->umem)
-		ib_umem_release(mr->umem);
-	kfree(mr);
-out:
-	return ret;
-}
-
-/*
- * Allocate a memory region usable with the
- * IB_WR_REG_MR send work request.
- *
- * Return the memory region on success, otherwise return an errno.
- */
-struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
-			   enum ib_mr_type mr_type,
-			   u32 max_num_sg)
-{
-	struct qib_mr *mr;
-
-	if (mr_type != IB_MR_TYPE_MEM_REG)
-		return ERR_PTR(-EINVAL);
-
-	mr = alloc_mr(max_num_sg, pd);
-	if (IS_ERR(mr))
-		return (struct ib_mr *)mr;
-
-	mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
-	if (!mr->pages)
-		goto err;
-
-	return &mr->ibmr;
-
-err:
-	qib_dereg_mr(&mr->ibmr);
-	return ERR_PTR(-ENOMEM);
-}
-
-static int qib_set_page(struct ib_mr *ibmr, u64 addr)
-{
-	struct qib_mr *mr = to_imr(ibmr);
-
-	if (unlikely(mr->npages == mr->mr.max_segs))
-		return -ENOMEM;
-
-	mr->pages[mr->npages++] = addr;
-
-	return 0;
-}
-
-int qib_map_mr_sg(struct ib_mr *ibmr,
-		  struct scatterlist *sg,
-		  int sg_nents)
-{
-	struct qib_mr *mr = to_imr(ibmr);
-
-	mr->npages = 0;
-
-	return ib_sg_to_pages(ibmr, sg, sg_nents, qib_set_page);
-}
-
-/**
- * qib_alloc_fmr - allocate a fast memory region
- * @pd: the protection domain for this memory region
- * @mr_access_flags: access flags for this memory region
- * @fmr_attr: fast memory region attributes
- *
- * Returns the memory region on success, otherwise returns an errno.
- */
-struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			     struct ib_fmr_attr *fmr_attr)
-{
-	struct qib_fmr *fmr;
-	int m;
-	struct ib_fmr *ret;
-	int rval = -ENOMEM;
-
-	/* Allocate struct plus pointers to first level page tables. */
-	m = (fmr_attr->max_pages + QIB_SEGSZ - 1) / QIB_SEGSZ;
-	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
-	if (!fmr)
-		goto bail;
-
-	rval = init_qib_mregion(&fmr->mr, pd, fmr_attr->max_pages);
-	if (rval)
-		goto bail;
-
-	/*
-	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
-	 * rkey.
-	 */
-	rval = qib_alloc_lkey(&fmr->mr, 0);
-	if (rval)
-		goto bail_mregion;
-	fmr->ibfmr.rkey = fmr->mr.lkey;
-	fmr->ibfmr.lkey = fmr->mr.lkey;
-	/*
-	 * Resources are allocated but no valid mapping (RKEY can't be
-	 * used).
-	 */
-	fmr->mr.access_flags = mr_access_flags;
-	fmr->mr.max_segs = fmr_attr->max_pages;
-	fmr->mr.page_shift = fmr_attr->page_shift;
-
-	ret = &fmr->ibfmr;
-done:
-	return ret;
-
-bail_mregion:
-	deinit_qib_mregion(&fmr->mr);
-bail:
-	kfree(fmr);
-	ret = ERR_PTR(rval);
-	goto done;
-}
-
-/**
- * qib_map_phys_fmr - set up a fast memory region
- * @ibmfr: the fast memory region to set up
- * @page_list: the list of pages to associate with the fast memory region
- * @list_len: the number of pages to associate with the fast memory region
- * @iova: the virtual address of the start of the fast memory region
- *
- * This may be called from interrupt context.
- */
-
-int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		     int list_len, u64 iova)
-{
-	struct qib_fmr *fmr = to_ifmr(ibfmr);
-	struct qib_lkey_table *rkt;
-	unsigned long flags;
-	int m, n, i;
-	u32 ps;
-	int ret;
-
-	i = atomic_read(&fmr->mr.refcount);
-	if (i > 2)
-		return -EBUSY;
-
-	if (list_len > fmr->mr.max_segs) {
-		ret = -EINVAL;
-		goto bail;
-	}
-	rkt = &to_idev(ibfmr->device)->lk_table;
-	spin_lock_irqsave(&rkt->lock, flags);
-	fmr->mr.user_base = iova;
-	fmr->mr.iova = iova;
-	ps = 1 << fmr->mr.page_shift;
-	fmr->mr.length = list_len * ps;
-	m = 0;
-	n = 0;
-	for (i = 0; i < list_len; i++) {
-		fmr->mr.map[m]->segs[n].vaddr = (void *) page_list[i];
-		fmr->mr.map[m]->segs[n].length = ps;
-		if (++n == QIB_SEGSZ) {
-			m++;
-			n = 0;
-		}
-	}
-	spin_unlock_irqrestore(&rkt->lock, flags);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * qib_unmap_fmr - unmap fast memory regions
- * @fmr_list: the list of fast memory regions to unmap
- *
- * Returns 0 on success.
- */
-int qib_unmap_fmr(struct list_head *fmr_list)
-{
-	struct qib_fmr *fmr;
-	struct qib_lkey_table *rkt;
-	unsigned long flags;
-
-	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
-		rkt = &to_idev(fmr->ibfmr.device)->lk_table;
-		spin_lock_irqsave(&rkt->lock, flags);
-		fmr->mr.user_base = 0;
-		fmr->mr.iova = 0;
-		fmr->mr.length = 0;
-		spin_unlock_irqrestore(&rkt->lock, flags);
-	}
-	return 0;
-}
-
-/**
- * qib_dealloc_fmr - deallocate a fast memory region
- * @ibfmr: the fast memory region to deallocate
- *
- * Returns 0 on success.
- */
-int qib_dealloc_fmr(struct ib_fmr *ibfmr)
-{
-	struct qib_fmr *fmr = to_ifmr(ibfmr);
-	int ret = 0;
-	unsigned long timeout;
-
-	qib_free_lkey(&fmr->mr);
-	qib_put_mr(&fmr->mr); /* will set completion if last */
-	timeout = wait_for_completion_timeout(&fmr->mr.comp,
-		5 * HZ);
-	if (!timeout) {
-		qib_get_mr(&fmr->mr);
-		ret = -EBUSY;
-		goto out;
-	}
-	deinit_qib_mregion(&fmr->mr);
-	kfree(fmr);
-out:
-	return ret;
-}
-
-void mr_rcu_callback(struct rcu_head *list)
-{
-	struct qib_mregion *mr = container_of(list, struct qib_mregion, list);
-
-	complete(&mr->comp);
-}
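
The memory-region code removed above sizes its two-level segment map with a ceiling division, m = (count + QIB_SEGSZ - 1) / QIB_SEGSZ, and then places entries with an (m, n) index pair that wraps n at QIB_SEGSZ. A standalone sketch of that indexing, with an arbitrary small segment size standing in for QIB_SEGSZ:

/* Sketch of the (map, segment) walk used by the removed qib_reg_user_mr()
 * loop; SEGSZ is an arbitrary stand-in for QIB_SEGSZ. */
#include <stdio.h>

#define SEGSZ 8

int main(void)
{
	int count = 20;				/* number of pages to place   */
	int maps = (count + SEGSZ - 1) / SEGSZ;	/* ceiling division: 3 maps   */
	int m = 0, n = 0;

	for (int i = 0; i < count; i++) {
		printf("page %2d -> map[%d]->segs[%d]\n", i, m, n);
		if (++n == SEGSZ) {		/* advance to the next map    */
			m++;
			n = 0;
		}
	}
	printf("allocated %d first-level maps\n", maps);
	return 0;
}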

+ 191 - 987
drivers/infiniband/hw/qib/qib_qp.c

@@ -34,32 +34,38 @@
 
 
 #include <linux/err.h>
 #include <linux/vmalloc.h>
-#include <linux/jhash.h>
+#include <rdma/rdma_vt.h>
 #ifdef CONFIG_DEBUG_FS
 #include <linux/seq_file.h>
 #endif
 
 
 #include "qib.h"
 
 
-#define BITS_PER_PAGE           (PAGE_SIZE*BITS_PER_BYTE)
-#define BITS_PER_PAGE_MASK      (BITS_PER_PAGE-1)
+/*
+ * mask field which was present in now deleted qib_qpn_table
+ * is not present in rvt_qpn_table. Defining the same field
+ * as qpt_mask here instead of adding the mask field to
+ * rvt_qpn_table.
+ */
+u16 qpt_mask;
 
 
-static inline unsigned mk_qpn(struct qib_qpn_table *qpt,
-			      struct qpn_map *map, unsigned off)
+static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
+			      struct rvt_qpn_map *map, unsigned off)
 {
-	return (map - qpt->map) * BITS_PER_PAGE + off;
+	return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
 }
 
 
-static inline unsigned find_next_offset(struct qib_qpn_table *qpt,
-					struct qpn_map *map, unsigned off,
+static inline unsigned find_next_offset(struct rvt_qpn_table *qpt,
+					struct rvt_qpn_map *map, unsigned off,
 					unsigned n)
 {
-	if (qpt->mask) {
+	if (qpt_mask) {
 		off++;
 		off++;
-		if (((off & qpt->mask) >> 1) >= n)
-			off = (off | qpt->mask) + 2;
-	} else
-		off = find_next_zero_bit(map->page, BITS_PER_PAGE, off);
+		if (((off & qpt_mask) >> 1) >= n)
+			off = (off | qpt_mask) + 2;
+	} else {
+		off = find_next_zero_bit(map->page, RVT_BITS_PER_PAGE, off);
+	}
 	return off;
 }
 
 
@@ -100,7 +106,7 @@ static u32 credit_table[31] = {
 	32768                   /* 1E */
 };
 
 
-static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
+static void get_map_page(struct rvt_qpn_table *qpt, struct rvt_qpn_map *map,
 			 gfp_t gfp)
 {
 	unsigned long page = get_zeroed_page(gfp);
@@ -121,12 +127,15 @@ static void get_map_page(struct qib_qpn_table *qpt, struct qpn_map *map,
  * Allocate the next available QPN or
  * zero/one for QP type IB_QPT_SMI/IB_QPT_GSI.
  */
-static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
-		     enum ib_qp_type type, u8 port, gfp_t gfp)
+int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+		  enum ib_qp_type type, u8 port, gfp_t gfp)
 {
 	u32 i, offset, max_scan, qpn;
-	struct qpn_map *map;
+	struct rvt_qpn_map *map;
 	u32 ret;
+	struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
+					      verbs_dev);
 
 
 	if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
 		unsigned n;
@@ -143,12 +152,12 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
 	}
 
 	qpn = qpt->last + 2;
-	if (qpn >= QPN_MAX)
+	if (qpn >= RVT_QPN_MAX)
 		qpn = 2;
-	if (qpt->mask && ((qpn & qpt->mask) >> 1) >= dd->n_krcv_queues)
-		qpn = (qpn | qpt->mask) + 2;
-	offset = qpn & BITS_PER_PAGE_MASK;
-	map = &qpt->map[qpn / BITS_PER_PAGE];
+	if (qpt_mask && ((qpn & qpt_mask) >> 1) >= dd->n_krcv_queues)
+		qpn = (qpn | qpt_mask) + 2;
+	offset = qpn & RVT_BITS_PER_PAGE_MASK;
+	map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
 	max_scan = qpt->nmaps - !offset;
 	for (i = 0;;) {
 		if (unlikely(!map->page)) {
@@ -173,14 +182,14 @@ static int alloc_qpn(struct qib_devdata *dd, struct qib_qpn_table *qpt,
 			 * We just need to be sure we don't loop
 			 * forever.
 			 */
-		} while (offset < BITS_PER_PAGE && qpn < QPN_MAX);
+		} while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
 		/*
 		 * In order to keep the number of pages allocated to a
 		 * minimum, we scan the all existing pages before increasing
 		 * the size of the bitmap table.
 		 */
 		if (++i > max_scan) {
-			if (qpt->nmaps == QPNMAP_ENTRIES)
+			if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
 				break;
 			map = &qpt->map[qpt->nmaps++];
 			offset = 0;
@@ -200,706 +209,113 @@ bail:
 	return ret;
 }
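
qib_alloc_qpn() above hands out QP numbers from per-page bitmaps (struct rvt_qpn_map), scanning for a clear bit and claiming it. A simplified userspace sketch of that scan-and-set idea over a single bitmap word, ignoring locking, the qpt_mask striding and the multi-page growth logic:

/* Minimal free-number allocator in the spirit of the QPN bitmap above:
 * find the first clear bit at or after 'start', set it, return its index. */
#include <stdio.h>

#define NBITS 64
static unsigned long long map;	/* bit i set => number i is in use */

static int alloc_num(int start)
{
	for (int i = start; i < NBITS; i++) {
		if (!(map & (1ULL << i))) {
			map |= 1ULL << i;	/* test-and-set in the driver */
			return i;
		}
	}
	return -1;	/* table exhausted */
}

int main(void)
{
	map |= 3ULL;			/* pretend 0 and 1 (SMI/GSI) are taken */
	printf("%d\n", alloc_num(2));	/* 2 */
	printf("%d\n", alloc_num(2));	/* 3 */
	return 0;
}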
 
 
-static void free_qpn(struct qib_qpn_table *qpt, u32 qpn)
-{
-	struct qpn_map *map;
-
-	map = qpt->map + qpn / BITS_PER_PAGE;
-	if (map->page)
-		clear_bit(qpn & BITS_PER_PAGE_MASK, map->page);
-}
-
-static inline unsigned qpn_hash(struct qib_ibdev *dev, u32 qpn)
-{
-	return jhash_1word(qpn, dev->qp_rnd) &
-		(dev->qp_table_size - 1);
-}
-
-
-/*
- * Put the QP into the hash table.
- * The hash table holds a reference to the QP.
- */
-static void insert_qp(struct qib_ibdev *dev, struct qib_qp *qp)
-{
-	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	unsigned long flags;
-	unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
-
-	atomic_inc(&qp->refcount);
-	spin_lock_irqsave(&dev->qpt_lock, flags);
-
-	if (qp->ibqp.qp_num == 0)
-		rcu_assign_pointer(ibp->qp0, qp);
-	else if (qp->ibqp.qp_num == 1)
-		rcu_assign_pointer(ibp->qp1, qp);
-	else {
-		qp->next = dev->qp_table[n];
-		rcu_assign_pointer(dev->qp_table[n], qp);
-	}
-
-	spin_unlock_irqrestore(&dev->qpt_lock, flags);
-}
-
-/*
- * Remove the QP from the table so it can't be found asynchronously by
- * the receive interrupt routine.
- */
-static void remove_qp(struct qib_ibdev *dev, struct qib_qp *qp)
-{
-	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	unsigned n = qpn_hash(dev, qp->ibqp.qp_num);
-	unsigned long flags;
-	int removed = 1;
-
-	spin_lock_irqsave(&dev->qpt_lock, flags);
-
-	if (rcu_dereference_protected(ibp->qp0,
-			lockdep_is_held(&dev->qpt_lock)) == qp) {
-		RCU_INIT_POINTER(ibp->qp0, NULL);
-	} else if (rcu_dereference_protected(ibp->qp1,
-			lockdep_is_held(&dev->qpt_lock)) == qp) {
-		RCU_INIT_POINTER(ibp->qp1, NULL);
-	} else {
-		struct qib_qp *q;
-		struct qib_qp __rcu **qpp;
-
-		removed = 0;
-		qpp = &dev->qp_table[n];
-		for (; (q = rcu_dereference_protected(*qpp,
-				lockdep_is_held(&dev->qpt_lock))) != NULL;
-				qpp = &q->next)
-			if (q == qp) {
-				RCU_INIT_POINTER(*qpp,
-					rcu_dereference_protected(qp->next,
-					 lockdep_is_held(&dev->qpt_lock)));
-				removed = 1;
-				break;
-			}
-	}
-
-	spin_unlock_irqrestore(&dev->qpt_lock, flags);
-	if (removed) {
-		synchronize_rcu();
-		atomic_dec(&qp->refcount);
-	}
-}
-
 /**
 /**
  * qib_free_all_qps - check for QPs still in use
  * qib_free_all_qps - check for QPs still in use
- * @qpt: the QP table to empty
- *
- * There should not be any QPs still in use.
- * Free memory for table.
  */
  */
-unsigned qib_free_all_qps(struct qib_devdata *dd)
+unsigned qib_free_all_qps(struct rvt_dev_info *rdi)
 {
 {
-	struct qib_ibdev *dev = &dd->verbs_dev;
-	unsigned long flags;
-	struct qib_qp *qp;
+	struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
+					      verbs_dev);
 	unsigned n, qp_inuse = 0;
 	unsigned n, qp_inuse = 0;
 
 
 	for (n = 0; n < dd->num_pports; n++) {
 	for (n = 0; n < dd->num_pports; n++) {
 		struct qib_ibport *ibp = &dd->pport[n].ibport_data;
 		struct qib_ibport *ibp = &dd->pport[n].ibport_data;
 
 
-		if (!qib_mcast_tree_empty(ibp))
-			qp_inuse++;
 		rcu_read_lock();
 		rcu_read_lock();
-		if (rcu_dereference(ibp->qp0))
+		if (rcu_dereference(ibp->rvp.qp[0]))
 			qp_inuse++;
 			qp_inuse++;
-		if (rcu_dereference(ibp->qp1))
+		if (rcu_dereference(ibp->rvp.qp[1]))
 			qp_inuse++;
 			qp_inuse++;
 		rcu_read_unlock();
 		rcu_read_unlock();
 	}
 	}
-
-	spin_lock_irqsave(&dev->qpt_lock, flags);
-	for (n = 0; n < dev->qp_table_size; n++) {
-		qp = rcu_dereference_protected(dev->qp_table[n],
-			lockdep_is_held(&dev->qpt_lock));
-		RCU_INIT_POINTER(dev->qp_table[n], NULL);
-
-		for (; qp; qp = rcu_dereference_protected(qp->next,
-					lockdep_is_held(&dev->qpt_lock)))
-			qp_inuse++;
-	}
-	spin_unlock_irqrestore(&dev->qpt_lock, flags);
-	synchronize_rcu();
-
 	return qp_inuse;
 }
 
 
-/**
- * qib_lookup_qpn - return the QP with the given QPN
- * @qpt: the QP table
- * @qpn: the QP number to look up
- *
- * The caller is responsible for decrementing the QP reference count
- * when done.
- */
-struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn)
+void qib_notify_qp_reset(struct rvt_qp *qp)
 {
-	struct qib_qp *qp = NULL;
-
-	rcu_read_lock();
-	if (unlikely(qpn <= 1)) {
-		if (qpn == 0)
-			qp = rcu_dereference(ibp->qp0);
-		else
-			qp = rcu_dereference(ibp->qp1);
-		if (qp)
-			atomic_inc(&qp->refcount);
-	} else {
-		struct qib_ibdev *dev = &ppd_from_ibp(ibp)->dd->verbs_dev;
-		unsigned n = qpn_hash(dev, qpn);
-
-		for (qp = rcu_dereference(dev->qp_table[n]); qp;
-			qp = rcu_dereference(qp->next))
-			if (qp->ibqp.qp_num == qpn) {
-				atomic_inc(&qp->refcount);
-				break;
-			}
-	}
-	rcu_read_unlock();
-	return qp;
-}
-
-/**
- * qib_reset_qp - initialize the QP state to the reset state
- * @qp: the QP to reset
- * @type: the QP type
- */
-static void qib_reset_qp(struct qib_qp *qp, enum ib_qp_type type)
-{
-	qp->remote_qpn = 0;
-	qp->qkey = 0;
-	qp->qp_access_flags = 0;
-	atomic_set(&qp->s_dma_busy, 0);
-	qp->s_flags &= QIB_S_SIGNAL_REQ_WR;
-	qp->s_hdrwords = 0;
-	qp->s_wqe = NULL;
-	qp->s_draining = 0;
-	qp->s_next_psn = 0;
-	qp->s_last_psn = 0;
-	qp->s_sending_psn = 0;
-	qp->s_sending_hpsn = 0;
-	qp->s_psn = 0;
-	qp->r_psn = 0;
-	qp->r_msn = 0;
-	if (type == IB_QPT_RC) {
-		qp->s_state = IB_OPCODE_RC_SEND_LAST;
-		qp->r_state = IB_OPCODE_RC_SEND_LAST;
-	} else {
-		qp->s_state = IB_OPCODE_UC_SEND_LAST;
-		qp->r_state = IB_OPCODE_UC_SEND_LAST;
-	}
-	qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
-	qp->r_nak_state = 0;
-	qp->r_aflags = 0;
-	qp->r_flags = 0;
-	qp->s_head = 0;
-	qp->s_tail = 0;
-	qp->s_cur = 0;
-	qp->s_acked = 0;
-	qp->s_last = 0;
-	qp->s_ssn = 1;
-	qp->s_lsn = 0;
-	qp->s_mig_state = IB_MIG_MIGRATED;
-	memset(qp->s_ack_queue, 0, sizeof(qp->s_ack_queue));
-	qp->r_head_ack_queue = 0;
-	qp->s_tail_ack_queue = 0;
-	qp->s_num_rd_atomic = 0;
-	if (qp->r_rq.wq) {
-		qp->r_rq.wq->head = 0;
-		qp->r_rq.wq->tail = 0;
-	}
-	qp->r_sge.num_sge = 0;
-}
-
-static void clear_mr_refs(struct qib_qp *qp, int clr_sends)
-{
-	unsigned n;
-
-	if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
-		qib_put_ss(&qp->s_rdma_read_sge);
-
-	qib_put_ss(&qp->r_sge);
-
-	if (clr_sends) {
-		while (qp->s_last != qp->s_head) {
-			struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_last);
-			unsigned i;
-
-			for (i = 0; i < wqe->wr.num_sge; i++) {
-				struct qib_sge *sge = &wqe->sg_list[i];
-
-				qib_put_mr(sge->mr);
-			}
-			if (qp->ibqp.qp_type == IB_QPT_UD ||
-			    qp->ibqp.qp_type == IB_QPT_SMI ||
-			    qp->ibqp.qp_type == IB_QPT_GSI)
-				atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
-			if (++qp->s_last >= qp->s_size)
-				qp->s_last = 0;
-		}
-		if (qp->s_rdma_mr) {
-			qib_put_mr(qp->s_rdma_mr);
-			qp->s_rdma_mr = NULL;
-		}
-	}
-
-	if (qp->ibqp.qp_type != IB_QPT_RC)
-		return;
+	struct qib_qp_priv *priv = qp->priv;
 
 
-	for (n = 0; n < ARRAY_SIZE(qp->s_ack_queue); n++) {
-		struct qib_ack_entry *e = &qp->s_ack_queue[n];
-
-		if (e->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST &&
-		    e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
-			e->rdma_sge.mr = NULL;
-		}
-	}
+	atomic_set(&priv->s_dma_busy, 0);
 }
 
 
-/**
- * qib_error_qp - put a QP into the error state
- * @qp: the QP to put into the error state
- * @err: the receive completion error to signal if a RWQE is active
- *
- * Flushes both send and receive work queues.
- * Returns true if last WQE event should be generated.
- * The QP r_lock and s_lock should be held and interrupts disabled.
- * If we are already in error state, just return.
- */
-int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err)
+void qib_notify_error_qp(struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
-	struct ib_wc wc;
-	int ret = 0;
-
-	if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
-		goto bail;
-
-	qp->state = IB_QPS_ERR;
-
-	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
-		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
-		del_timer(&qp->s_timer);
-	}
-
-	if (qp->s_flags & QIB_S_ANY_WAIT_SEND)
-		qp->s_flags &= ~QIB_S_ANY_WAIT_SEND;
 
 
-	spin_lock(&dev->pending_lock);
-	if (!list_empty(&qp->iowait) && !(qp->s_flags & QIB_S_BUSY)) {
-		qp->s_flags &= ~QIB_S_ANY_WAIT_IO;
-		list_del_init(&qp->iowait);
+	spin_lock(&dev->rdi.pending_lock);
+	if (!list_empty(&priv->iowait) && !(qp->s_flags & RVT_S_BUSY)) {
+		qp->s_flags &= ~RVT_S_ANY_WAIT_IO;
+		list_del_init(&priv->iowait);
 	}
-	spin_unlock(&dev->pending_lock);
+	spin_unlock(&dev->rdi.pending_lock);
 
 
-	if (!(qp->s_flags & QIB_S_BUSY)) {
+	if (!(qp->s_flags & RVT_S_BUSY)) {
 		qp->s_hdrwords = 0;
 		if (qp->s_rdma_mr) {
-			qib_put_mr(qp->s_rdma_mr);
+			rvt_put_mr(qp->s_rdma_mr);
 			qp->s_rdma_mr = NULL;
 		}
-		if (qp->s_tx) {
-			qib_put_txreq(qp->s_tx);
-			qp->s_tx = NULL;
+		if (priv->s_tx) {
+			qib_put_txreq(priv->s_tx);
+			priv->s_tx = NULL;
 		}
 		}
 	}
-
-	/* Schedule the sending tasklet to drain the send work queue. */
-	if (qp->s_last != qp->s_head)
-		qib_schedule_send(qp);
-
-	clear_mr_refs(qp, 0);
-
-	memset(&wc, 0, sizeof(wc));
-	wc.qp = &qp->ibqp;
-	wc.opcode = IB_WC_RECV;
-
-	if (test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags)) {
-		wc.wr_id = qp->r_wr_id;
-		wc.status = err;
-		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-	}
-	wc.status = IB_WC_WR_FLUSH_ERR;
-
-	if (qp->r_rq.wq) {
-		struct qib_rwq *wq;
-		u32 head;
-		u32 tail;
-
-		spin_lock(&qp->r_rq.lock);
-
-		/* sanity check pointers before trusting them */
-		wq = qp->r_rq.wq;
-		head = wq->head;
-		if (head >= qp->r_rq.size)
-			head = 0;
-		tail = wq->tail;
-		if (tail >= qp->r_rq.size)
-			tail = 0;
-		while (tail != head) {
-			wc.wr_id = get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
-			if (++tail >= qp->r_rq.size)
-				tail = 0;
-			qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
-		}
-		wq->tail = tail;
-
-		spin_unlock(&qp->r_rq.lock);
-	} else if (qp->ibqp.event_handler)
-		ret = 1;
-
-bail:
-	return ret;
 }
 }
 
 
-/**
- * qib_modify_qp - modify the attributes of a queue pair
- * @ibqp: the queue pair who's attributes we're modifying
- * @attr: the new attributes
- * @attr_mask: the mask of attributes to modify
- * @udata: user data for libibverbs.so
- *
- * Returns 0 on success, otherwise returns an errno.
- */
-int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		  int attr_mask, struct ib_udata *udata)
+static int mtu_to_enum(u32 mtu)
 {
-	struct qib_ibdev *dev = to_idev(ibqp->device);
-	struct qib_qp *qp = to_iqp(ibqp);
-	enum ib_qp_state cur_state, new_state;
-	struct ib_event ev;
-	int lastwqe = 0;
-	int mig = 0;
-	int ret;
-	u32 pmtu = 0; /* for gcc warning only */
-
-	spin_lock_irq(&qp->r_lock);
-	spin_lock(&qp->s_lock);
-
-	cur_state = attr_mask & IB_QP_CUR_STATE ?
-		attr->cur_qp_state : qp->state;
-	new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
-
-	if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
-				attr_mask, IB_LINK_LAYER_UNSPECIFIED))
-		goto inval;
-
-	if (attr_mask & IB_QP_AV) {
-		if (attr->ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
-			goto inval;
-		if (qib_check_ah(qp->ibqp.device, &attr->ah_attr))
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_ALT_PATH) {
-		if (attr->alt_ah_attr.dlid >= QIB_MULTICAST_LID_BASE)
-			goto inval;
-		if (qib_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
-			goto inval;
-		if (attr->alt_pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		if (attr->pkey_index >= qib_get_npkeys(dd_from_dev(dev)))
-			goto inval;
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		if (attr->min_rnr_timer > 31)
-			goto inval;
-
-	if (attr_mask & IB_QP_PORT)
-		if (qp->ibqp.qp_type == IB_QPT_SMI ||
-		    qp->ibqp.qp_type == IB_QPT_GSI ||
-		    attr->port_num == 0 ||
-		    attr->port_num > ibqp->device->phys_port_cnt)
-			goto inval;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		if (attr->dest_qp_num > QIB_QPN_MASK)
-			goto inval;
-
-	if (attr_mask & IB_QP_RETRY_CNT)
-		if (attr->retry_cnt > 7)
-			goto inval;
-
-	if (attr_mask & IB_QP_RNR_RETRY)
-		if (attr->rnr_retry > 7)
-			goto inval;
-
-	/*
-	 * Don't allow invalid path_mtu values.  OK to set greater
-	 * than the active mtu (or even the max_cap, if we have tuned
-	 * that to a small mtu.  We'll set qp->path_mtu
-	 * to the lesser of requested attribute mtu and active,
-	 * for packetizing messages.
-	 * Note that the QP port has to be set in INIT and MTU in RTR.
-	 */
-	if (attr_mask & IB_QP_PATH_MTU) {
-		struct qib_devdata *dd = dd_from_dev(dev);
-		int mtu, pidx = qp->port_num - 1;
-
-		mtu = ib_mtu_enum_to_int(attr->path_mtu);
-		if (mtu == -1)
-			goto inval;
-		if (mtu > dd->pport[pidx].ibmtu) {
-			switch (dd->pport[pidx].ibmtu) {
-			case 4096:
-				pmtu = IB_MTU_4096;
-				break;
-			case 2048:
-				pmtu = IB_MTU_2048;
-				break;
-			case 1024:
-				pmtu = IB_MTU_1024;
-				break;
-			case 512:
-				pmtu = IB_MTU_512;
-				break;
-			case 256:
-				pmtu = IB_MTU_256;
-				break;
-			default:
-				pmtu = IB_MTU_2048;
-			}
-		} else
-			pmtu = attr->path_mtu;
-	}
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		if (attr->path_mig_state == IB_MIG_REARM) {
-			if (qp->s_mig_state == IB_MIG_ARMED)
-				goto inval;
-			if (new_state != IB_QPS_RTS)
-				goto inval;
-		} else if (attr->path_mig_state == IB_MIG_MIGRATED) {
-			if (qp->s_mig_state == IB_MIG_REARM)
-				goto inval;
-			if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
-				goto inval;
-			if (qp->s_mig_state == IB_MIG_ARMED)
-				mig = 1;
-		} else
-			goto inval;
-	}
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		if (attr->max_dest_rd_atomic > QIB_MAX_RDMA_ATOMIC)
-			goto inval;
+	int enum_mtu;
 
 
-	switch (new_state) {
-	case IB_QPS_RESET:
-		if (qp->state != IB_QPS_RESET) {
-			qp->state = IB_QPS_RESET;
-			spin_lock(&dev->pending_lock);
-			if (!list_empty(&qp->iowait))
-				list_del_init(&qp->iowait);
-			spin_unlock(&dev->pending_lock);
-			qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
-			spin_unlock(&qp->s_lock);
-			spin_unlock_irq(&qp->r_lock);
-			/* Stop the sending work queue and retry timer */
-			cancel_work_sync(&qp->s_work);
-			del_timer_sync(&qp->s_timer);
-			wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-			if (qp->s_tx) {
-				qib_put_txreq(qp->s_tx);
-				qp->s_tx = NULL;
-			}
-			remove_qp(dev, qp);
-			wait_event(qp->wait, !atomic_read(&qp->refcount));
-			spin_lock_irq(&qp->r_lock);
-			spin_lock(&qp->s_lock);
-			clear_mr_refs(qp, 1);
-			qib_reset_qp(qp, ibqp->qp_type);
-		}
+	switch (mtu) {
+	case 4096:
+		enum_mtu = IB_MTU_4096;
 		break;
-
-	case IB_QPS_RTR:
-		/* Allow event to retrigger if QP set to RTR more than once */
-		qp->r_flags &= ~QIB_R_COMM_EST;
-		qp->state = new_state;
+	case 2048:
+		enum_mtu = IB_MTU_2048;
 		break;
-
-	case IB_QPS_SQD:
-		qp->s_draining = qp->s_last != qp->s_cur;
-		qp->state = new_state;
+	case 1024:
+		enum_mtu = IB_MTU_1024;
 		break;
-
-	case IB_QPS_SQE:
-		if (qp->ibqp.qp_type == IB_QPT_RC)
-			goto inval;
-		qp->state = new_state;
+	case 512:
+		enum_mtu = IB_MTU_512;
 		break;
-
-	case IB_QPS_ERR:
-		lastwqe = qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+	case 256:
+		enum_mtu = IB_MTU_256;
 		break;
-
 	default:
-		qp->state = new_state;
-		break;
-	}
-
-	if (attr_mask & IB_QP_PKEY_INDEX)
-		qp->s_pkey_index = attr->pkey_index;
-
-	if (attr_mask & IB_QP_PORT)
-		qp->port_num = attr->port_num;
-
-	if (attr_mask & IB_QP_DEST_QPN)
-		qp->remote_qpn = attr->dest_qp_num;
-
-	if (attr_mask & IB_QP_SQ_PSN) {
-		qp->s_next_psn = attr->sq_psn & QIB_PSN_MASK;
-		qp->s_psn = qp->s_next_psn;
-		qp->s_sending_psn = qp->s_next_psn;
-		qp->s_last_psn = qp->s_next_psn - 1;
-		qp->s_sending_hpsn = qp->s_last_psn;
-	}
-
-	if (attr_mask & IB_QP_RQ_PSN)
-		qp->r_psn = attr->rq_psn & QIB_PSN_MASK;
-
-	if (attr_mask & IB_QP_ACCESS_FLAGS)
-		qp->qp_access_flags = attr->qp_access_flags;
-
-	if (attr_mask & IB_QP_AV) {
-		qp->remote_ah_attr = attr->ah_attr;
-		qp->s_srate = attr->ah_attr.static_rate;
-	}
-
-	if (attr_mask & IB_QP_ALT_PATH) {
-		qp->alt_ah_attr = attr->alt_ah_attr;
-		qp->s_alt_pkey_index = attr->alt_pkey_index;
-	}
-
-	if (attr_mask & IB_QP_PATH_MIG_STATE) {
-		qp->s_mig_state = attr->path_mig_state;
-		if (mig) {
-			qp->remote_ah_attr = qp->alt_ah_attr;
-			qp->port_num = qp->alt_ah_attr.port_num;
-			qp->s_pkey_index = qp->s_alt_pkey_index;
-		}
-	}
-
-	if (attr_mask & IB_QP_PATH_MTU) {
-		qp->path_mtu = pmtu;
-		qp->pmtu = ib_mtu_enum_to_int(pmtu);
-	}
-
-	if (attr_mask & IB_QP_RETRY_CNT) {
-		qp->s_retry_cnt = attr->retry_cnt;
-		qp->s_retry = attr->retry_cnt;
-	}
-
-	if (attr_mask & IB_QP_RNR_RETRY) {
-		qp->s_rnr_retry_cnt = attr->rnr_retry;
-		qp->s_rnr_retry = attr->rnr_retry;
+		enum_mtu = IB_MTU_2048;
 	}
-
-	if (attr_mask & IB_QP_MIN_RNR_TIMER)
-		qp->r_min_rnr_timer = attr->min_rnr_timer;
-
-	if (attr_mask & IB_QP_TIMEOUT) {
-		qp->timeout = attr->timeout;
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
-	}
-
-	if (attr_mask & IB_QP_QKEY)
-		qp->qkey = attr->qkey;
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-		qp->s_max_rd_atomic = attr->max_rd_atomic;
-
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-
-	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
-		insert_qp(dev, qp);
-
-	if (lastwqe) {
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	if (mig) {
-		ev.device = qp->ibqp.device;
-		ev.element.qp = &qp->ibqp;
-		ev.event = IB_EVENT_PATH_MIG;
-		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
-	}
-	ret = 0;
-	goto bail;
-
-inval:
-	spin_unlock(&qp->s_lock);
-	spin_unlock_irq(&qp->r_lock);
-	ret = -EINVAL;
-
-bail:
-	return ret;
+	return enum_mtu;
 }
 
 
-int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		 int attr_mask, struct ib_qp_init_attr *init_attr)
+int qib_get_pmtu_from_attr(struct rvt_dev_info *rdi, struct rvt_qp *qp,
+			   struct ib_qp_attr *attr)
 {
-	struct qib_qp *qp = to_iqp(ibqp);
+	int mtu, pmtu, pidx = qp->port_num - 1;
+	struct qib_ibdev *verbs_dev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = container_of(verbs_dev, struct qib_devdata,
+					      verbs_dev);
+	mtu = ib_mtu_enum_to_int(attr->path_mtu);
+	if (mtu == -1)
+		return -EINVAL;
+
+	if (mtu > dd->pport[pidx].ibmtu)
+		pmtu = mtu_to_enum(dd->pport[pidx].ibmtu);
+	else
+		pmtu = attr->path_mtu;
+	return pmtu;
+}
 
 
-	attr->qp_state = qp->state;
-	attr->cur_qp_state = attr->qp_state;
-	attr->path_mtu = qp->path_mtu;
-	attr->path_mig_state = qp->s_mig_state;
-	attr->qkey = qp->qkey;
-	attr->rq_psn = qp->r_psn & QIB_PSN_MASK;
-	attr->sq_psn = qp->s_next_psn & QIB_PSN_MASK;
-	attr->dest_qp_num = qp->remote_qpn;
-	attr->qp_access_flags = qp->qp_access_flags;
-	attr->cap.max_send_wr = qp->s_size - 1;
-	attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
-	attr->cap.max_send_sge = qp->s_max_sge;
-	attr->cap.max_recv_sge = qp->r_rq.max_sge;
-	attr->cap.max_inline_data = 0;
-	attr->ah_attr = qp->remote_ah_attr;
-	attr->alt_ah_attr = qp->alt_ah_attr;
-	attr->pkey_index = qp->s_pkey_index;
-	attr->alt_pkey_index = qp->s_alt_pkey_index;
-	attr->en_sqd_async_notify = 0;
-	attr->sq_draining = qp->s_draining;
-	attr->max_rd_atomic = qp->s_max_rd_atomic;
-	attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
-	attr->min_rnr_timer = qp->r_min_rnr_timer;
-	attr->port_num = qp->port_num;
-	attr->timeout = qp->timeout;
-	attr->retry_cnt = qp->s_retry_cnt;
-	attr->rnr_retry = qp->s_rnr_retry_cnt;
-	attr->alt_port_num = qp->alt_ah_attr.port_num;
-	attr->alt_timeout = qp->alt_timeout;
+int qib_mtu_to_path_mtu(u32 mtu)
+{
+	return mtu_to_enum(mtu);
+}
 
 
-	init_attr->event_handler = qp->ibqp.event_handler;
-	init_attr->qp_context = qp->ibqp.qp_context;
-	init_attr->send_cq = qp->ibqp.send_cq;
-	init_attr->recv_cq = qp->ibqp.recv_cq;
-	init_attr->srq = qp->ibqp.srq;
-	init_attr->cap = attr->cap;
-	if (qp->s_flags & QIB_S_SIGNAL_REQ_WR)
-		init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
-	else
-		init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-	init_attr->qp_type = qp->ibqp.qp_type;
-	init_attr->port_num = qp->port_num;
-	return 0;
+u32 qib_mtu_from_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp, u32 pmtu)
+{
+	return ib_mtu_enum_to_int(pmtu);
 }
 }
 
 
 /**
 /**
@@ -908,7 +324,7 @@ int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
  *
  *
  * Returns the AETH.
  */
  */
-__be32 qib_compute_aeth(struct qib_qp *qp)
+__be32 qib_compute_aeth(struct rvt_qp *qp)
 {
 {
 	u32 aeth = qp->r_msn & QIB_MSN_MASK;
 	u32 aeth = qp->r_msn & QIB_MSN_MASK;
 
 
@@ -921,7 +337,7 @@ __be32 qib_compute_aeth(struct qib_qp *qp)
 	} else {
 	} else {
 		u32 min, max, x;
 		u32 min, max, x;
 		u32 credits;
 		u32 credits;
-		struct qib_rwq *wq = qp->r_rq.wq;
+		struct rvt_rwq *wq = qp->r_rq.wq;
 		u32 head;
 		u32 head;
 		u32 tail;
 		u32 tail;
 
 
@@ -962,315 +378,63 @@ __be32 qib_compute_aeth(struct qib_qp *qp)
 	return cpu_to_be32(aeth);
 	return cpu_to_be32(aeth);
 }
 }
 
 
-/**
- * qib_create_qp - create a queue pair for a device
- * @ibpd: the protection domain who's device we create the queue pair for
- * @init_attr: the attributes of the queue pair
- * @udata: user data for libibverbs.so
- *
- * Returns the queue pair on success, otherwise returns an errno.
- *
- * Called by the ib_create_qp() core verbs function.
- */
-struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
-			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata)
+void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp)
 {
-	struct qib_qp *qp;
-	int err;
-	struct qib_swqe *swq = NULL;
-	struct qib_ibdev *dev;
-	struct qib_devdata *dd;
-	size_t sz;
-	size_t sg_list_sz;
-	struct ib_qp *ret;
-	gfp_t gfp;
+	struct qib_qp_priv *priv;
 
 
+	priv = kzalloc(sizeof(*priv), gfp);
+	if (!priv)
+		return ERR_PTR(-ENOMEM);
+	priv->owner = qp;
 
 
-	if (init_attr->cap.max_send_sge > ib_qib_max_sges ||
-	    init_attr->cap.max_send_wr > ib_qib_max_qp_wrs ||
-	    init_attr->create_flags & ~(IB_QP_CREATE_USE_GFP_NOIO))
-		return ERR_PTR(-EINVAL);
-
-	/* GFP_NOIO is applicable in RC QPs only */
-	if (init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO &&
-	    init_attr->qp_type != IB_QPT_RC)
-		return ERR_PTR(-EINVAL);
-
-	gfp = init_attr->create_flags & IB_QP_CREATE_USE_GFP_NOIO ?
-			GFP_NOIO : GFP_KERNEL;
-
-	/* Check receive queue parameters if no SRQ is specified. */
-	if (!init_attr->srq) {
-		if (init_attr->cap.max_recv_sge > ib_qib_max_sges ||
-		    init_attr->cap.max_recv_wr > ib_qib_max_qp_wrs) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-		if (init_attr->cap.max_send_sge +
-		    init_attr->cap.max_send_wr +
-		    init_attr->cap.max_recv_sge +
-		    init_attr->cap.max_recv_wr == 0) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
+	priv->s_hdr = kzalloc(sizeof(*priv->s_hdr), gfp);
+	if (!priv->s_hdr) {
+		kfree(priv);
+		return ERR_PTR(-ENOMEM);
 	}
+	init_waitqueue_head(&priv->wait_dma);
+	INIT_WORK(&priv->s_work, _qib_do_send);
+	INIT_LIST_HEAD(&priv->iowait);
 
 
-	switch (init_attr->qp_type) {
-	case IB_QPT_SMI:
-	case IB_QPT_GSI:
-		if (init_attr->port_num == 0 ||
-		    init_attr->port_num > ibpd->device->phys_port_cnt) {
-			ret = ERR_PTR(-EINVAL);
-			goto bail;
-		}
-	case IB_QPT_UC:
-	case IB_QPT_RC:
-	case IB_QPT_UD:
-		sz = sizeof(struct qib_sge) *
-			init_attr->cap.max_send_sge +
-			sizeof(struct qib_swqe);
-		swq = __vmalloc((init_attr->cap.max_send_wr + 1) * sz,
-				gfp, PAGE_KERNEL);
-		if (swq == NULL) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail;
-		}
-		sz = sizeof(*qp);
-		sg_list_sz = 0;
-		if (init_attr->srq) {
-			struct qib_srq *srq = to_isrq(init_attr->srq);
-
-			if (srq->rq.max_sge > 1)
-				sg_list_sz = sizeof(*qp->r_sg_list) *
-					(srq->rq.max_sge - 1);
-		} else if (init_attr->cap.max_recv_sge > 1)
-			sg_list_sz = sizeof(*qp->r_sg_list) *
-				(init_attr->cap.max_recv_sge - 1);
-		qp = kzalloc(sz + sg_list_sz, gfp);
-		if (!qp) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_swq;
-		}
-		RCU_INIT_POINTER(qp->next, NULL);
-		qp->s_hdr = kzalloc(sizeof(*qp->s_hdr), gfp);
-		if (!qp->s_hdr) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_qp;
-		}
-		qp->timeout_jiffies =
-			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
-				1000UL);
-		if (init_attr->srq)
-			sz = 0;
-		else {
-			qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
-			qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
-			sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
-				sizeof(struct qib_rwqe);
-			if (gfp != GFP_NOIO)
-				qp->r_rq.wq = vmalloc_user(
-						sizeof(struct qib_rwq) +
-						qp->r_rq.size * sz);
-			else
-				qp->r_rq.wq = __vmalloc(
-						sizeof(struct qib_rwq) +
-						qp->r_rq.size * sz,
-						gfp, PAGE_KERNEL);
-
-			if (!qp->r_rq.wq) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_qp;
-			}
-		}
-
-		/*
-		 * ib_create_qp() will initialize qp->ibqp
-		 * except for qp->ibqp.qp_num.
-		 */
-		spin_lock_init(&qp->r_lock);
-		spin_lock_init(&qp->s_lock);
-		spin_lock_init(&qp->r_rq.lock);
-		atomic_set(&qp->refcount, 0);
-		init_waitqueue_head(&qp->wait);
-		init_waitqueue_head(&qp->wait_dma);
-		init_timer(&qp->s_timer);
-		qp->s_timer.data = (unsigned long)qp;
-		INIT_WORK(&qp->s_work, qib_do_send);
-		INIT_LIST_HEAD(&qp->iowait);
-		INIT_LIST_HEAD(&qp->rspwait);
-		qp->state = IB_QPS_RESET;
-		qp->s_wq = swq;
-		qp->s_size = init_attr->cap.max_send_wr + 1;
-		qp->s_max_sge = init_attr->cap.max_send_sge;
-		if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
-			qp->s_flags = QIB_S_SIGNAL_REQ_WR;
-		dev = to_idev(ibpd->device);
-		dd = dd_from_dev(dev);
-		err = alloc_qpn(dd, &dev->qpn_table, init_attr->qp_type,
-				init_attr->port_num, gfp);
-		if (err < 0) {
-			ret = ERR_PTR(err);
-			vfree(qp->r_rq.wq);
-			goto bail_qp;
-		}
-		qp->ibqp.qp_num = err;
-		qp->port_num = init_attr->port_num;
-		qib_reset_qp(qp, init_attr->qp_type);
-		break;
-
-	default:
-		/* Don't support raw QPs */
-		ret = ERR_PTR(-ENOSYS);
-		goto bail;
-	}
-
-	init_attr->cap.max_inline_data = 0;
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See qib_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		if (!qp->r_rq.wq) {
-			__u64 offset = 0;
-
-			err = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		} else {
-			u32 s = sizeof(struct qib_rwq) + qp->r_rq.size * sz;
-
-			qp->ip = qib_create_mmap_info(dev, s,
-						      ibpd->uobject->context,
-						      qp->r_rq.wq);
-			if (!qp->ip) {
-				ret = ERR_PTR(-ENOMEM);
-				goto bail_ip;
-			}
-
-			err = ib_copy_to_udata(udata, &(qp->ip->offset),
-					       sizeof(qp->ip->offset));
-			if (err) {
-				ret = ERR_PTR(err);
-				goto bail_ip;
-			}
-		}
-	}
-
-	spin_lock(&dev->n_qps_lock);
-	if (dev->n_qps_allocated == ib_qib_max_qps) {
-		spin_unlock(&dev->n_qps_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_qps_allocated++;
-	spin_unlock(&dev->n_qps_lock);
-
-	if (qp->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&qp->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &qp->ibqp;
-	goto bail;
-
-bail_ip:
-	if (qp->ip)
-		kref_put(&qp->ip->ref, qib_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
-bail_qp:
-	kfree(qp->s_hdr);
-	kfree(qp);
-bail_swq:
-	vfree(swq);
-bail:
-	return ret;
+	return priv;
 }
 }
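
The priv->owner back-pointer set above is what lets deferred work find its QP again. A minimal sketch of how the send work handler can recover the QP (this assumes _qib_do_send follows the usual container_of pattern; it is not shown in this hunk):

	static void _qib_do_send_sketch(struct work_struct *work)
	{
		struct qib_qp_priv *priv = container_of(work, struct qib_qp_priv,
							s_work);
		struct rvt_qp *qp = priv->owner;

		/* hand off to the existing send engine */
		qib_do_send(qp);
	}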
 
 
-/**
- * qib_destroy_qp - destroy a queue pair
- * @ibqp: the queue pair to destroy
- *
- * Returns 0 on success.
- *
- * Note that this can be called while the QP is actively sending or
- * receiving!
- */
-int qib_destroy_qp(struct ib_qp *ibqp)
+void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp)
 {
 {
-	struct qib_qp *qp = to_iqp(ibqp);
-	struct qib_ibdev *dev = to_idev(ibqp->device);
+	struct qib_qp_priv *priv = qp->priv;
 
 
-	/* Make sure HW and driver activity is stopped. */
-	spin_lock_irq(&qp->s_lock);
-	if (qp->state != IB_QPS_RESET) {
-		qp->state = IB_QPS_RESET;
-		spin_lock(&dev->pending_lock);
-		if (!list_empty(&qp->iowait))
-			list_del_init(&qp->iowait);
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_ANY_WAIT);
-		spin_unlock_irq(&qp->s_lock);
-		cancel_work_sync(&qp->s_work);
-		del_timer_sync(&qp->s_timer);
-		wait_event(qp->wait_dma, !atomic_read(&qp->s_dma_busy));
-		if (qp->s_tx) {
-			qib_put_txreq(qp->s_tx);
-			qp->s_tx = NULL;
-		}
-		remove_qp(dev, qp);
-		wait_event(qp->wait, !atomic_read(&qp->refcount));
-		clear_mr_refs(qp, 1);
-	} else
-		spin_unlock_irq(&qp->s_lock);
+	kfree(priv->s_hdr);
+	kfree(priv);
+}
 
 
-	/* all user's cleaned up, mark it available */
-	free_qpn(&dev->qpn_table, qp->ibqp.qp_num);
-	spin_lock(&dev->n_qps_lock);
-	dev->n_qps_allocated--;
-	spin_unlock(&dev->n_qps_lock);
+void qib_stop_send_queue(struct rvt_qp *qp)
+{
+	struct qib_qp_priv *priv = qp->priv;
 
 
-	if (qp->ip)
-		kref_put(&qp->ip->ref, qib_release_mmap_info);
-	else
-		vfree(qp->r_rq.wq);
-	vfree(qp->s_wq);
-	kfree(qp->s_hdr);
-	kfree(qp);
-	return 0;
+	cancel_work_sync(&priv->s_work);
+	del_timer_sync(&qp->s_timer);
 }
 }
 
 
-/**
- * qib_init_qpn_table - initialize the QP number table for a device
- * @qpt: the QPN table
- */
-void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt)
+void qib_quiesce_qp(struct rvt_qp *qp)
 {
 {
-	spin_lock_init(&qpt->lock);
-	qpt->last = 1;          /* start with QPN 2 */
-	qpt->nmaps = 1;
-	qpt->mask = dd->qpn_mask;
+	struct qib_qp_priv *priv = qp->priv;
+
+	wait_event(priv->wait_dma, !atomic_read(&priv->s_dma_busy));
+	if (priv->s_tx) {
+		qib_put_txreq(priv->s_tx);
+		priv->s_tx = NULL;
+	}
 }
 }
 
 
-/**
- * qib_free_qpn_table - free the QP number table for a device
- * @qpt: the QPN table
- */
-void qib_free_qpn_table(struct qib_qpn_table *qpt)
+void qib_flush_qp_waiters(struct rvt_qp *qp)
 {
 {
-	int i;
+	struct qib_qp_priv *priv = qp->priv;
+	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 
 
-	for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
-		if (qpt->map[i].page)
-			free_page((unsigned long) qpt->map[i].page);
+	spin_lock(&dev->rdi.pending_lock);
+	if (!list_empty(&priv->iowait))
+		list_del_init(&priv->iowait);
+	spin_unlock(&dev->rdi.pending_lock);
 }
 }
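
These small helpers replace pieces of the old qib_destroy_qp()/reset path and are meant to be driven by rdmavt through its driver-provided function table. A sketch of the expected wiring at registration time (the rvt_driver_provided field names are assumptions, not shown in this hunk):

	dd->verbs_dev.rdi.driver_f.qp_priv_alloc    = qib_qp_priv_alloc;
	dd->verbs_dev.rdi.driver_f.qp_priv_free     = qib_qp_priv_free;
	dd->verbs_dev.rdi.driver_f.stop_send_queue  = qib_stop_send_queue;
	dd->verbs_dev.rdi.driver_f.quiesce_qp       = qib_quiesce_qp;
	dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
	dd->verbs_dev.rdi.driver_f.mtu_from_qp      = qib_mtu_from_qp;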
 
 
 /**
 /**
@@ -1280,7 +444,7 @@ void qib_free_qpn_table(struct qib_qpn_table *qpt)
  *
  *
  * The QP s_lock should be held.
  * The QP s_lock should be held.
  */
  */
-void qib_get_credit(struct qib_qp *qp, u32 aeth)
+void qib_get_credit(struct rvt_qp *qp, u32 aeth)
 {
 {
 	u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK;
 	u32 credit = (aeth >> QIB_AETH_CREDIT_SHIFT) & QIB_AETH_CREDIT_MASK;
 
 
@@ -1290,31 +454,70 @@ void qib_get_credit(struct qib_qp *qp, u32 aeth)
 	 * honor the credit field.
 	 * honor the credit field.
 	 */
 	 */
 	if (credit == QIB_AETH_CREDIT_INVAL) {
 	if (credit == QIB_AETH_CREDIT_INVAL) {
-		if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
-			qp->s_flags |= QIB_S_UNLIMITED_CREDIT;
-			if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
-				qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
+		if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
+			qp->s_flags |= RVT_S_UNLIMITED_CREDIT;
+			if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+				qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
 				qib_schedule_send(qp);
 				qib_schedule_send(qp);
 			}
 			}
 		}
 		}
-	} else if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT)) {
+	} else if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT)) {
 		/* Compute new LSN (i.e., MSN + credit) */
 		/* Compute new LSN (i.e., MSN + credit) */
 		credit = (aeth + credit_table[credit]) & QIB_MSN_MASK;
 		credit = (aeth + credit_table[credit]) & QIB_MSN_MASK;
 		if (qib_cmp24(credit, qp->s_lsn) > 0) {
 		if (qib_cmp24(credit, qp->s_lsn) > 0) {
 			qp->s_lsn = credit;
 			qp->s_lsn = credit;
-			if (qp->s_flags & QIB_S_WAIT_SSN_CREDIT) {
-				qp->s_flags &= ~QIB_S_WAIT_SSN_CREDIT;
+			if (qp->s_flags & RVT_S_WAIT_SSN_CREDIT) {
+				qp->s_flags &= ~RVT_S_WAIT_SSN_CREDIT;
 				qib_schedule_send(qp);
 				qib_schedule_send(qp);
 			}
 			}
 		}
 		}
 	}
 	}
 }
 }
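
A worked example of the flow-control update above (the numbers are illustrative, not from the patch): suppose the AETH acknowledges MSN 100 and its 5-bit credit field decodes to 16 work requests via credit_table[].

	u32 msn  = 100;                              /* from the AETH */
	u32 wqes = 16;                               /* credit_table[credit] */
	u32 lsn  = (msn + wqes) & QIB_MSN_MASK;      /* new limit: 116 */

	/* a WQE may now go out as long as qib_cmp24(wqe->ssn, lsn + 1) <= 0,
	 * i.e. its SSN does not run past the advertised limit; otherwise the
	 * sender sets RVT_S_WAIT_SSN_CREDIT and waits for the next ACK. */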
 
 
+/**
+ * qib_check_send_wqe - validate wr/wqe
+ * @qp: the QP
+ * @wqe: the built WQE
+ *
+ * Validate the wr/wqe.  This is called after the WQE has been set up
+ * but before it is inserted into the ring.
+ *
+ * Returns 1 to force direct progress, 0 otherwise, or -EINVAL on failure.
+ */
+int qib_check_send_wqe(struct rvt_qp *qp,
+		       struct rvt_swqe *wqe)
+{
+	struct rvt_ah *ah;
+	int ret = 0;
+
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+	case IB_QPT_UC:
+		if (wqe->length > 0x80000000U)
+			return -EINVAL;
+		break;
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+	case IB_QPT_UD:
+		ah = ibah_to_rvtah(wqe->ud_wr.ah);
+		if (wqe->length > (1 << ah->log_pmtu))
+			return -EINVAL;
+		/* progress hint */
+		ret = 1;
+		break;
+	default:
+		break;
+	}
+	return ret;
+}
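
To make the limits concrete: RC/UC work requests are only bounded by the 2 GiB message cap, while UD-class payloads must fit in a single path-MTU packet, so with ah->log_pmtu == 12 the ceiling is 1 << 12 = 4096 bytes. The rdmavt post-send path is presumably expected to consume the return value along these lines (the callback field name is an assumption):

	/* sketch, not from this patch */
	if (rdi->driver_f.check_send_wqe) {
		int cs = rdi->driver_f.check_send_wqe(qp, wqe);

		if (cs < 0)
			return cs;      /* reject the work request */
		call_send = cs;         /* 1: progress directly, 0: schedule */
	}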
+
 #ifdef CONFIG_DEBUG_FS
 #ifdef CONFIG_DEBUG_FS
 
 
 struct qib_qp_iter {
 struct qib_qp_iter {
 	struct qib_ibdev *dev;
 	struct qib_ibdev *dev;
-	struct qib_qp *qp;
+	struct rvt_qp *qp;
 	int n;
 	int n;
 };
 };
 
 
@@ -1340,14 +543,14 @@ int qib_qp_iter_next(struct qib_qp_iter *iter)
 	struct qib_ibdev *dev = iter->dev;
 	struct qib_ibdev *dev = iter->dev;
 	int n = iter->n;
 	int n = iter->n;
 	int ret = 1;
 	int ret = 1;
-	struct qib_qp *pqp = iter->qp;
-	struct qib_qp *qp;
+	struct rvt_qp *pqp = iter->qp;
+	struct rvt_qp *qp;
 
 
-	for (; n < dev->qp_table_size; n++) {
+	for (; n < dev->rdi.qp_dev->qp_table_size; n++) {
 		if (pqp)
 		if (pqp)
 			qp = rcu_dereference(pqp->next);
 			qp = rcu_dereference(pqp->next);
 		else
 		else
-			qp = rcu_dereference(dev->qp_table[n]);
+			qp = rcu_dereference(dev->rdi.qp_dev->qp_table[n]);
 		pqp = qp;
 		pqp = qp;
 		if (qp) {
 		if (qp) {
 			iter->qp = qp;
 			iter->qp = qp;
@@ -1364,10 +567,11 @@ static const char * const qp_type_str[] = {
 
 
 void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
 void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
 {
 {
-	struct qib_swqe *wqe;
-	struct qib_qp *qp = iter->qp;
+	struct rvt_swqe *wqe;
+	struct rvt_qp *qp = iter->qp;
+	struct qib_qp_priv *priv = qp->priv;
 
 
-	wqe = get_swqe_ptr(qp, qp->s_last);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 	seq_printf(s,
 	seq_printf(s,
 		   "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n",
 		   "N %d QP%u %s %u %u %u f=%x %u %u %u %u %u PSN %x %x %x %x %x (%u %u %u %u %u %u) QP%u LID %x\n",
 		   iter->n,
 		   iter->n,
@@ -1377,8 +581,8 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter)
 		   wqe->wr.opcode,
 		   wqe->wr.opcode,
 		   qp->s_hdrwords,
 		   qp->s_hdrwords,
 		   qp->s_flags,
 		   qp->s_flags,
-		   atomic_read(&qp->s_dma_busy),
-		   !list_empty(&qp->iowait),
+		   atomic_read(&priv->s_dma_busy),
+		   !list_empty(&priv->iowait),
 		   qp->timeout,
 		   qp->timeout,
 		   wqe->ssn,
 		   wqe->ssn,
 		   qp->s_lsn,
 		   qp->s_lsn,

198 additions, 211 deletions
drivers/infiniband/hw/qib/qib_rc.c

@@ -40,7 +40,7 @@
 
 
 static void rc_timeout(unsigned long arg);
 static void rc_timeout(unsigned long arg);
 
 
-static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe,
+static u32 restart_sge(struct rvt_sge_state *ss, struct rvt_swqe *wqe,
 		       u32 psn, u32 pmtu)
 		       u32 psn, u32 pmtu)
 {
 {
 	u32 len;
 	u32 len;
@@ -54,9 +54,9 @@ static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe,
 	return wqe->length - len;
 	return wqe->length - len;
 }
 }
 
 
-static void start_timer(struct qib_qp *qp)
+static void start_timer(struct rvt_qp *qp)
 {
 {
-	qp->s_flags |= QIB_S_TIMER;
+	qp->s_flags |= RVT_S_TIMER;
 	qp->s_timer.function = rc_timeout;
 	qp->s_timer.function = rc_timeout;
 	/* 4.096 usec. * (1 << qp->timeout) */
 	/* 4.096 usec. * (1 << qp->timeout) */
 	qp->s_timer.expires = jiffies + qp->timeout_jiffies;
 	qp->s_timer.expires = jiffies + qp->timeout_jiffies;
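
For a concrete sense of scale, timeout_jiffies is derived from the IB local ACK timeout of 4.096 µs × 2^timeout (that computation is unchanged by this patch); with an illustrative qp->timeout of 14 it works out to roughly 67 ms:

	/* illustrative: qp->timeout == 14 */
	timeout_jiffies = usecs_to_jiffies((4096UL * (1UL << 14)) / 1000UL);
	/* 4096 ns * 16384 = 67,108,864 ns -> 67108 us -> ~67 ms of jiffies */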
@@ -74,17 +74,17 @@ static void start_timer(struct qib_qp *qp)
  * Note that we are in the responder's side of the QP context.
  * Note that we are in the responder's side of the QP context.
  * Note the QP s_lock must be held.
  * Note the QP s_lock must be held.
  */
  */
-static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
+static int qib_make_rc_ack(struct qib_ibdev *dev, struct rvt_qp *qp,
 			   struct qib_other_headers *ohdr, u32 pmtu)
 			   struct qib_other_headers *ohdr, u32 pmtu)
 {
 {
-	struct qib_ack_entry *e;
+	struct rvt_ack_entry *e;
 	u32 hwords;
 	u32 hwords;
 	u32 len;
 	u32 len;
 	u32 bth0;
 	u32 bth0;
 	u32 bth2;
 	u32 bth2;
 
 
 	/* Don't send an ACK if we aren't supposed to. */
 	/* Don't send an ACK if we aren't supposed to. */
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto bail;
 		goto bail;
 
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
@@ -95,7 +95,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
 	case OP(RDMA_READ_RESPONSE_ONLY):
 	case OP(RDMA_READ_RESPONSE_ONLY):
 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
 		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
 		if (e->rdma_sge.mr) {
 		if (e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 			e->rdma_sge.mr = NULL;
 		}
 		}
 		/* FALLTHROUGH */
 		/* FALLTHROUGH */
@@ -112,7 +112,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
 	case OP(ACKNOWLEDGE):
 	case OP(ACKNOWLEDGE):
 		/* Check for no next entry in the queue. */
 		/* Check for no next entry in the queue. */
 		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
 		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
-			if (qp->s_flags & QIB_S_ACK_PENDING)
+			if (qp->s_flags & RVT_S_ACK_PENDING)
 				goto normal;
 				goto normal;
 			goto bail;
 			goto bail;
 		}
 		}
@@ -133,7 +133,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
 			/* Copy SGE state in case we need to resend */
 			/* Copy SGE state in case we need to resend */
 			qp->s_rdma_mr = e->rdma_sge.mr;
 			qp->s_rdma_mr = e->rdma_sge.mr;
 			if (qp->s_rdma_mr)
 			if (qp->s_rdma_mr)
-				qib_get_mr(qp->s_rdma_mr);
+				rvt_get_mr(qp->s_rdma_mr);
 			qp->s_ack_rdma_sge.sge = e->rdma_sge;
 			qp->s_ack_rdma_sge.sge = e->rdma_sge;
 			qp->s_ack_rdma_sge.num_sge = 1;
 			qp->s_ack_rdma_sge.num_sge = 1;
 			qp->s_cur_sge = &qp->s_ack_rdma_sge;
 			qp->s_cur_sge = &qp->s_ack_rdma_sge;
@@ -172,7 +172,7 @@ static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
 		qp->s_cur_sge = &qp->s_ack_rdma_sge;
 		qp->s_cur_sge = &qp->s_ack_rdma_sge;
 		qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
 		qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
 		if (qp->s_rdma_mr)
 		if (qp->s_rdma_mr)
-			qib_get_mr(qp->s_rdma_mr);
+			rvt_get_mr(qp->s_rdma_mr);
 		len = qp->s_ack_rdma_sge.sge.sge_length;
 		len = qp->s_ack_rdma_sge.sge.sge_length;
 		if (len > pmtu)
 		if (len > pmtu)
 			len = pmtu;
 			len = pmtu;
@@ -196,7 +196,7 @@ normal:
 		 * (see above).
 		 * (see above).
 		 */
 		 */
 		qp->s_ack_state = OP(SEND_ONLY);
 		qp->s_ack_state = OP(SEND_ONLY);
-		qp->s_flags &= ~QIB_S_ACK_PENDING;
+		qp->s_flags &= ~RVT_S_ACK_PENDING;
 		qp->s_cur_sge = NULL;
 		qp->s_cur_sge = NULL;
 		if (qp->s_nak_state)
 		if (qp->s_nak_state)
 			ohdr->u.aeth =
 			ohdr->u.aeth =
@@ -218,7 +218,7 @@ normal:
 
 
 bail:
 bail:
 	qp->s_ack_state = OP(ACKNOWLEDGE);
 	qp->s_ack_state = OP(ACKNOWLEDGE);
-	qp->s_flags &= ~(QIB_S_RESP_PENDING | QIB_S_ACK_PENDING);
+	qp->s_flags &= ~(RVT_S_RESP_PENDING | RVT_S_ACK_PENDING);
 	return 0;
 	return 0;
 }
 }
 
 
@@ -226,63 +226,60 @@ bail:
  * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
  * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
  * @qp: a pointer to the QP
  * @qp: a pointer to the QP
  *
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  * Return 1 if constructed; otherwise, return 0.
  */
  */
-int qib_make_rc_req(struct qib_qp *qp)
+int qib_make_rc_req(struct rvt_qp *qp)
 {
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 	struct qib_other_headers *ohdr;
 	struct qib_other_headers *ohdr;
-	struct qib_sge_state *ss;
-	struct qib_swqe *wqe;
+	struct rvt_sge_state *ss;
+	struct rvt_swqe *wqe;
 	u32 hwords;
 	u32 hwords;
 	u32 len;
 	u32 len;
 	u32 bth0;
 	u32 bth0;
 	u32 bth2;
 	u32 bth2;
 	u32 pmtu = qp->pmtu;
 	u32 pmtu = qp->pmtu;
 	char newreq;
 	char newreq;
-	unsigned long flags;
 	int ret = 0;
 	int ret = 0;
 	int delta;
 	int delta;
 
 
-	ohdr = &qp->s_hdr->u.oth;
+	ohdr = &priv->s_hdr->u.oth;
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr->u.l.oth;
-
-	/*
-	 * The lock is needed to synchronize between the sending tasklet,
-	 * the receive interrupt handler, and timeout resends.
-	 */
-	spin_lock_irqsave(&qp->s_lock, flags);
+		ohdr = &priv->s_hdr->u.l.oth;
 
 
 	/* Sending responses has higher priority over sending requests. */
 	/* Sending responses has higher priority over sending requests. */
-	if ((qp->s_flags & QIB_S_RESP_PENDING) &&
+	if ((qp->s_flags & RVT_S_RESP_PENDING) &&
 	    qib_make_rc_ack(dev, qp, ohdr, pmtu))
 	    qib_make_rc_ack(dev, qp, ohdr, pmtu))
 		goto done;
 		goto done;
 
 
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
-		if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 			goto bail;
 		/* We are in the error state, flush the work request. */
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= QIB_S_WAIT_DMA;
+		if (atomic_read(&priv->s_dma_busy)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 			goto bail;
 		}
 		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
 		qib_send_complete(qp, wqe, qp->s_last != qp->s_acked ?
 			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
 			IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR);
 		/* will get called again */
 		/* will get called again */
 		goto done;
 		goto done;
 	}
 	}
 
 
-	if (qp->s_flags & (QIB_S_WAIT_RNR | QIB_S_WAIT_ACK))
+	if (qp->s_flags & (RVT_S_WAIT_RNR | RVT_S_WAIT_ACK))
 		goto bail;
 		goto bail;
 
 
 	if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
 	if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
 		if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
 		if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
-			qp->s_flags |= QIB_S_WAIT_PSN;
+			qp->s_flags |= RVT_S_WAIT_PSN;
 			goto bail;
 			goto bail;
 		}
 		}
 		qp->s_sending_psn = qp->s_psn;
 		qp->s_sending_psn = qp->s_psn;
@@ -294,10 +291,10 @@ int qib_make_rc_req(struct qib_qp *qp)
 	bth0 = 0;
 	bth0 = 0;
 
 
 	/* Send a request. */
 	/* Send a request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	switch (qp->s_state) {
 	switch (qp->s_state) {
 	default:
 	default:
-		if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK))
+		if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 			goto bail;
 		/*
 		/*
 		 * Resend an old request or start a new one.
 		 * Resend an old request or start a new one.
@@ -317,11 +314,11 @@ int qib_make_rc_req(struct qib_qp *qp)
 			 */
 			 */
 			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
 			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
 			    qp->s_num_rd_atomic) {
 			    qp->s_num_rd_atomic) {
-				qp->s_flags |= QIB_S_WAIT_FENCE;
+				qp->s_flags |= RVT_S_WAIT_FENCE;
 				goto bail;
 				goto bail;
 			}
 			}
-			wqe->psn = qp->s_next_psn;
 			newreq = 1;
 			newreq = 1;
+			qp->s_psn = wqe->psn;
 		}
 		}
 		/*
 		/*
 		 * Note that we have to be careful not to modify the
 		 * Note that we have to be careful not to modify the
@@ -335,14 +332,12 @@ int qib_make_rc_req(struct qib_qp *qp)
 		case IB_WR_SEND:
 		case IB_WR_SEND:
 		case IB_WR_SEND_WITH_IMM:
 		case IB_WR_SEND_WITH_IMM:
 			/* If no credit, return. */
 			/* If no credit, return. */
-			if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
 			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 				goto bail;
 			}
 			}
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(SEND_FIRST);
 				qp->s_state = OP(SEND_FIRST);
 				len = pmtu;
 				len = pmtu;
 				break;
 				break;
@@ -363,14 +358,14 @@ int qib_make_rc_req(struct qib_qp *qp)
 			break;
 			break;
 
 
 		case IB_WR_RDMA_WRITE:
 		case IB_WR_RDMA_WRITE:
-			if (newreq && !(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+			if (newreq && !(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 				qp->s_lsn++;
 				qp->s_lsn++;
 			/* FALLTHROUGH */
 			/* FALLTHROUGH */
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			/* If no credit, return. */
 			/* If no credit, return. */
-			if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+			if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT) &&
 			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
 			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
-				qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+				qp->s_flags |= RVT_S_WAIT_SSN_CREDIT;
 				goto bail;
 				goto bail;
 			}
 			}
 
 
@@ -380,9 +375,7 @@ int qib_make_rc_req(struct qib_qp *qp)
 				cpu_to_be32(wqe->rdma_wr.rkey);
 				cpu_to_be32(wqe->rdma_wr.rkey);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
 			ohdr->u.rc.reth.length = cpu_to_be32(len);
 			hwords += sizeof(struct ib_reth) / sizeof(u32);
 			hwords += sizeof(struct ib_reth) / sizeof(u32);
-			wqe->lpsn = wqe->psn;
 			if (len > pmtu) {
 			if (len > pmtu) {
-				wqe->lpsn += (len - 1) / pmtu;
 				qp->s_state = OP(RDMA_WRITE_FIRST);
 				qp->s_state = OP(RDMA_WRITE_FIRST);
 				len = pmtu;
 				len = pmtu;
 				break;
 				break;
@@ -411,19 +404,12 @@ int qib_make_rc_req(struct qib_qp *qp)
 			if (newreq) {
 			if (newreq) {
 				if (qp->s_num_rd_atomic >=
 				if (qp->s_num_rd_atomic >=
 				    qp->s_max_rd_atomic) {
 				    qp->s_max_rd_atomic) {
-					qp->s_flags |= QIB_S_WAIT_RDMAR;
+					qp->s_flags |= RVT_S_WAIT_RDMAR;
 					goto bail;
 					goto bail;
 				}
 				}
 				qp->s_num_rd_atomic++;
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
 					qp->s_lsn++;
-				/*
-				 * Adjust s_next_psn to count the
-				 * expected number of responses.
-				 */
-				if (len > pmtu)
-					qp->s_next_psn += (len - 1) / pmtu;
-				wqe->lpsn = qp->s_next_psn++;
 			}
 			}
 
 
 			ohdr->u.rc.reth.vaddr =
 			ohdr->u.rc.reth.vaddr =
@@ -449,13 +435,12 @@ int qib_make_rc_req(struct qib_qp *qp)
 			if (newreq) {
 			if (newreq) {
 				if (qp->s_num_rd_atomic >=
 				if (qp->s_num_rd_atomic >=
 				    qp->s_max_rd_atomic) {
 				    qp->s_max_rd_atomic) {
-					qp->s_flags |= QIB_S_WAIT_RDMAR;
+					qp->s_flags |= RVT_S_WAIT_RDMAR;
 					goto bail;
 					goto bail;
 				}
 				}
 				qp->s_num_rd_atomic++;
 				qp->s_num_rd_atomic++;
-				if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+				if (!(qp->s_flags & RVT_S_UNLIMITED_CREDIT))
 					qp->s_lsn++;
 					qp->s_lsn++;
-				wqe->lpsn = wqe->psn;
 			}
 			}
 			if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 			if (wqe->atomic_wr.wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 				qp->s_state = OP(COMPARE_SWAP);
 				qp->s_state = OP(COMPARE_SWAP);
@@ -498,11 +483,8 @@ int qib_make_rc_req(struct qib_qp *qp)
 		}
 		}
 		if (wqe->wr.opcode == IB_WR_RDMA_READ)
 		if (wqe->wr.opcode == IB_WR_RDMA_READ)
 			qp->s_psn = wqe->lpsn + 1;
 			qp->s_psn = wqe->lpsn + 1;
-		else {
+		else
 			qp->s_psn++;
 			qp->s_psn++;
-			if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-				qp->s_next_psn = qp->s_psn;
-		}
 		break;
 		break;
 
 
 	case OP(RDMA_READ_RESPONSE_FIRST):
 	case OP(RDMA_READ_RESPONSE_FIRST):
@@ -522,8 +504,6 @@ int qib_make_rc_req(struct qib_qp *qp)
 		/* FALLTHROUGH */
 		/* FALLTHROUGH */
 	case OP(SEND_MIDDLE):
 	case OP(SEND_MIDDLE):
 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
-		if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		len = qp->s_len;
 		if (len > pmtu) {
 		if (len > pmtu) {
@@ -563,8 +543,6 @@ int qib_make_rc_req(struct qib_qp *qp)
 		/* FALLTHROUGH */
 		/* FALLTHROUGH */
 	case OP(RDMA_WRITE_MIDDLE):
 	case OP(RDMA_WRITE_MIDDLE):
 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
 		bth2 = qp->s_psn++ & QIB_PSN_MASK;
-		if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
-			qp->s_next_psn = qp->s_psn;
 		ss = &qp->s_sge;
 		ss = &qp->s_sge;
 		len = qp->s_len;
 		len = qp->s_len;
 		if (len > pmtu) {
 		if (len > pmtu) {
@@ -618,9 +596,9 @@ int qib_make_rc_req(struct qib_qp *qp)
 	delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
 	delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
 	if (delta && delta % QIB_PSN_CREDIT == 0)
 	if (delta && delta % QIB_PSN_CREDIT == 0)
 		bth2 |= IB_BTH_REQ_ACK;
 		bth2 |= IB_BTH_REQ_ACK;
-	if (qp->s_flags & QIB_S_SEND_ONE) {
-		qp->s_flags &= ~QIB_S_SEND_ONE;
-		qp->s_flags |= QIB_S_WAIT_ACK;
+	if (qp->s_flags & RVT_S_SEND_ONE) {
+		qp->s_flags &= ~RVT_S_SEND_ONE;
+		qp->s_flags |= RVT_S_WAIT_ACK;
 		bth2 |= IB_BTH_REQ_ACK;
 		bth2 |= IB_BTH_REQ_ACK;
 	}
 	}
 	qp->s_len -= len;
 	qp->s_len -= len;
@@ -629,13 +607,9 @@ int qib_make_rc_req(struct qib_qp *qp)
 	qp->s_cur_size = len;
 	qp->s_cur_size = len;
 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
 done:
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
 bail:
-	qp->s_flags &= ~QIB_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	qp->s_flags &= ~RVT_S_BUSY;
 	return ret;
 	return ret;
 }
 }
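
Since the spin_lock_irqsave()/irqrestore() pair has been lifted out of qib_make_rc_req(), the caller (the send engine) now owns the s_lock across the request-building step. A rough sketch of that calling pattern, with the assumed details filled in:

	/* sketch of the caller, not code from this patch */
	spin_lock_irqsave(&qp->s_lock, flags);
	while (qib_make_rc_req(qp)) {
		spin_unlock_irqrestore(&qp->s_lock, flags);
		/* hand priv->s_hdr and the SGE state to the send/DMA path */
		spin_lock_irqsave(&qp->s_lock, flags);
	}
	spin_unlock_irqrestore(&qp->s_lock, flags);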
 
 
@@ -647,7 +621,7 @@ unlock:
  * Note that RDMA reads and atomics are handled in the
  * Note that RDMA reads and atomics are handled in the
  * send side QP state and tasklet.
  * send side QP state and tasklet.
  */
  */
-void qib_send_rc_ack(struct qib_qp *qp)
+void qib_send_rc_ack(struct rvt_qp *qp)
 {
 {
 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -665,11 +639,11 @@ void qib_send_rc_ack(struct qib_qp *qp)
 
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 	spin_lock_irqsave(&qp->s_lock, flags);
 
 
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto unlock;
 		goto unlock;
 
 
 	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
 	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
-	if ((qp->s_flags & QIB_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
+	if ((qp->s_flags & RVT_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
 		goto queue_ack;
 		goto queue_ack;
 
 
 	/* Construct the header with s_lock held so APM doesn't change it. */
 	/* Construct the header with s_lock held so APM doesn't change it. */
@@ -758,9 +732,9 @@ void qib_send_rc_ack(struct qib_qp *qp)
 	goto done;
 	goto done;
 
 
 queue_ack:
 queue_ack:
-	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
-		ibp->n_rc_qacks++;
-		qp->s_flags |= QIB_S_ACK_PENDING | QIB_S_RESP_PENDING;
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+		this_cpu_inc(*ibp->rvp.rc_qacks);
+		qp->s_flags |= RVT_S_ACK_PENDING | RVT_S_RESP_PENDING;
 		qp->s_nak_state = qp->r_nak_state;
 		qp->s_nak_state = qp->r_nak_state;
 		qp->s_ack_psn = qp->r_ack_psn;
 		qp->s_ack_psn = qp->r_ack_psn;
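
The n_rc_qacks counter is now a per-CPU quantity under ibp->rvp, so the hot path can bump it with this_cpu_inc() and stay lock-free. A sketch of the usual allocate/sum pattern for such a counter (the surrounding code is assumed, not shown in this hunk):

	u64 total = 0;
	int cpu;

	/* at port init (field assumed to be a u64 __percpu pointer) */
	ibp->rvp.rc_qacks = alloc_percpu(u64);

	/* ... hot path: this_cpu_inc(*ibp->rvp.rc_qacks); ... */

	/* when exporting the statistic, sum the per-CPU slots */
	for_each_possible_cpu(cpu)
		total += *per_cpu_ptr(ibp->rvp.rc_qacks, cpu);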
 
 
@@ -782,10 +756,10 @@ done:
  * for the given QP.
  * for the given QP.
  * Called at interrupt level with the QP s_lock held.
  * Called at interrupt level with the QP s_lock held.
  */
  */
-static void reset_psn(struct qib_qp *qp, u32 psn)
+static void reset_psn(struct rvt_qp *qp, u32 psn)
 {
 {
 	u32 n = qp->s_acked;
 	u32 n = qp->s_acked;
-	struct qib_swqe *wqe = get_swqe_ptr(qp, n);
+	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, n);
 	u32 opcode;
 	u32 opcode;
 
 
 	qp->s_cur = n;
 	qp->s_cur = n;
@@ -808,7 +782,7 @@ static void reset_psn(struct qib_qp *qp, u32 psn)
 			n = 0;
 			n = 0;
 		if (n == qp->s_tail)
 		if (n == qp->s_tail)
 			break;
 			break;
-		wqe = get_swqe_ptr(qp, n);
+		wqe = rvt_get_swqe_ptr(qp, n);
 		diff = qib_cmp24(psn, wqe->psn);
 		diff = qib_cmp24(psn, wqe->psn);
 		if (diff < 0)
 		if (diff < 0)
 			break;
 			break;
@@ -854,22 +828,22 @@ static void reset_psn(struct qib_qp *qp, u32 psn)
 done:
 done:
 	qp->s_psn = psn;
 	qp->s_psn = psn;
 	/*
 	/*
-	 * Set QIB_S_WAIT_PSN as qib_rc_complete() may start the timer
+	 * Set RVT_S_WAIT_PSN as qib_rc_complete() may start the timer
 	 * asynchronously before the send tasklet can get scheduled.
 	 * asynchronously before the send tasklet can get scheduled.
 	 * Doing it in qib_make_rc_req() is too late.
 	 * Doing it in qib_make_rc_req() is too late.
 	 */
 	 */
 	if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
 	if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
 	    (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
 	    (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
-		qp->s_flags |= QIB_S_WAIT_PSN;
+		qp->s_flags |= RVT_S_WAIT_PSN;
 }
 }
 
 
 /*
 /*
  * Back up requester to resend the last un-ACKed request.
  * Back up requester to resend the last un-ACKed request.
  * The QP r_lock and s_lock should be held and interrupts disabled.
  * The QP r_lock and s_lock should be held and interrupts disabled.
  */
  */
-static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
+static void qib_restart_rc(struct rvt_qp *qp, u32 psn, int wait)
 {
 {
-	struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_acked);
+	struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	struct qib_ibport *ibp;
 	struct qib_ibport *ibp;
 
 
 	if (qp->s_retry == 0) {
 	if (qp->s_retry == 0) {
@@ -878,7 +852,7 @@ static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
 			qp->s_retry = qp->s_retry_cnt;
 			qp->s_retry = qp->s_retry_cnt;
 		} else if (qp->s_last == qp->s_acked) {
 		} else if (qp->s_last == qp->s_acked) {
 			qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
 			qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
-			qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+			rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			return;
 			return;
 		} else /* XXX need to handle delayed completion */
 		} else /* XXX need to handle delayed completion */
 			return;
 			return;
@@ -887,15 +861,15 @@ static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
 
 
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
 	if (wqe->wr.opcode == IB_WR_RDMA_READ)
-		ibp->n_rc_resends++;
+		ibp->rvp.n_rc_resends++;
 	else
 	else
-		ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+		ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
 
 
-	qp->s_flags &= ~(QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR |
-			 QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_PSN |
-			 QIB_S_WAIT_ACK);
+	qp->s_flags &= ~(RVT_S_WAIT_FENCE | RVT_S_WAIT_RDMAR |
+			 RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_PSN |
+			 RVT_S_WAIT_ACK);
 	if (wait)
 	if (wait)
-		qp->s_flags |= QIB_S_SEND_ONE;
+		qp->s_flags |= RVT_S_SEND_ONE;
 	reset_psn(qp, psn);
 	reset_psn(qp, psn);
 }
 }
 
 
@@ -904,16 +878,16 @@ static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
  */
  */
 static void rc_timeout(unsigned long arg)
 static void rc_timeout(unsigned long arg)
 {
 {
-	struct qib_qp *qp = (struct qib_qp *)arg;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
 	struct qib_ibport *ibp;
 	struct qib_ibport *ibp;
 	unsigned long flags;
 	unsigned long flags;
 
 
 	spin_lock_irqsave(&qp->r_lock, flags);
 	spin_lock_irqsave(&qp->r_lock, flags);
 	spin_lock(&qp->s_lock);
 	spin_lock(&qp->s_lock);
-	if (qp->s_flags & QIB_S_TIMER) {
+	if (qp->s_flags & RVT_S_TIMER) {
 		ibp = to_iport(qp->ibqp.device, qp->port_num);
 		ibp = to_iport(qp->ibqp.device, qp->port_num);
-		ibp->n_rc_timeouts++;
-		qp->s_flags &= ~QIB_S_TIMER;
+		ibp->rvp.n_rc_timeouts++;
+		qp->s_flags &= ~RVT_S_TIMER;
 		del_timer(&qp->s_timer);
 		del_timer(&qp->s_timer);
 		qib_restart_rc(qp, qp->s_last_psn + 1, 1);
 		qib_restart_rc(qp, qp->s_last_psn + 1, 1);
 		qib_schedule_send(qp);
 		qib_schedule_send(qp);
@@ -927,12 +901,12 @@ static void rc_timeout(unsigned long arg)
  */
  */
 void qib_rc_rnr_retry(unsigned long arg)
 void qib_rc_rnr_retry(unsigned long arg)
 {
 {
-	struct qib_qp *qp = (struct qib_qp *)arg;
+	struct rvt_qp *qp = (struct rvt_qp *)arg;
 	unsigned long flags;
 	unsigned long flags;
 
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (qp->s_flags & QIB_S_WAIT_RNR) {
-		qp->s_flags &= ~QIB_S_WAIT_RNR;
+	if (qp->s_flags & RVT_S_WAIT_RNR) {
+		qp->s_flags &= ~RVT_S_WAIT_RNR;
 		del_timer(&qp->s_timer);
 		del_timer(&qp->s_timer);
 		qib_schedule_send(qp);
 		qib_schedule_send(qp);
 	}
 	}
@@ -943,14 +917,14 @@ void qib_rc_rnr_retry(unsigned long arg)
  * Set qp->s_sending_psn to the next PSN after the given one.
  * Set qp->s_sending_psn to the next PSN after the given one.
  * This would be psn+1 except when RDMA reads are present.
  * This would be psn+1 except when RDMA reads are present.
  */
  */
-static void reset_sending_psn(struct qib_qp *qp, u32 psn)
+static void reset_sending_psn(struct rvt_qp *qp, u32 psn)
 {
 {
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 	u32 n = qp->s_last;
 	u32 n = qp->s_last;
 
 
 	/* Find the work request corresponding to the given PSN. */
 	/* Find the work request corresponding to the given PSN. */
 	for (;;) {
 	for (;;) {
-		wqe = get_swqe_ptr(qp, n);
+		wqe = rvt_get_swqe_ptr(qp, n);
 		if (qib_cmp24(psn, wqe->lpsn) <= 0) {
 		if (qib_cmp24(psn, wqe->lpsn) <= 0) {
 			if (wqe->wr.opcode == IB_WR_RDMA_READ)
 			if (wqe->wr.opcode == IB_WR_RDMA_READ)
 				qp->s_sending_psn = wqe->lpsn + 1;
 				qp->s_sending_psn = wqe->lpsn + 1;
@@ -968,16 +942,16 @@ static void reset_sending_psn(struct qib_qp *qp, u32 psn)
 /*
 /*
  * This should be called with the QP s_lock held and interrupts disabled.
  * This should be called with the QP s_lock held and interrupts disabled.
  */
  */
-void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
+void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr)
 {
 {
 	struct qib_other_headers *ohdr;
 	struct qib_other_headers *ohdr;
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 	struct ib_wc wc;
 	struct ib_wc wc;
 	unsigned i;
 	unsigned i;
 	u32 opcode;
 	u32 opcode;
 	u32 psn;
 	u32 psn;
 
 
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;
 		return;
 
 
 	/* Find out where the BTH is */
 	/* Find out where the BTH is */
@@ -1002,22 +976,30 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
 	 * there are still requests that haven't been acked.
 	 * there are still requests that haven't been acked.
 	 */
 	 */
 	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
 	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
-	    !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)) &&
-	    (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+	    !(qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR | RVT_S_WAIT_PSN)) &&
+	    (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		start_timer(qp);
 		start_timer(qp);
 
 
 	while (qp->s_last != qp->s_acked) {
 	while (qp->s_last != qp->s_acked) {
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		u32 s_last;
+
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
 		if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
 		    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 		    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
 			break;
 			break;
+		s_last = qp->s_last;
+		if (++s_last >= qp->s_size)
+			s_last = 0;
+		qp->s_last = s_last;
+		/* see post_send() */
+		barrier();
 		for (i = 0; i < wqe->wr.num_sge; i++) {
 		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct qib_sge *sge = &wqe->sg_list[i];
+			struct rvt_sge *sge = &wqe->sg_list[i];
 
 
-			qib_put_mr(sge->mr);
+			rvt_put_mr(sge->mr);
 		}
 		}
 		/* Post a send completion queue entry if requested. */
 		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			memset(&wc, 0, sizeof(wc));
 			memset(&wc, 0, sizeof(wc));
 			wc.wr_id = wqe->wr.wr_id;
 			wc.wr_id = wqe->wr.wr_id;
@@ -1025,25 +1007,23 @@ void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
 			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
 			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
 			wc.byte_len = wqe->length;
 			wc.byte_len = wqe->length;
 			wc.qp = &qp->ibqp;
 			wc.qp = &qp->ibqp;
-			qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
 		}
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
 	}
 	}
 	/*
 	/*
 	 * If we were waiting for sends to complete before resending,
 	 * If we were waiting for sends to complete before resending,
 	 * and they are now complete, restart sending.
 	 * and they are now complete, restart sending.
 	 */
 	 */
-	if (qp->s_flags & QIB_S_WAIT_PSN &&
+	if (qp->s_flags & RVT_S_WAIT_PSN &&
 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
-		qp->s_flags &= ~QIB_S_WAIT_PSN;
+		qp->s_flags &= ~RVT_S_WAIT_PSN;
 		qp->s_sending_psn = qp->s_psn;
 		qp->s_sending_psn = qp->s_psn;
 		qp->s_sending_hpsn = qp->s_psn - 1;
 		qp->s_sending_hpsn = qp->s_psn - 1;
 		qib_schedule_send(qp);
 		qib_schedule_send(qp);
 	}
 	}
 }
 }
 
 
-static inline void update_last_psn(struct qib_qp *qp, u32 psn)
+static inline void update_last_psn(struct rvt_qp *qp, u32 psn)
 {
 {
 	qp->s_last_psn = psn;
 	qp->s_last_psn = psn;
 }
 }
@@ -1053,8 +1033,8 @@ static inline void update_last_psn(struct qib_qp *qp, u32 psn)
  * This is similar to qib_send_complete but has to check to be sure
  * This is similar to qib_send_complete but has to check to be sure
  * that the SGEs are not being referenced if the SWQE is being resent.
  * that the SGEs are not being referenced if the SWQE is being resent.
  */
  */
-static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
-					 struct qib_swqe *wqe,
+static struct rvt_swqe *do_rc_completion(struct rvt_qp *qp,
+					 struct rvt_swqe *wqe,
 					 struct qib_ibport *ibp)
 					 struct qib_ibport *ibp)
 {
 {
 	struct ib_wc wc;
 	struct ib_wc wc;
@@ -1067,13 +1047,21 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
 	 */
 	 */
 	if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
 	if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
 	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
+		u32 s_last;
+
 		for (i = 0; i < wqe->wr.num_sge; i++) {
 		for (i = 0; i < wqe->wr.num_sge; i++) {
-			struct qib_sge *sge = &wqe->sg_list[i];
+			struct rvt_sge *sge = &wqe->sg_list[i];
 
 
-			qib_put_mr(sge->mr);
+			rvt_put_mr(sge->mr);
 		}
 		}
+		s_last = qp->s_last;
+		if (++s_last >= qp->s_size)
+			s_last = 0;
+		qp->s_last = s_last;
+		/* see post_send() */
+		barrier();
 		/* Post a send completion queue entry if requested. */
 		/* Post a send completion queue entry if requested. */
-		if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+		if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
 			memset(&wc, 0, sizeof(wc));
 			memset(&wc, 0, sizeof(wc));
 			wc.wr_id = wqe->wr.wr_id;
 			wc.wr_id = wqe->wr.wr_id;
@@ -1081,12 +1069,10 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
 			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
 			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
 			wc.byte_len = wqe->length;
 			wc.byte_len = wqe->length;
 			wc.qp = &qp->ibqp;
 			wc.qp = &qp->ibqp;
-			qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+			rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc, 0);
 		}
 		}
-		if (++qp->s_last >= qp->s_size)
-			qp->s_last = 0;
 	} else
 	} else
-		ibp->n_rc_delayed_comp++;
+		this_cpu_inc(*ibp->rvp.rc_delayed_comp);
 
 
 	qp->s_retry = qp->s_retry_cnt;
 	qp->s_retry = qp->s_retry_cnt;
 	update_last_psn(qp, wqe->lpsn);
 	update_last_psn(qp, wqe->lpsn);
@@ -1100,7 +1086,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
 		if (++qp->s_cur >= qp->s_size)
 		if (++qp->s_cur >= qp->s_size)
 			qp->s_cur = 0;
 			qp->s_cur = 0;
 		qp->s_acked = qp->s_cur;
 		qp->s_acked = qp->s_cur;
-		wqe = get_swqe_ptr(qp, qp->s_cur);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 		if (qp->s_acked != qp->s_tail) {
 		if (qp->s_acked != qp->s_tail) {
 			qp->s_state = OP(SEND_LAST);
 			qp->s_state = OP(SEND_LAST);
 			qp->s_psn = wqe->psn;
 			qp->s_psn = wqe->psn;
@@ -1110,7 +1096,7 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
 			qp->s_acked = 0;
 			qp->s_acked = 0;
 		if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
 		if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
 			qp->s_draining = 0;
 			qp->s_draining = 0;
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	}
 	}
 	return wqe;
 	return wqe;
 }
 }
@@ -1126,19 +1112,19 @@ static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
  * Called at interrupt level with the QP s_lock held.
  * Called at interrupt level with the QP s_lock held.
  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  */
  */
-static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
+static int do_rc_ack(struct rvt_qp *qp, u32 aeth, u32 psn, int opcode,
 		     u64 val, struct qib_ctxtdata *rcd)
 		     u64 val, struct qib_ctxtdata *rcd)
 {
 {
 	struct qib_ibport *ibp;
 	struct qib_ibport *ibp;
 	enum ib_wc_status status;
 	enum ib_wc_status status;
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 	int ret = 0;
 	int ret = 0;
 	u32 ack_psn;
 	u32 ack_psn;
 	int diff;
 	int diff;
 
 
 	/* Remove QP from retry timer */
 	/* Remove QP from retry timer */
-	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
-		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
 		del_timer(&qp->s_timer);
 		del_timer(&qp->s_timer);
 	}
 	}
 
 
@@ -1151,7 +1137,7 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 	ack_psn = psn;
 	ack_psn = psn;
 	if (aeth >> 29)
 	if (aeth >> 29)
 		ack_psn--;
 		ack_psn--;
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 
 
 	/*
 	/*
@@ -1186,11 +1172,11 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
 		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
 		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
 		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
 			/* Retry this request. */
 			/* Retry this request. */
-			if (!(qp->r_flags & QIB_R_RDMAR_SEQ)) {
-				qp->r_flags |= QIB_R_RDMAR_SEQ;
+			if (!(qp->r_flags & RVT_R_RDMAR_SEQ)) {
+				qp->r_flags |= RVT_R_RDMAR_SEQ;
 				qib_restart_rc(qp, qp->s_last_psn + 1, 0);
 				qib_restart_rc(qp, qp->s_last_psn + 1, 0);
 				if (list_empty(&qp->rspwait)) {
 				if (list_empty(&qp->rspwait)) {
-					qp->r_flags |= QIB_R_RSP_SEND;
+					qp->r_flags |= RVT_R_RSP_SEND;
 					atomic_inc(&qp->refcount);
 					atomic_inc(&qp->refcount);
 					list_add_tail(&qp->rspwait,
 					list_add_tail(&qp->rspwait,
 						      &rcd->qp_wait_list);
 						      &rcd->qp_wait_list);
@@ -1213,14 +1199,14 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
 		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
 			qp->s_num_rd_atomic--;
 			qp->s_num_rd_atomic--;
 			/* Restart sending task if fence is complete */
 			/* Restart sending task if fence is complete */
-			if ((qp->s_flags & QIB_S_WAIT_FENCE) &&
+			if ((qp->s_flags & RVT_S_WAIT_FENCE) &&
 			    !qp->s_num_rd_atomic) {
 			    !qp->s_num_rd_atomic) {
-				qp->s_flags &= ~(QIB_S_WAIT_FENCE |
-						 QIB_S_WAIT_ACK);
+				qp->s_flags &= ~(RVT_S_WAIT_FENCE |
+						 RVT_S_WAIT_ACK);
 				qib_schedule_send(qp);
 				qib_schedule_send(qp);
-			} else if (qp->s_flags & QIB_S_WAIT_RDMAR) {
-				qp->s_flags &= ~(QIB_S_WAIT_RDMAR |
-						 QIB_S_WAIT_ACK);
+			} else if (qp->s_flags & RVT_S_WAIT_RDMAR) {
+				qp->s_flags &= ~(RVT_S_WAIT_RDMAR |
+						 RVT_S_WAIT_ACK);
 				qib_schedule_send(qp);
 				qib_schedule_send(qp);
 			}
 			}
 		}
 		}
@@ -1231,7 +1217,7 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 
 
 	switch (aeth >> 29) {
 	switch (aeth >> 29) {
 	case 0:         /* ACK */
 	case 0:         /* ACK */
-		ibp->n_rc_acks++;
+		this_cpu_inc(*ibp->rvp.rc_acks);
 		if (qp->s_acked != qp->s_tail) {
 		if (qp->s_acked != qp->s_tail) {
 			/*
 			/*
 			 * We are expecting more ACKs so
 			 * We are expecting more ACKs so
@@ -1248,8 +1234,8 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 			qp->s_state = OP(SEND_LAST);
 			qp->s_state = OP(SEND_LAST);
 			qp->s_psn = psn + 1;
 			qp->s_psn = psn + 1;
 		}
 		}
-		if (qp->s_flags & QIB_S_WAIT_ACK) {
-			qp->s_flags &= ~QIB_S_WAIT_ACK;
+		if (qp->s_flags & RVT_S_WAIT_ACK) {
+			qp->s_flags &= ~RVT_S_WAIT_ACK;
 			qib_schedule_send(qp);
 			qib_schedule_send(qp);
 		}
 		}
 		qib_get_credit(qp, aeth);
 		qib_get_credit(qp, aeth);
@@ -1260,10 +1246,10 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 		goto bail;
 		goto bail;
 
 
 	case 1:         /* RNR NAK */
 	case 1:         /* RNR NAK */
-		ibp->n_rnr_naks++;
+		ibp->rvp.n_rnr_naks++;
 		if (qp->s_acked == qp->s_tail)
 		if (qp->s_acked == qp->s_tail)
 			goto bail;
 			goto bail;
-		if (qp->s_flags & QIB_S_WAIT_RNR)
+		if (qp->s_flags & RVT_S_WAIT_RNR)
 			goto bail;
 			goto bail;
 		if (qp->s_rnr_retry == 0) {
 		if (qp->s_rnr_retry == 0) {
 			status = IB_WC_RNR_RETRY_EXC_ERR;
 			status = IB_WC_RNR_RETRY_EXC_ERR;
@@ -1275,12 +1261,12 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 		/* The last valid PSN is the previous PSN. */
 		/* The last valid PSN is the previous PSN. */
 		update_last_psn(qp, psn - 1);
 		update_last_psn(qp, psn - 1);
 
 
-		ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+		ibp->rvp.n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
 
 
 		reset_psn(qp, psn);
 		reset_psn(qp, psn);
 
 
-		qp->s_flags &= ~(QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_ACK);
-		qp->s_flags |= QIB_S_WAIT_RNR;
+		qp->s_flags &= ~(RVT_S_WAIT_SSN_CREDIT | RVT_S_WAIT_ACK);
+		qp->s_flags |= RVT_S_WAIT_RNR;
 		qp->s_timer.function = qib_rc_rnr_retry;
 		qp->s_timer.function = qib_rc_rnr_retry;
 		qp->s_timer.expires = jiffies + usecs_to_jiffies(
 		qp->s_timer.expires = jiffies + usecs_to_jiffies(
 			ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
 			ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
@@ -1296,7 +1282,7 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 		switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
 		switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
 			QIB_AETH_CREDIT_MASK) {
 			QIB_AETH_CREDIT_MASK) {
 		case 0: /* PSN sequence error */
 		case 0: /* PSN sequence error */
-			ibp->n_seq_naks++;
+			ibp->rvp.n_seq_naks++;
 			/*
 			/*
 			 * Back up to the responder's expected PSN.
 			 * Back up to the responder's expected PSN.
 			 * Note that we might get a NAK in the middle of an
 			 * Note that we might get a NAK in the middle of an
@@ -1309,21 +1295,21 @@ static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
 
 
 		case 1: /* Invalid Request */
 		case 1: /* Invalid Request */
 			status = IB_WC_REM_INV_REQ_ERR;
 			status = IB_WC_REM_INV_REQ_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 			goto class_b;
 			goto class_b;
 
 
 		case 2: /* Remote Access Error */
 		case 2: /* Remote Access Error */
 			status = IB_WC_REM_ACCESS_ERR;
 			status = IB_WC_REM_ACCESS_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 			goto class_b;
 			goto class_b;
 
 
 		case 3: /* Remote Operation Error */
 		case 3: /* Remote Operation Error */
 			status = IB_WC_REM_OP_ERR;
 			status = IB_WC_REM_OP_ERR;
-			ibp->n_other_naks++;
+			ibp->rvp.n_other_naks++;
 class_b:
 class_b:
 			if (qp->s_last == qp->s_acked) {
 			if (qp->s_last == qp->s_acked) {
 				qib_send_complete(qp, wqe, status);
 				qib_send_complete(qp, wqe, status);
-				qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+				rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 			}
 			}
 			break;
 			break;
 
 
@@ -1349,18 +1335,18 @@ bail:
  * We have seen an out of sequence RDMA read middle or last packet.
  * We have seen an out of sequence RDMA read middle or last packet.
  * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
  * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
  */
  */
-static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
+static void rdma_seq_err(struct rvt_qp *qp, struct qib_ibport *ibp, u32 psn,
 			 struct qib_ctxtdata *rcd)
 			 struct qib_ctxtdata *rcd)
 {
 {
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 
 
 	/* Remove QP from retry timer */
 	/* Remove QP from retry timer */
-	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
-		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+	if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
+		qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
 		del_timer(&qp->s_timer);
 		del_timer(&qp->s_timer);
 	}
 	}
 
 
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 
 
 	while (qib_cmp24(psn, wqe->lpsn) > 0) {
 	while (qib_cmp24(psn, wqe->lpsn) > 0) {
 		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
 		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
@@ -1370,11 +1356,11 @@ static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
 		wqe = do_rc_completion(qp, wqe, ibp);
 		wqe = do_rc_completion(qp, wqe, ibp);
 	}
 	}
 
 
-	ibp->n_rdma_seq++;
-	qp->r_flags |= QIB_R_RDMAR_SEQ;
+	ibp->rvp.n_rdma_seq++;
+	qp->r_flags |= RVT_R_RDMAR_SEQ;
 	qib_restart_rc(qp, qp->s_last_psn + 1, 0);
 	qib_restart_rc(qp, qp->s_last_psn + 1, 0);
 	if (list_empty(&qp->rspwait)) {
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= QIB_R_RSP_SEND;
+		qp->r_flags |= RVT_R_RSP_SEND;
 		atomic_inc(&qp->refcount);
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
 	}
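
Queuing the QP on rcd->qp_wait_list with RVT_R_RSP_SEND (or RVT_R_RSP_NAK) and an extra reference defers the response until the receive context has drained its packets. The drain side presumably looks roughly like this (simplified sketch; not part of this hunk):

	struct rvt_qp *qp, *nqp;

	list_for_each_entry_safe(qp, nqp, &rcd->qp_wait_list, rspwait) {
		list_del_init(&qp->rspwait);
		if (qp->r_flags & RVT_R_RSP_NAK) {
			qp->r_flags &= ~RVT_R_RSP_NAK;
			qib_send_rc_ack(qp);
		}
		if (qp->r_flags & RVT_R_RSP_SEND) {
			qp->r_flags &= ~RVT_R_RSP_SEND;
			qib_schedule_send(qp);
		}
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}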
@@ -1399,12 +1385,12 @@ static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
 static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 			    struct qib_other_headers *ohdr,
 			    struct qib_other_headers *ohdr,
 			    void *data, u32 tlen,
 			    void *data, u32 tlen,
-			    struct qib_qp *qp,
+			    struct rvt_qp *qp,
 			    u32 opcode,
 			    u32 opcode,
 			    u32 psn, u32 hdrsize, u32 pmtu,
 			    u32 psn, u32 hdrsize, u32 pmtu,
 			    struct qib_ctxtdata *rcd)
 			    struct qib_ctxtdata *rcd)
 {
 {
-	struct qib_swqe *wqe;
+	struct rvt_swqe *wqe;
 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 	enum ib_wc_status status;
 	enum ib_wc_status status;
 	unsigned long flags;
 	unsigned long flags;
@@ -1425,7 +1411,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 			 * If send tasklet not running attempt to progress
 			 * If send tasklet not running attempt to progress
 			 * SDMA queue.
 			 */
-			if (!(qp->s_flags & QIB_S_BUSY)) {
+			if (!(qp->s_flags & RVT_S_BUSY)) {
 				/* Acquire SDMA Lock */
 				spin_lock_irqsave(&ppd->sdma_lock, flags);
 				/* Invoke sdma make progress */
@@ -1437,11 +1423,12 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 	}

 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK))
 		goto ack_done;

 	/* Ignore invalid responses. */
-	if (qib_cmp24(psn, qp->s_next_psn) >= 0)
+	smp_read_barrier_depends(); /* see post_one_send */
+	if (qib_cmp24(psn, ACCESS_ONCE(qp->s_next_psn)) >= 0)
 		goto ack_done;

 	/* Ignore duplicate responses. */
@@ -1460,15 +1447,15 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 	 * Skip everything other than the PSN we expect, if we are waiting
 	 * for a reply to a restarted RDMA read or atomic op.
 	 */
-	if (qp->r_flags & QIB_R_RDMAR_SEQ) {
+	if (qp->r_flags & RVT_R_RDMAR_SEQ) {
 		if (qib_cmp24(psn, qp->s_last_psn + 1) != 0)
 			goto ack_done;
-		qp->r_flags &= ~QIB_R_RDMAR_SEQ;
+		qp->r_flags &= ~RVT_R_RDMAR_SEQ;
 	}

 	if (unlikely(qp->s_acked == qp->s_tail))
 		goto ack_done;
-	wqe = get_swqe_ptr(qp, qp->s_acked);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 	status = IB_WC_SUCCESS;

 	switch (opcode) {
@@ -1487,7 +1474,7 @@ static void qib_rc_rcv_resp(struct qib_ibport *ibp,
 		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
 			goto ack_done;
 		hdrsize += 4;
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
 			goto ack_op_err;
 		/*
@@ -1515,10 +1502,10 @@ read_middle:
 		 * We got a response so update the timeout.
 		 * 4.096 usec. * (1 << qp->timeout)
 		 */
-		qp->s_flags |= QIB_S_TIMER;
+		qp->s_flags |= RVT_S_TIMER;
 		mod_timer(&qp->s_timer, jiffies + qp->timeout_jiffies);
-		if (qp->s_flags & QIB_S_WAIT_ACK) {
-			qp->s_flags &= ~QIB_S_WAIT_ACK;
+		if (qp->s_flags & RVT_S_WAIT_ACK) {
+			qp->s_flags &= ~RVT_S_WAIT_ACK;
 			qib_schedule_send(qp);
 		}

@@ -1553,7 +1540,7 @@ read_middle:
 		 * have to be careful to copy the data to the right
 		 * location.
 		 */
-		wqe = get_swqe_ptr(qp, qp->s_acked);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_acked);
 		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
 						  wqe, psn, pmtu);
 		goto read_last;
@@ -1598,7 +1585,7 @@ ack_len_err:
 ack_err:
 	if (qp->s_last == qp->s_acked) {
 		qib_send_complete(qp, wqe, status);
-		qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+		rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
 	}
 ack_done:
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1623,14 +1610,14 @@ bail:
  */
 static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 			    void *data,
-			    struct qib_qp *qp,
+			    struct rvt_qp *qp,
 			    u32 opcode,
 			    u32 psn,
 			    int diff,
 			    struct qib_ctxtdata *rcd)
 {
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
-	struct qib_ack_entry *e;
+	struct rvt_ack_entry *e;
 	unsigned long flags;
 	u8 i, prev;
 	int old_req;
@@ -1642,7 +1629,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 		 * Don't queue the NAK if we already sent one.
 		 */
 		if (!qp->r_nak_state) {
-			ibp->n_rc_seqnak++;
+			ibp->rvp.n_rc_seqnak++;
 			qp->r_nak_state = IB_NAK_PSN_ERROR;
 			/* Use the expected PSN. */
 			qp->r_ack_psn = qp->r_psn;
@@ -1652,7 +1639,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 			 * Otherwise, we end up propagating congestion.
 			 */
 			if (list_empty(&qp->rspwait)) {
-				qp->r_flags |= QIB_R_RSP_NAK;
+				qp->r_flags |= RVT_R_RSP_NAK;
 				atomic_inc(&qp->refcount);
 				list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 			}
@@ -1678,7 +1665,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 	 */
 	e = NULL;
 	old_req = 1;
-	ibp->n_rc_dupreq++;
+	ibp->rvp.n_rc_dupreq++;

 	spin_lock_irqsave(&qp->s_lock, flags);

@@ -1732,7 +1719,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 		if (unlikely(offset + len != e->rdma_sge.sge_length))
 			goto unlock_done;
 		if (e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		if (len != 0) {
@@ -1740,7 +1727,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 			u64 vaddr = be64_to_cpu(reth->vaddr);
 			int ok;

-			ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
+			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
 					 IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
 				goto unlock_done;
@@ -1791,7 +1778,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 		 * which doesn't accept a RDMA read response or atomic
 		 * response as an ACK for earlier SENDs or RDMA writes.
 		 */
-		if (!(qp->s_flags & QIB_S_RESP_PENDING)) {
+		if (!(qp->s_flags & RVT_S_RESP_PENDING)) {
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			qp->r_nak_state = 0;
 			qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
@@ -1805,7 +1792,7 @@ static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
 		break;
 	}
 	qp->s_ack_state = OP(ACKNOWLEDGE);
-	qp->s_flags |= QIB_S_RESP_PENDING;
+	qp->s_flags |= RVT_S_RESP_PENDING;
 	qp->r_nak_state = 0;
 	qib_schedule_send(qp);

@@ -1818,13 +1805,13 @@ send_ack:
 	return 0;
 }

-void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err)
+void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
 {
 	unsigned long flags;
 	int lastwqe;

 	spin_lock_irqsave(&qp->s_lock, flags);
-	lastwqe = qib_error_qp(qp, err);
+	lastwqe = rvt_error_qp(qp, err);
 	spin_unlock_irqrestore(&qp->s_lock, flags);

 	if (lastwqe) {
@@ -1837,7 +1824,7 @@ void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err)
 	}
 }

-static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n)
+static inline void qib_update_ack_queue(struct rvt_qp *qp, unsigned n)
 {
 	unsigned next;

@@ -1862,7 +1849,7 @@ static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n)
  * Called at interrupt level.
  */
 void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 	struct qib_other_headers *ohdr;
@@ -1948,8 +1935,8 @@ void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
 		break;
 	}

-	if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
-		qp->r_flags |= QIB_R_COMM_EST;
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
+		qp->r_flags |= RVT_R_COMM_EST;
 		if (qp->ibqp.event_handler) {
 			struct ib_event ev;

@@ -2026,9 +2013,9 @@ send_last:
 		if (unlikely(wc.byte_len > qp->r_len))
 			goto nack_inv;
 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
-		qib_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 		qp->r_msn++;
-		if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+		if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 			break;
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
@@ -2047,7 +2034,7 @@ send_last:
 		wc.dlid_path_bits = 0;
 		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
-		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 			     (ohdr->bth[0] &
 			      cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 		break;
@@ -2069,7 +2056,7 @@ send_last:
 			int ok;

 			/* Check rkey & NAK */
-			ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
+			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
 					 rkey, IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
 				goto nack_acc;
@@ -2096,7 +2083,7 @@ send_last:
 		goto send_last;

 	case OP(RDMA_READ_REQUEST): {
-		struct qib_ack_entry *e;
+		struct rvt_ack_entry *e;
 		u32 len;
 		u8 next;

@@ -2114,7 +2101,7 @@ send_last:
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		reth = &ohdr->u.rc.reth;
@@ -2125,7 +2112,7 @@ send_last:
 			int ok;

 			/* Check rkey & NAK */
-			ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+			ok = rvt_rkey_ok(qp, &e->rdma_sge, len, vaddr,
 					 rkey, IB_ACCESS_REMOTE_READ);
 			if (unlikely(!ok))
 				goto nack_acc_unlck;
@@ -2157,7 +2144,7 @@ send_last:
 		qp->r_head_ack_queue = next;

 		/* Schedule the send tasklet. */
-		qp->s_flags |= QIB_S_RESP_PENDING;
+		qp->s_flags |= RVT_S_RESP_PENDING;
 		qib_schedule_send(qp);

 		goto sunlock;
@@ -2166,7 +2153,7 @@ send_last:
 	case OP(COMPARE_SWAP):
 	case OP(FETCH_ADD): {
 		struct ib_atomic_eth *ateth;
-		struct qib_ack_entry *e;
+		struct rvt_ack_entry *e;
 		u64 vaddr;
 		atomic64_t *maddr;
 		u64 sdata;
@@ -2186,7 +2173,7 @@ send_last:
 		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
-			qib_put_mr(e->rdma_sge.mr);
+			rvt_put_mr(e->rdma_sge.mr);
 			e->rdma_sge.mr = NULL;
 		}
 		ateth = &ohdr->u.atomic_eth;
@@ -2196,7 +2183,7 @@ send_last:
 			goto nack_inv_unlck;
 		rkey = be32_to_cpu(ateth->rkey);
 		/* Check rkey & NAK */
-		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
 					  vaddr, rkey,
 					  IB_ACCESS_REMOTE_ATOMIC)))
 			goto nack_acc_unlck;
@@ -2208,7 +2195,7 @@ send_last:
 			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
 				      be64_to_cpu(ateth->compare_data),
 				      sdata);
-		qib_put_mr(qp->r_sge.sge.mr);
+		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		e->opcode = opcode;
 		e->sent = 0;
@@ -2221,7 +2208,7 @@ send_last:
 		qp->r_head_ack_queue = next;

 		/* Schedule the send tasklet. */
-		qp->s_flags |= QIB_S_RESP_PENDING;
+		qp->s_flags |= RVT_S_RESP_PENDING;
 		qib_schedule_send(qp);

 		goto sunlock;
@@ -2245,7 +2232,7 @@ rnr_nak:
 	qp->r_ack_psn = qp->r_psn;
 	/* Queue RNR NAK for later */
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= QIB_R_RSP_NAK;
+		qp->r_flags |= RVT_R_RSP_NAK;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
@@ -2257,7 +2244,7 @@ nack_op_err:
 	qp->r_ack_psn = qp->r_psn;
 	/* Queue NAK for later */
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= QIB_R_RSP_NAK;
+		qp->r_flags |= RVT_R_RSP_NAK;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
@@ -2271,7 +2258,7 @@ nack_inv:
 	qp->r_ack_psn = qp->r_psn;
 	/* Queue NAK for later */
 	if (list_empty(&qp->rspwait)) {
-		qp->r_flags |= QIB_R_RSP_NAK;
+		qp->r_flags |= RVT_R_RSP_NAK;
 		atomic_inc(&qp->refcount);
 		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
 	}
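Note on the s_next_psn change in qib_rc_rcv_resp() above: the responder path now samples an index that the posting path advances without sharing a lock, so the read becomes a single untorn access (ACCESS_ONCE()) preceded by smp_read_barrier_depends(). A minimal sketch of that single-writer/single-reader index pattern, in the style of this series; the ring and field names here are illustrative placeholders, not the driver's API:

	#include <linux/types.h>
	#include <linux/compiler.h>	/* ACCESS_ONCE() on v4.x kernels */
	#include <asm/barrier.h>	/* smp_wmb(), smp_read_barrier_depends() */

	struct sq_index {
		u32 s_head;	/* advanced only by the posting side */
		u32 s_last;	/* advanced only by the sending side */
	};

	/* Poster: make the WQE contents visible before publishing the new head. */
	static void publish_wqe(struct sq_index *sq, u32 next_head)
	{
		smp_wmb();
		ACCESS_ONCE(sq->s_head) = next_head;
	}

	/* Sender/responder: sample the head exactly once per check. */
	static int sq_has_work(struct sq_index *sq)
	{
		smp_read_barrier_depends();	/* see the post_one_send comment in the diff */
		return sq->s_last != ACCESS_ONCE(sq->s_head);
	}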

+ 107 - 84
drivers/infiniband/hw/qib/qib_ruc.c

@@ -79,16 +79,16 @@ const u32 ib_qib_rnr_table[32] = {
  * Validate a RWQE and fill in the SGE state.
  * Return 1 if OK.
  */
-static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
+static int qib_init_sge(struct rvt_qp *qp, struct rvt_rwqe *wqe)
 {
 	int i, j, ret;
 	struct ib_wc wc;
-	struct qib_lkey_table *rkt;
-	struct qib_pd *pd;
-	struct qib_sge_state *ss;
+	struct rvt_lkey_table *rkt;
+	struct rvt_pd *pd;
+	struct rvt_sge_state *ss;

-	rkt = &to_idev(qp->ibqp.device)->lk_table;
-	pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
+	rkt = &to_idev(qp->ibqp.device)->rdi.lkey_table;
+	pd = ibpd_to_rvtpd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
 	ss = &qp->r_sge;
 	ss->sg_list = qp->r_sg_list;
 	qp->r_len = 0;
@@ -96,7 +96,7 @@ static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
 		if (wqe->sg_list[i].length == 0)
 			continue;
 		/* Check LKEY */
-		if (!qib_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+		if (!rvt_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
 				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
 			goto bad_lkey;
 		qp->r_len += wqe->sg_list[i].length;
@@ -109,9 +109,9 @@ static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)

 bad_lkey:
 	while (j) {
-		struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
+		struct rvt_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;

-		qib_put_mr(sge->mr);
+		rvt_put_mr(sge->mr);
 	}
 	ss->num_sge = 0;
 	memset(&wc, 0, sizeof(wc));
@@ -120,7 +120,7 @@ bad_lkey:
 	wc.opcode = IB_WC_RECV;
 	wc.qp = &qp->ibqp;
 	/* Signal solicited completion event. */
-	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
 	ret = 0;
 bail:
 	return ret;
@@ -136,19 +136,19 @@ bail:
  *
  * Can be called from interrupt level.
  */
-int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
+int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only)
 {
 	unsigned long flags;
-	struct qib_rq *rq;
-	struct qib_rwq *wq;
-	struct qib_srq *srq;
-	struct qib_rwqe *wqe;
+	struct rvt_rq *rq;
+	struct rvt_rwq *wq;
+	struct rvt_srq *srq;
+	struct rvt_rwqe *wqe;
 	void (*handler)(struct ib_event *, void *);
 	u32 tail;
 	int ret;

 	if (qp->ibqp.srq) {
-		srq = to_isrq(qp->ibqp.srq);
+		srq = ibsrq_to_rvtsrq(qp->ibqp.srq);
 		handler = srq->ibsrq.event_handler;
 		rq = &srq->rq;
 	} else {
@@ -158,7 +158,7 @@ int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
 	}

 	spin_lock_irqsave(&rq->lock, flags);
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
 		ret = 0;
 		goto unlock;
 	}
@@ -174,7 +174,7 @@ int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
 	}
 	/* Make sure entry is read after head index is read. */
 	smp_rmb();
-	wqe = get_rwqe_ptr(rq, tail);
+	wqe = rvt_get_rwqe_ptr(rq, tail);
 	/*
 	 * Even though we update the tail index in memory, the verbs
 	 * consumer is not supposed to post more entries until a
@@ -190,7 +190,7 @@ int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
 	qp->r_wr_id = wqe->wr_id;

 	ret = 1;
-	set_bit(QIB_R_WRID_VALID, &qp->r_aflags);
+	set_bit(RVT_R_WRID_VALID, &qp->r_aflags);
 	if (handler) {
 		u32 n;

@@ -227,7 +227,7 @@ bail:
  * Switch to alternate path.
  * The QP s_lock should be held and interrupts disabled.
  */
-void qib_migrate_qp(struct qib_qp *qp)
+void qib_migrate_qp(struct rvt_qp *qp)
 {
 	struct ib_event ev;

@@ -266,7 +266,7 @@ static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
  * The s_lock will be acquired around the qib_migrate_qp() call.
  */
 int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		      int has_grh, struct qib_qp *qp, u32 bth0)
+		      int has_grh, struct rvt_qp *qp, u32 bth0)
 {
 	__be64 guid;
 	unsigned long flags;
@@ -279,7 +279,8 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 			if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
 				goto err;
 			guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
-			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+			if (!gid_ok(&hdr->u.l.grh.dgid,
+				    ibp->rvp.gid_prefix, guid))
 				goto err;
 			if (!gid_ok(&hdr->u.l.grh.sgid,
 			    qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
@@ -311,7 +312,8 @@ int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 				goto err;
 			guid = get_sguid(ibp,
 					 qp->remote_ah_attr.grh.sgid_index);
-			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+			if (!gid_ok(&hdr->u.l.grh.dgid,
+				    ibp->rvp.gid_prefix, guid))
 				goto err;
 			if (!gid_ok(&hdr->u.l.grh.sgid,
 			    qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
@@ -353,12 +355,15 @@ err:
  * receive interrupts since this is a connected protocol and all packets
  * will pass through here.
  */
-static void qib_ruc_loopback(struct qib_qp *sqp)
+static void qib_ruc_loopback(struct rvt_qp *sqp)
 {
 	struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
-	struct qib_qp *qp;
-	struct qib_swqe *wqe;
-	struct qib_sge *sge;
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_devdata *dd = ppd->dd;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+	struct rvt_qp *qp;
+	struct rvt_swqe *wqe;
+	struct rvt_sge *sge;
 	unsigned long flags;
 	struct ib_wc wc;
 	u64 sdata;
@@ -367,29 +372,33 @@ static void qib_ruc_loopback(struct qib_qp *sqp)
 	int release;
 	int ret;

+	rcu_read_lock();
 	/*
 	 * Note that we check the responder QP state after
 	 * checking the requester's state.
 	 */
-	qp = qib_lookup_qpn(ibp, sqp->remote_qpn);
+	qp = rvt_lookup_qpn(rdi, &ibp->rvp, sqp->remote_qpn);
+	if (!qp)
+		goto done;

 	spin_lock_irqsave(&sqp->s_lock, flags);

 	/* Return if we are already busy processing a work request. */
-	if ((sqp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT)) ||
-	    !(ib_qib_state_ops[sqp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+	if ((sqp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT)) ||
+	    !(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		goto unlock;

-	sqp->s_flags |= QIB_S_BUSY;
+	sqp->s_flags |= RVT_S_BUSY;

 again:
-	if (sqp->s_last == sqp->s_head)
+	smp_read_barrier_depends(); /* see post_one_send() */
+	if (sqp->s_last == ACCESS_ONCE(sqp->s_head))
 		goto clr_busy;
-	wqe = get_swqe_ptr(sqp, sqp->s_last);
+	wqe = rvt_get_swqe_ptr(sqp, sqp->s_last);

 	/* Return if it is not OK to start a new work request. */
-	if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_qib_state_ops[sqp->state] & QIB_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[sqp->state] & RVT_FLUSH_SEND))
 			goto clr_busy;
 		/* We are in the error state, flush the work request. */
 		send_status = IB_WC_WR_FLUSH_ERR;
@@ -407,9 +416,9 @@ again:
 	}
 	spin_unlock_irqrestore(&sqp->s_lock, flags);

-	if (!qp || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) ||
+	if (!qp || !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) ||
 	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
 		/*
 		 * For RC, the requester would timeout and retry so
 		 * shortcut the timeouts and just signal too many retries.
@@ -458,7 +467,7 @@ again:
 			goto inv_err;
 		if (wqe->length == 0)
 			break;
-		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
 					  wqe->rdma_wr.remote_addr,
 					  wqe->rdma_wr.rkey,
 					  IB_ACCESS_REMOTE_WRITE)))
@@ -471,7 +480,7 @@ again:
 	case IB_WR_RDMA_READ:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
 			goto inv_err;
-		if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+		if (unlikely(!rvt_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
 					  wqe->rdma_wr.remote_addr,
 					  wqe->rdma_wr.rkey,
 					  IB_ACCESS_REMOTE_READ)))
@@ -489,7 +498,7 @@ again:
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
 		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
 			goto inv_err;
-		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+		if (unlikely(!rvt_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
 					  wqe->atomic_wr.remote_addr,
 					  wqe->atomic_wr.rkey,
 					  IB_ACCESS_REMOTE_ATOMIC)))
@@ -502,7 +511,7 @@ again:
 			(u64) atomic64_add_return(sdata, maddr) - sdata :
 			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
 				      sdata, wqe->atomic_wr.swap);
-		qib_put_mr(qp->r_sge.sge.mr);
+		rvt_put_mr(qp->r_sge.sge.mr);
 		qp->r_sge.num_sge = 0;
 		goto send_comp;

@@ -526,11 +535,11 @@ again:
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 			if (!release)
-				qib_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--sqp->s_sge.num_sge)
 				*sge = *sqp->s_sge.sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -543,9 +552,9 @@ again:
 		sqp->s_len -= len;
 	}
 	if (release)
-		qib_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);

-	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		goto send_comp;

 	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
@@ -561,12 +570,12 @@ again:
 	wc.sl = qp->remote_ah_attr.sl;
 	wc.port_num = 1;
 	/* Signal completion event if the solicited bit is set. */
-	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
-		       wqe->wr.send_flags & IB_SEND_SOLICITED);
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
+		     wqe->wr.send_flags & IB_SEND_SOLICITED);

send_comp:
 	spin_lock_irqsave(&sqp->s_lock, flags);
-	ibp->n_loop_pkts++;
+	ibp->rvp.n_loop_pkts++;
flush_send:
 	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
 	qib_send_complete(sqp, wqe, send_status);
@@ -576,7 +585,7 @@ rnr_nak:
 	/* Handle RNR NAK */
 	if (qp->ibqp.qp_type == IB_QPT_UC)
 		goto send_comp;
-	ibp->n_rnr_naks++;
+	ibp->rvp.n_rnr_naks++;
 	/*
 	 * Note: we don't need the s_lock held since the BUSY flag
 	 * makes this single threaded.
@@ -588,9 +597,9 @@ rnr_nak:
 	if (sqp->s_rnr_retry_cnt < 7)
 		sqp->s_rnr_retry--;
 	spin_lock_irqsave(&sqp->s_lock, flags);
-	if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_RECV_OK))
+	if (!(ib_rvt_state_ops[sqp->state] & RVT_PROCESS_RECV_OK))
 		goto clr_busy;
-	sqp->s_flags |= QIB_S_WAIT_RNR;
+	sqp->s_flags |= RVT_S_WAIT_RNR;
 	sqp->s_timer.function = qib_rc_rnr_retry;
 	sqp->s_timer.expires = jiffies +
 		usecs_to_jiffies(ib_qib_rnr_table[qp->r_min_rnr_timer]);
@@ -618,9 +627,9 @@ serr:
 	spin_lock_irqsave(&sqp->s_lock, flags);
 	qib_send_complete(sqp, wqe, send_status);
 	if (sqp->ibqp.qp_type == IB_QPT_RC) {
-		int lastwqe = qib_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+		int lastwqe = rvt_error_qp(sqp, IB_WC_WR_FLUSH_ERR);

-		sqp->s_flags &= ~QIB_S_BUSY;
+		sqp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock_irqrestore(&sqp->s_lock, flags);
 		if (lastwqe) {
 			struct ib_event ev;
@@ -633,12 +642,11 @@ serr:
 		goto done;
 	}
clr_busy:
-	sqp->s_flags &= ~QIB_S_BUSY;
+	sqp->s_flags &= ~RVT_S_BUSY;
unlock:
 	spin_unlock_irqrestore(&sqp->s_lock, flags);
done:
-	if (qp && atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
+	rcu_read_unlock();
 }

 /**
@@ -663,7 +671,7 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
 	hdr->next_hdr = IB_GRH_NEXT_HDR;
 	hdr->hop_limit = grh->hop_limit;
 	/* The SGID is 32-bit aligned. */
-	hdr->sgid.global.subnet_prefix = ibp->gid_prefix;
+	hdr->sgid.global.subnet_prefix = ibp->rvp.gid_prefix;
 	hdr->sgid.global.interface_id = grh->sgid_index ?
 		ibp->guids[grh->sgid_index - 1] : ppd_from_ibp(ibp)->guid;
 	hdr->dgid = grh->dgid;
@@ -672,9 +680,10 @@ u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
 	return sizeof(struct ib_grh) / sizeof(u32);
 }

-void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
 			 u32 bth0, u32 bth2)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	u16 lrh0;
 	u32 nwords;
@@ -685,17 +694,18 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
 	nwords = (qp->s_cur_size + extra_bytes) >> 2;
 	lrh0 = QIB_LRH_BTH;
 	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
-		qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
+		qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
 					       &qp->remote_ah_attr.grh,
 					       qp->s_hdrwords, nwords);
 		lrh0 = QIB_LRH_GRH;
 	}
 	lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
 		qp->remote_ah_attr.sl << 4;
-	qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
-	qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
-	qp->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+	priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+	priv->s_hdr->lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+	priv->s_hdr->lrh[2] =
+			cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+	priv->s_hdr->lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
 				       qp->remote_ah_attr.src_path_bits);
 	bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
 	bth0 |= extra_bytes << 20;
@@ -707,20 +717,29 @@ void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
 	this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 }

+void _qib_do_send(struct work_struct *work)
+{
+	struct qib_qp_priv *priv = container_of(work, struct qib_qp_priv,
+						s_work);
+	struct rvt_qp *qp = priv->owner;
+
+	qib_do_send(qp);
+}
+
 /**
  * qib_do_send - perform a send on a QP
- * @work: contains a pointer to the QP
+ * @qp: pointer to the QP
  *
  * Process entries in the send work queue until credit or queue is
 * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
 * Otherwise, two threads could send packets out of order.
 */
-void qib_do_send(struct work_struct *work)
+void qib_do_send(struct rvt_qp *qp)
 {
-	struct qib_qp *qp = container_of(work, struct qib_qp, s_work);
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
 	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-	int (*make_req)(struct qib_qp *qp);
+	int (*make_req)(struct rvt_qp *qp);
 	unsigned long flags;

 	if ((qp->ibqp.qp_type == IB_QPT_RC ||
@@ -745,50 +764,59 @@ void qib_do_send(struct work_struct *work)
 		return;
 	}

-	qp->s_flags |= QIB_S_BUSY;
-
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	qp->s_flags |= RVT_S_BUSY;

 	do {
 		/* Check for a constructed packet to be sent. */
 		if (qp->s_hdrwords != 0) {
+			spin_unlock_irqrestore(&qp->s_lock, flags);
 			/*
 			 * If the packet cannot be sent now, return and
 			 * the send tasklet will be woken up later.
 			 */
-			if (qib_verbs_send(qp, qp->s_hdr, qp->s_hdrwords,
+			if (qib_verbs_send(qp, priv->s_hdr, qp->s_hdrwords,
 					   qp->s_cur_sge, qp->s_cur_size))
-				break;
+				return;
 			/* Record that s_hdr is empty. */
 			qp->s_hdrwords = 0;
+			spin_lock_irqsave(&qp->s_lock, flags);
 		}
 	} while (make_req(qp));
+
+	spin_unlock_irqrestore(&qp->s_lock, flags);
 }

 /*
  * This should be called with s_lock held.
  */
-void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 		       enum ib_wc_status status)
 {
 	u32 old_last, last;
 	unsigned i;

-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_OR_FLUSH_SEND))
 		return;

+	last = qp->s_last;
+	old_last = last;
+	if (++last >= qp->s_size)
+		last = 0;
+	qp->s_last = last;
+	/* See post_send() */
+	barrier();
 	for (i = 0; i < wqe->wr.num_sge; i++) {
-		struct qib_sge *sge = &wqe->sg_list[i];
+		struct rvt_sge *sge = &wqe->sg_list[i];

-		qib_put_mr(sge->mr);
+		rvt_put_mr(sge->mr);
 	}
 	if (qp->ibqp.qp_type == IB_QPT_UD ||
 	    qp->ibqp.qp_type == IB_QPT_SMI ||
 	    qp->ibqp.qp_type == IB_QPT_GSI)
-		atomic_dec(&to_iah(wqe->ud_wr.ah)->refcount);
+		atomic_dec(&ibah_to_rvtah(wqe->ud_wr.ah)->refcount);

 	/* See ch. 11.2.4.1 and 10.7.3.1 */
-	if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+	if (!(qp->s_flags & RVT_S_SIGNAL_REQ_WR) ||
 	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
 	    status != IB_WC_SUCCESS) {
 		struct ib_wc wc;
@@ -800,15 +828,10 @@ void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
 		wc.qp = &qp->ibqp;
 		if (status == IB_WC_SUCCESS)
 			wc.byte_len = wqe->length;
-		qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.send_cq), &wc,
 			     status != IB_WC_SUCCESS);
 	}

-	last = qp->s_last;
-	old_last = last;
-	if (++last >= qp->s_size)
-		last = 0;
-	qp->s_last = last;
 	if (qp->s_acked == old_last)
 		qp->s_acked = last;
 	if (qp->s_cur == old_last)
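Note on the qib_do_send() rework above: the QP's s_lock is now held across the make_req() loop and the RVT_S_BUSY manipulation, and is released only around the actual hardware hand-off. Roughly, with hypothetical build_next_packet()/send_to_hw() helpers standing in for make_req()/qib_verbs_send(), only the lock choreography mirrors the diff:

	spin_lock_irqsave(&qp->s_lock, flags);
	qp->s_flags |= RVT_S_BUSY;
	do {
		if (qp->s_hdrwords != 0) {
			/* hand the built header to hardware without the lock */
			spin_unlock_irqrestore(&qp->s_lock, flags);
			if (send_to_hw(qp))	/* busy: the send tasklet reruns us */
				return;
			qp->s_hdrwords = 0;	/* header consumed */
			spin_lock_irqsave(&qp->s_lock, flags);
		}
	} while (build_next_packet(qp));	/* make_req() analogue, under s_lock */
	spin_unlock_irqrestore(&qp->s_lock, flags);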

+ 23 - 18
drivers/infiniband/hw/qib/qib_sdma.c

@@ -513,7 +513,9 @@ int qib_sdma_running(struct qib_pportdata *ppd)
 static void complete_sdma_err_req(struct qib_pportdata *ppd,
 				  struct qib_verbs_txreq *tx)
 {
-	atomic_inc(&tx->qp->s_dma_busy);
+	struct qib_qp_priv *priv = tx->qp->priv;
+
+	atomic_inc(&priv->s_dma_busy);
 	/* no sdma descriptors, so no unmap_desc */
 	tx->txreq.start_idx = 0;
 	tx->txreq.next_descq_idx = 0;
@@ -531,18 +533,19 @@ static void complete_sdma_err_req(struct qib_pportdata *ppd,
  * 3) The SGE addresses are suitable for passing to dma_map_single().
  */
 int qib_sdma_verbs_send(struct qib_pportdata *ppd,
-			struct qib_sge_state *ss, u32 dwords,
+			struct rvt_sge_state *ss, u32 dwords,
 			struct qib_verbs_txreq *tx)
 {
 	unsigned long flags;
-	struct qib_sge *sge;
-	struct qib_qp *qp;
+	struct rvt_sge *sge;
+	struct rvt_qp *qp;
 	int ret = 0;
 	u16 tail;
 	__le64 *descqp;
 	u64 sdmadesc[2];
 	u32 dwoffset;
 	dma_addr_t addr;
+	struct qib_qp_priv *priv;

 	spin_lock_irqsave(&ppd->sdma_lock, flags);

@@ -621,7 +624,7 @@ retry:
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 				sge->n = 0;
@@ -644,8 +647,8 @@ retry:
 		descqp[0] |= cpu_to_le64(SDMA_DESC_DMA_HEAD);
 	if (tx->txreq.flags & QIB_SDMA_TXREQ_F_INTREQ)
 		descqp[0] |= cpu_to_le64(SDMA_DESC_INTR);
-
-	atomic_inc(&tx->qp->s_dma_busy);
+	priv = tx->qp->priv;
+	atomic_inc(&priv->s_dma_busy);
 	tx->txreq.next_descq_idx = tail;
 	ppd->dd->f_sdma_update_tail(ppd, tail);
 	ppd->sdma_descq_added += tx->txreq.sg_count;
@@ -663,13 +666,14 @@ unmap:
 		unmap_desc(ppd, tail);
 	}
 	qp = tx->qp;
+	priv = qp->priv;
 	qib_put_txreq(tx);
 	spin_lock(&qp->r_lock);
 	spin_lock(&qp->s_lock);
 	if (qp->ibqp.qp_type == IB_QPT_RC) {
 		/* XXX what about error sending RDMA read responses? */
-		if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)
-			qib_error_qp(qp, IB_WC_GENERAL_ERR);
+		if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)
+			rvt_error_qp(qp, IB_WC_GENERAL_ERR);
 	} else if (qp->s_wqe)
 		qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
 	spin_unlock(&qp->s_lock);
@@ -679,8 +683,9 @@ unmap:

busy:
 	qp = tx->qp;
+	priv = qp->priv;
 	spin_lock(&qp->s_lock);
-	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
 		struct qib_ibdev *dev;

 		/*
@@ -690,19 +695,19 @@ busy:
 		 */
 		tx->ss = ss;
 		tx->dwords = dwords;
-		qp->s_tx = tx;
+		priv->s_tx = tx;
 		dev = &ppd->dd->verbs_dev;
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->iowait)) {
+		spin_lock(&dev->rdi.pending_lock);
+		if (list_empty(&priv->iowait)) {
 			struct qib_ibport *ibp;

 			ibp = &ppd->ibport_data;
-			ibp->n_dmawait++;
-			qp->s_flags |= QIB_S_WAIT_DMA_DESC;
-			list_add_tail(&qp->iowait, &dev->dmawait);
+			ibp->rvp.n_dmawait++;
+			qp->s_flags |= RVT_S_WAIT_DMA_DESC;
+			list_add_tail(&priv->iowait, &dev->dmawait);
 		}
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~QIB_S_BUSY;
+		spin_unlock(&dev->rdi.pending_lock);
+		qp->s_flags &= ~RVT_S_BUSY;
 		spin_unlock(&qp->s_lock);
 		ret = -EBUSY;
 	} else {
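Note on the qib_sdma.c changes above: the SDMA bookkeeping that used to sit directly in struct qib_qp (s_dma_busy, s_tx, the iowait list entry) is now reached through the driver-private object rdmavt hangs off each shared QP. A rough schematic, with the struct trimmed to just the fields this diff touches and the accessor name chosen here for illustration (the diff itself simply open-codes qp->priv):

	struct qib_qp_priv {
		struct list_head iowait;	/* queued on dev->dmawait while blocked */
		atomic_t s_dma_busy;		/* descriptors still owned by SDMA */
		struct qib_verbs_txreq *s_tx;	/* tx deferred until ring space frees up */
		struct rvt_qp *owner;		/* back-pointer to the shared rvt_qp */
	};

	static inline struct qib_qp_priv *to_qib_priv(struct rvt_qp *qp)
	{
		return qp->priv;	/* rdmavt stores the driver cookie here */
	}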

+ 0 - 380
drivers/infiniband/hw/qib/qib_srq.c

@@ -1,380 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/err.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-#include "qib_verbs.h"
-
-/**
- * qib_post_srq_receive - post a receive on a shared receive queue
- * @ibsrq: the SRQ to post the receive on
- * @wr: the list of work requests to post
- * @bad_wr: A pointer to the first WR to cause a problem is put here
- *
- * This may be called from interrupt context.
- */
-int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr)
-{
-	struct qib_srq *srq = to_isrq(ibsrq);
-	struct qib_rwq *wq;
-	unsigned long flags;
-	int ret;
-
-	for (; wr; wr = wr->next) {
-		struct qib_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&srq->rq.lock, flags);
-		wq = srq->rq.wq;
-		next = wq->head + 1;
-		if (next >= srq->rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&srq->rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&srq->rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&srq->rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * qib_create_srq - create a shared receive queue
- * @ibpd: the protection domain of the SRQ to create
- * @srq_init_attr: the attributes of the SRQ
- * @udata: data from libibverbs when creating a user SRQ
- */
-struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
-			      struct ib_srq_init_attr *srq_init_attr,
-			      struct ib_udata *udata)
-{
-	struct qib_ibdev *dev = to_idev(ibpd->device);
-	struct qib_srq *srq;
-	u32 sz;
-	struct ib_srq *ret;
-
-	if (srq_init_attr->srq_type != IB_SRQT_BASIC) {
-		ret = ERR_PTR(-ENOSYS);
-		goto done;
-	}
-
-	if (srq_init_attr->attr.max_sge == 0 ||
-	    srq_init_attr->attr.max_sge > ib_qib_max_srq_sges ||
-	    srq_init_attr->attr.max_wr == 0 ||
-	    srq_init_attr->attr.max_wr > ib_qib_max_srq_wrs) {
-		ret = ERR_PTR(-EINVAL);
-		goto done;
-	}
-
-	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
-	if (!srq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto done;
-	}
-
-	/*
-	 * Need to use vmalloc() if we want to support large #s of entries.
-	 */
-	srq->rq.size = srq_init_attr->attr.max_wr + 1;
-	srq->rq.max_sge = srq_init_attr->attr.max_sge;
-	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
-		sizeof(struct qib_rwqe);
-	srq->rq.wq = vmalloc_user(sizeof(struct qib_rwq) + srq->rq.size * sz);
-	if (!srq->rq.wq) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_srq;
-	}
-
-	/*
-	 * Return the address of the RWQ as the offset to mmap.
-	 * See qib_mmap() for details.
-	 */
-	if (udata && udata->outlen >= sizeof(__u64)) {
-		int err;
-		u32 s = sizeof(struct qib_rwq) + srq->rq.size * sz;
-
-		srq->ip =
-		    qib_create_mmap_info(dev, s, ibpd->uobject->context,
-					 srq->rq.wq);
-		if (!srq->ip) {
-			ret = ERR_PTR(-ENOMEM);
-			goto bail_wq;
-		}
-
-		err = ib_copy_to_udata(udata, &srq->ip->offset,
-				       sizeof(srq->ip->offset));
-		if (err) {
-			ret = ERR_PTR(err);
-			goto bail_ip;
-		}
-	} else
-		srq->ip = NULL;
-
-	/*
-	 * ib_create_srq() will initialize srq->ibsrq.
-	 */
-	spin_lock_init(&srq->rq.lock);
-	srq->rq.wq->head = 0;
-	srq->rq.wq->tail = 0;
-	srq->limit = srq_init_attr->attr.srq_limit;
-
-	spin_lock(&dev->n_srqs_lock);
-	if (dev->n_srqs_allocated == ib_qib_max_srqs) {
-		spin_unlock(&dev->n_srqs_lock);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail_ip;
-	}
-
-	dev->n_srqs_allocated++;
-	spin_unlock(&dev->n_srqs_lock);
-
-	if (srq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
-	}
-
-	ret = &srq->ibsrq;
-	goto done;
-
-bail_ip:
-	kfree(srq->ip);
-bail_wq:
-	vfree(srq->rq.wq);
-bail_srq:
-	kfree(srq);
-done:
-	return ret;
-}
-
-/**
- * qib_modify_srq - modify a shared receive queue
- * @ibsrq: the SRQ to modify
- * @attr: the new attributes of the SRQ
- * @attr_mask: indicates which attributes to modify
- * @udata: user data for libibverbs.so
- */
-int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		   enum ib_srq_attr_mask attr_mask,
-		   struct ib_udata *udata)
-{
-	struct qib_srq *srq = to_isrq(ibsrq);
-	struct qib_rwq *wq;
-	int ret = 0;
-
-	if (attr_mask & IB_SRQ_MAX_WR) {
-		struct qib_rwq *owq;
-		struct qib_rwqe *p;
-		u32 sz, size, n, head, tail;
-
-		/* Check that the requested sizes are below the limits. */
-		if ((attr->max_wr > ib_qib_max_srq_wrs) ||
-		    ((attr_mask & IB_SRQ_LIMIT) ?
-		     attr->srq_limit : srq->limit) > attr->max_wr) {
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		sz = sizeof(struct qib_rwqe) +
-			srq->rq.max_sge * sizeof(struct ib_sge);
-		size = attr->max_wr + 1;
-		wq = vmalloc_user(sizeof(struct qib_rwq) + size * sz);
-		if (!wq) {
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		/* Check that we can write the offset to mmap. */
-		if (udata && udata->inlen >= sizeof(__u64)) {
-			__u64 offset_addr;
-			__u64 offset = 0;
-
-			ret = ib_copy_from_udata(&offset_addr, udata,
-						 sizeof(offset_addr));
-			if (ret)
-				goto bail_free;
-			udata->outbuf =
-				(void __user *) (unsigned long) offset_addr;
-			ret = ib_copy_to_udata(udata, &offset,
-					       sizeof(offset));
-			if (ret)
-				goto bail_free;
-		}
-
-		spin_lock_irq(&srq->rq.lock);
-		/*
-		 * validate head and tail pointer values and compute
-		 * the number of remaining WQEs.
-		 */
-		owq = srq->rq.wq;
-		head = owq->head;
-		tail = owq->tail;
-		if (head >= srq->rq.size || tail >= srq->rq.size) {
-			ret = -EINVAL;
-			goto bail_unlock;
-		}
-		n = head;
-		if (n < tail)
-			n += srq->rq.size - tail;
-		else
-			n -= tail;
-		if (size <= n) {
-			ret = -EINVAL;
-			goto bail_unlock;
-		}
-		n = 0;
-		p = wq->wq;
-		while (tail != head) {
-			struct qib_rwqe *wqe;
-			int i;
-
-			wqe = get_rwqe_ptr(&srq->rq, tail);
-			p->wr_id = wqe->wr_id;
-			p->num_sge = wqe->num_sge;
-			for (i = 0; i < wqe->num_sge; i++)
-				p->sg_list[i] = wqe->sg_list[i];
-			n++;
-			p = (struct qib_rwqe *)((char *) p + sz);
-			if (++tail >= srq->rq.size)
-				tail = 0;
-		}
-		srq->rq.wq = wq;
-		srq->rq.size = size;
-		wq->head = n;
-		wq->tail = 0;
-		if (attr_mask & IB_SRQ_LIMIT)
-			srq->limit = attr->srq_limit;
-		spin_unlock_irq(&srq->rq.lock);
-
-		vfree(owq);
-
-		if (srq->ip) {
-			struct qib_mmap_info *ip = srq->ip;
-			struct qib_ibdev *dev = to_idev(srq->ibsrq.device);
-			u32 s = sizeof(struct qib_rwq) + size * sz;
-
-			qib_update_mmap_info(dev, ip, s, wq);
-
-			/*
-			 * Return the offset to mmap.
-			 * See qib_mmap() for details.
-			 */
-			if (udata && udata->inlen >= sizeof(__u64)) {
-				ret = ib_copy_to_udata(udata, &ip->offset,
-						       sizeof(ip->offset));
-				if (ret)
-					goto bail;
-			}
-
-			/*
-			 * Put user mapping info onto the pending list
-			 * unless it already is on the list.
-			 */
-			spin_lock_irq(&dev->pending_lock);
-			if (list_empty(&ip->pending_mmaps))
-				list_add(&ip->pending_mmaps,
-					 &dev->pending_mmaps);
-			spin_unlock_irq(&dev->pending_lock);
-		}
-	} else if (attr_mask & IB_SRQ_LIMIT) {
-		spin_lock_irq(&srq->rq.lock);
-		if (attr->srq_limit >= srq->rq.size)
-			ret = -EINVAL;
-		else
-			srq->limit = attr->srq_limit;
-		spin_unlock_irq(&srq->rq.lock);
-	}
-	goto bail;
-
-bail_unlock:
-	spin_unlock_irq(&srq->rq.lock);
-bail_free:
-	vfree(wq);
-bail:
-	return ret;
-}
-
-int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
-{
-	struct qib_srq *srq = to_isrq(ibsrq);
-
-	attr->max_wr = srq->rq.size - 1;
-	attr->max_sge = srq->rq.max_sge;
-	attr->srq_limit = srq->limit;
-	return 0;
-}
-
-/**
- * qib_destroy_srq - destroy a shared receive queue
- * @ibsrq: the SRQ to destroy
- */
-int qib_destroy_srq(struct ib_srq *ibsrq)
-{
-	struct qib_srq *srq = to_isrq(ibsrq);
-	struct qib_ibdev *dev = to_idev(ibsrq->device);
-
-	spin_lock(&dev->n_srqs_lock);
-	dev->n_srqs_allocated--;
-	spin_unlock(&dev->n_srqs_lock);
-	if (srq->ip)
-		kref_put(&srq->ip->ref, qib_release_mmap_info);
-	else
-		vfree(srq->rq.wq);
-	kfree(srq);
-
-	return 0;
-}
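Note on the qib_srq.c removal above (SRQ handling moves out of the driver as part of the rdmavt conversion): the post-receive logic it contained is the usual one-slot-slack circular queue, where the ring is full when advancing head would land on tail. Condensed to plain C, with the qib/rvt structures reduced to bare indices:

	#include <stdbool.h>
	#include <stdint.h>

	struct rwq {
		uint32_t size;	/* slots in the ring */
		uint32_t head;	/* next slot the poster fills */
		uint32_t tail;	/* next slot the consumer drains */
	};

	/* Returns false if the ring is full, else claims one slot and advances head. */
	static bool rwq_post_one(struct rwq *wq)
	{
		uint32_t next = wq->head + 1;

		if (next >= wq->size)
			next = 0;
		if (next == wq->tail)
			return false;	/* one slot always stays empty */
		/* ...copy the work request into slot wq->head here... */
		wq->head = next;
		return true;
	}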

+ 69 - 16
drivers/infiniband/hw/qib/qib_sysfs.c

@@ -406,7 +406,13 @@ static struct kobj_type qib_sl2vl_ktype = {
 #define QIB_DIAGC_ATTR(N) \
 	static struct qib_diagc_attr qib_diagc_attr_##N = { \
 		.attr = { .name = __stringify(N), .mode = 0664 }, \
-		.counter = offsetof(struct qib_ibport, n_##N) \
+		.counter = offsetof(struct qib_ibport, rvp.n_##N) \
+	}
+
+#define QIB_DIAGC_ATTR_PER_CPU(N) \
+	static struct qib_diagc_attr qib_diagc_attr_##N = { \
+		.attr = { .name = __stringify(N), .mode = 0664 }, \
+		.counter = offsetof(struct qib_ibport, rvp.z_##N) \
 	}

 struct qib_diagc_attr {
@@ -414,10 +420,11 @@ struct qib_diagc_attr {
 	size_t counter;
 };

+QIB_DIAGC_ATTR_PER_CPU(rc_acks);
+QIB_DIAGC_ATTR_PER_CPU(rc_qacks);
+QIB_DIAGC_ATTR_PER_CPU(rc_delayed_comp);
+
 QIB_DIAGC_ATTR(rc_resends);
-QIB_DIAGC_ATTR(rc_acks);
-QIB_DIAGC_ATTR(rc_qacks);
-QIB_DIAGC_ATTR(rc_delayed_comp);
 QIB_DIAGC_ATTR(seq_naks);
 QIB_DIAGC_ATTR(rdma_seq);
 QIB_DIAGC_ATTR(rnr_naks);
@@ -449,6 +456,35 @@ static struct attribute *diagc_default_attributes[] = {
 	NULL
 };

+static u64 get_all_cpu_total(u64 __percpu *cntr)
+{
+	int cpu;
+	u64 counter = 0;
+
+	for_each_possible_cpu(cpu)
+		counter += *per_cpu_ptr(cntr, cpu);
+	return counter;
+}
+
+#define def_write_per_cpu(cntr) \
+static void write_per_cpu_##cntr(struct qib_pportdata *ppd, u32 data)	\
+{									\
+	struct qib_devdata *dd = ppd->dd;				\
+	struct qib_ibport *qibp = &ppd->ibport_data;			\
+	/*  A write can only zero the counter */			\
+	if (data == 0)							\
+		qibp->rvp.z_##cntr = get_all_cpu_total(qibp->rvp.cntr); \
+	else								\
+		qib_dev_err(dd, "Per CPU cntrs can only be zeroed");	\
+}
+
+def_write_per_cpu(rc_acks)
+def_write_per_cpu(rc_qacks)
+def_write_per_cpu(rc_delayed_comp)
+
+#define READ_PER_CPU_CNTR(cntr) (get_all_cpu_total(qibp->rvp.cntr) - \
+							qibp->rvp.z_##cntr)
+
 static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr,
 			       char *buf)
 {
@@ -458,7 +494,16 @@ static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr,
 		container_of(kobj, struct qib_pportdata, diagc_kobj);
 	struct qib_ibport *qibp = &ppd->ibport_data;

-	return sprintf(buf, "%u\n", *(u32 *)((char *)qibp + dattr->counter));
+	if (!strncmp(dattr->attr.name, "rc_acks", 7))
+		return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_acks));
+	else if (!strncmp(dattr->attr.name, "rc_qacks", 8))
+		return sprintf(buf, "%llu\n", READ_PER_CPU_CNTR(rc_qacks));
+	else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15))
+		return sprintf(buf, "%llu\n",
+					READ_PER_CPU_CNTR(rc_delayed_comp));
+	else
+		return sprintf(buf, "%u\n",
+				*(u32 *)((char *)qibp + dattr->counter));
 }

 static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr,
@@ -475,7 +520,15 @@ static ssize_t diagc_attr_store(struct kobject *kobj, struct attribute *attr,
 	ret = kstrtou32(buf, 0, &val);
 	if (ret)
 		return ret;
-	*(u32 *)((char *) qibp + dattr->counter) = val;
+
+	if (!strncmp(dattr->attr.name, "rc_acks", 7))
+		write_per_cpu_rc_acks(ppd, val);
+	else if (!strncmp(dattr->attr.name, "rc_qacks", 8))
+		write_per_cpu_rc_qacks(ppd, val);
+	else if (!strncmp(dattr->attr.name, "rc_delayed_comp", 15))
+		write_per_cpu_rc_delayed_comp(ppd, val);
+	else
+		*(u32 *)((char *)qibp + dattr->counter) = val;
 	return size;
 }

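Note on the per-CPU diag counters above: a read is the sum of every CPU's slot minus a saved baseline, and a "zero" write only refreshes that baseline (the rvp.z_* field) rather than touching the per-CPU data. Reusing get_all_cpu_total() from the hunk above, the read/zero pair boils down to the following sketch (the helper names here are placeholders):

	static u64 diagc_read(u64 __percpu *cntr, u64 zero_point)
	{
		/* current total minus whatever it was when last "zeroed" */
		return get_all_cpu_total(cntr) - zero_point;
	}

	static void diagc_zero(u64 __percpu *cntr, u64 *zero_point)
	{
		/* never writes the per-CPU slots; just moves the baseline */
		*zero_point = get_all_cpu_total(cntr);
	}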
@@ -502,7 +555,7 @@ static ssize_t show_rev(struct device *device, struct device_attribute *attr,
 			char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);

 	return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
 }
@@ -511,7 +564,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr,
 			char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	int ret;

@@ -533,7 +586,7 @@ static ssize_t show_boardversion(struct device *device,
 				 struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);

 	/* The string printed here is already newline-terminated. */
@@ -545,7 +598,7 @@ static ssize_t show_localbus_info(struct device *device,
 				  struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);

 	/* The string printed here is already newline-terminated. */
@@ -557,7 +610,7 @@ static ssize_t show_nctxts(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);

 	/* Return the number of user ports (contexts) available. */
@@ -572,7 +625,7 @@ static ssize_t show_nfreectxts(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);

 	/* Return the number of free user ports (contexts) available. */
@@ -583,7 +636,7 @@ static ssize_t show_serial(struct device *device,
 			   struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);

 	buf[sizeof(dd->serial)] = '\0';
@@ -597,7 +650,7 @@ static ssize_t store_chip_reset(struct device *device,
 				size_t count)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	int ret;

@@ -618,7 +671,7 @@ static ssize_t show_tempsense(struct device *device,
 			      struct device_attribute *attr, char *buf)
 {
 	struct qib_ibdev *dev =
-		container_of(device, struct qib_ibdev, ibdev.dev);
+		container_of(device, struct qib_ibdev, rdi.ibdev.dev);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	int ret;
 	int idx;
@@ -778,7 +831,7 @@ bail:
  */
 int qib_verbs_register_sysfs(struct qib_devdata *dd)
 {
-	struct ib_device *dev = &dd->verbs_dev.ibdev;
+	struct ib_device *dev = &dd->verbs_dev.rdi.ibdev;
 	int i, ret;

 	for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
 	for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
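
The last hunk of qib_sysfs.c is the attribute registration loop in qib_verbs_register_sysfs(); only the loop header is visible in the excerpt, so the error handling is not shown. Loops of this shape conventionally unwind everything created so far when one device_create_file() call fails. A plain-C sketch of that pattern, with made-up helpers standing in for the real calls (an illustration of the convention, not necessarily the driver's exact code):

#include <stdio.h>

/* Hypothetical stand-ins for the device_create_file()/device_remove_file()
 * calls the driver makes; failing on attribute 2 is just for the demo. */
static int register_attr(int i)    { return i == 2 ? -1 : 0; }
static void unregister_attr(int i) { printf("unregister %d\n", i); }

static int register_all(int nattrs)
{
	int i, ret;

	for (i = 0; i < nattrs; i++) {
		ret = register_attr(i);
		if (ret)
			goto bail;
	}
	return 0;

bail:
	/* Unwind only what was successfully registered. */
	while (--i >= 0)
		unregister_attr(i);
	return ret;
}

int main(void)
{
	return register_all(4) ? 1 : 0;
}
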

+ 38 - 41
drivers/infiniband/hw/qib/qib_uc.c

@@ -41,61 +41,62 @@
  * qib_make_uc_req - construct a request packet (SEND, RDMA write)
  * @qp: a pointer to the QP
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  */
-int qib_make_uc_req(struct qib_qp *qp)
+int qib_make_uc_req(struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_other_headers *ohdr;
-	struct qib_swqe *wqe;
-	unsigned long flags;
+	struct rvt_swqe *wqe;
 	u32 hwords;
 	u32 bth0;
 	u32 len;
 	u32 pmtu = qp->pmtu;
 	int ret = 0;
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
-		if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= QIB_S_WAIT_DMA;
+		if (atomic_read(&priv->s_dma_busy)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 		goto done;
 	}
 
-	ohdr = &qp->s_hdr->u.oth;
+	ohdr = &priv->s_hdr->u.oth;
 	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
-		ohdr = &qp->s_hdr->u.l.oth;
+		ohdr = &priv->s_hdr->u.l.oth;
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	hwords = 5;
 	bth0 = 0;
 
 	/* Get the next send request. */
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	qp->s_wqe = NULL;
 	switch (qp->s_state) {
 	default:
-		if (!(ib_qib_state_ops[qp->state] &
-		    QIB_PROCESS_NEXT_SEND_OK))
+		if (!(ib_rvt_state_ops[qp->state] &
+		    RVT_PROCESS_NEXT_SEND_OK))
 			goto bail;
 		/* Check if send work queue is empty. */
-		if (qp->s_cur == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send() */
+		if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 			goto bail;
 		/*
 		 * Start a new request.
 		 */
-		wqe->psn = qp->s_next_psn;
-		qp->s_psn = qp->s_next_psn;
+		qp->s_psn = wqe->psn;
 		qp->s_sge.sge = wqe->sg_list[0];
 		qp->s_sge.sg_list = wqe->sg_list + 1;
 		qp->s_sge.num_sge = wqe->wr.num_sge;
@@ -214,15 +215,11 @@ int qib_make_uc_req(struct qib_qp *qp)
 	qp->s_cur_sge = &qp->s_sge;
 	qp->s_cur_size = len;
 	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
-			    qp->s_next_psn++ & QIB_PSN_MASK);
+			    qp->s_psn++ & QIB_PSN_MASK);
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
-	qp->s_flags &= ~QIB_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	qp->s_flags &= ~RVT_S_BUSY;
 	return ret;
 }
 
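Two related changes in the hunk above: the s_lock is now taken by the caller (so the spin_lock_irqsave()/unlock pair disappears and the "Assumes the s_lock is held" note is added), and the empty-queue test reads s_head with ACCESS_ONCE() plus smp_read_barrier_depends(), pairing with the barrier on the posting side that the "see post_one_send()" comment points at. A rough C11 rendering of that consumer-side check, using an acquire load where the kernel uses its own primitives (a sketch, not driver code):

#include <stdatomic.h>
#include <stdbool.h>

struct send_queue {
	_Atomic unsigned head;	/* the posting path advances this */
	unsigned last;		/* consumer-private index */
};

/* Returns true when there is no posted work left to look at. */
static bool queue_empty(struct send_queue *q)
{
	/* Acquire pairs with the producer publishing head only after the
	 * WQE is fully written, like smp_wmb() + ACCESS_ONCE() above. */
	return q->last == atomic_load_explicit(&q->head, memory_order_acquire);
}

int main(void)
{
	struct send_queue q = { .head = 0, .last = 0 };

	return queue_empty(&q) ? 0 : 1;
}
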
@@ -240,7 +237,7 @@ unlock:
  * Called at interrupt level.
  * Called at interrupt level.
  */
  */
 void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 {
 	struct qib_other_headers *ohdr;
 	struct qib_other_headers *ohdr;
 	u32 opcode;
 	u32 opcode;
@@ -278,10 +275,10 @@ void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 inv:
 inv:
 		if (qp->r_state == OP(SEND_FIRST) ||
 		if (qp->r_state == OP(SEND_FIRST) ||
 		    qp->r_state == OP(SEND_MIDDLE)) {
 		    qp->r_state == OP(SEND_MIDDLE)) {
-			set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+			set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 			qp->r_sge.num_sge = 0;
 			qp->r_sge.num_sge = 0;
 		} else
 		} else
-			qib_put_ss(&qp->r_sge);
+			rvt_put_ss(&qp->r_sge);
 		qp->r_state = OP(SEND_LAST);
 		qp->r_state = OP(SEND_LAST);
 		switch (opcode) {
 		switch (opcode) {
 		case OP(SEND_FIRST):
 		case OP(SEND_FIRST):
@@ -328,8 +325,8 @@ inv:
 		goto inv;
 		goto inv;
 	}
 	}
 
 
-	if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
-		qp->r_flags |= QIB_R_COMM_EST;
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & RVT_R_COMM_EST)) {
+		qp->r_flags |= RVT_R_COMM_EST;
 		if (qp->ibqp.event_handler) {
 		if (qp->ibqp.event_handler) {
 			struct ib_event ev;
 			struct ib_event ev;
 
 
@@ -346,7 +343,7 @@ inv:
 	case OP(SEND_ONLY):
 	case OP(SEND_ONLY):
 	case OP(SEND_ONLY_WITH_IMMEDIATE):
 	case OP(SEND_ONLY_WITH_IMMEDIATE):
 send_first:
 send_first:
-		if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
+		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
 			qp->r_sge = qp->s_rdma_read_sge;
 			qp->r_sge = qp->s_rdma_read_sge;
 		else {
 		else {
 			ret = qib_get_rwqe(qp, 0);
 			ret = qib_get_rwqe(qp, 0);
@@ -400,7 +397,7 @@ send_last:
 			goto rewind;
 			goto rewind;
 		wc.opcode = IB_WC_RECV;
 		wc.opcode = IB_WC_RECV;
 		qib_copy_sge(&qp->r_sge, data, tlen, 0);
 		qib_copy_sge(&qp->r_sge, data, tlen, 0);
-		qib_put_ss(&qp->s_rdma_read_sge);
+		rvt_put_ss(&qp->s_rdma_read_sge);
 last_imm:
 last_imm:
 		wc.wr_id = qp->r_wr_id;
 		wc.wr_id = qp->r_wr_id;
 		wc.status = IB_WC_SUCCESS;
 		wc.status = IB_WC_SUCCESS;
@@ -414,7 +411,7 @@ last_imm:
 		wc.dlid_path_bits = 0;
 		wc.dlid_path_bits = 0;
 		wc.port_num = 0;
 		wc.port_num = 0;
 		/* Signal completion event if the solicited bit is set. */
 		/* Signal completion event if the solicited bit is set. */
-		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+		rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 			     (ohdr->bth[0] &
 			     (ohdr->bth[0] &
 				cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 				cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 		break;
 		break;
@@ -438,7 +435,7 @@ rdma_first:
 			int ok;
 			int ok;
 
 
 			/* Check rkey */
 			/* Check rkey */
-			ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
+			ok = rvt_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
 					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
 					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
 			if (unlikely(!ok))
 			if (unlikely(!ok))
 				goto drop;
 				goto drop;
@@ -483,8 +480,8 @@ rdma_last_imm:
 		tlen -= (hdrsize + pad + 4);
 		tlen -= (hdrsize + pad + 4);
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
 			goto drop;
-		if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
-			qib_put_ss(&qp->s_rdma_read_sge);
+		if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
+			rvt_put_ss(&qp->s_rdma_read_sge);
 		else {
 		else {
 			ret = qib_get_rwqe(qp, 1);
 			ret = qib_get_rwqe(qp, 1);
 			if (ret < 0)
 			if (ret < 0)
@@ -495,7 +492,7 @@ rdma_last_imm:
 		wc.byte_len = qp->r_len;
 		wc.byte_len = qp->r_len;
 		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
 		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
-		qib_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 		goto last_imm;
 		goto last_imm;
 
 
 	case OP(RDMA_WRITE_LAST):
 	case OP(RDMA_WRITE_LAST):
@@ -511,7 +508,7 @@ rdma_last:
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
 			goto drop;
 			goto drop;
 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
 		qib_copy_sge(&qp->r_sge, data, tlen, 1);
-		qib_put_ss(&qp->r_sge);
+		rvt_put_ss(&qp->r_sge);
 		break;
 		break;
 
 
 	default:
 	default:
@@ -523,10 +520,10 @@ rdma_last:
 	return;
 	return;
 
 
 rewind:
 rewind:
-	set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+	set_bit(RVT_R_REWIND_SGE, &qp->r_aflags);
 	qp->r_sge.num_sge = 0;
 	qp->r_sge.num_sge = 0;
 drop:
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 	return;
 	return;
 
 
 op_err:
 op_err:

+ 74 - 68
drivers/infiniband/hw/qib/qib_ud.c

@@ -32,6 +32,7 @@
  */
  */
 
 
 #include <rdma/ib_smi.h>
 #include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
 
 
 #include "qib.h"
 #include "qib.h"
 #include "qib_mad.h"
 #include "qib_mad.h"
@@ -46,22 +47,26 @@
  * Note that the receive interrupt handler may be calling qib_ud_rcv()
  * Note that the receive interrupt handler may be calling qib_ud_rcv()
  * while this is being called.
  * while this is being called.
  */
  */
-static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
+static void qib_ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
 {
 {
 	struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
 	struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
-	struct qib_pportdata *ppd;
-	struct qib_qp *qp;
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_devdata *dd = ppd->dd;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
+	struct rvt_qp *qp;
 	struct ib_ah_attr *ah_attr;
 	struct ib_ah_attr *ah_attr;
 	unsigned long flags;
 	unsigned long flags;
-	struct qib_sge_state ssge;
-	struct qib_sge *sge;
+	struct rvt_sge_state ssge;
+	struct rvt_sge *sge;
 	struct ib_wc wc;
 	struct ib_wc wc;
 	u32 length;
 	u32 length;
 	enum ib_qp_type sqptype, dqptype;
 	enum ib_qp_type sqptype, dqptype;
 
 
-	qp = qib_lookup_qpn(ibp, swqe->ud_wr.remote_qpn);
+	rcu_read_lock();
+	qp = rvt_lookup_qpn(rdi, &ibp->rvp, swqe->ud_wr.remote_qpn);
 	if (!qp) {
 	if (!qp) {
-		ibp->n_pkt_drops++;
+		ibp->rvp.n_pkt_drops++;
+		rcu_read_unlock();
 		return;
 		return;
 	}
 	}
 
 
@@ -71,12 +76,12 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 			IB_QPT_UD : qp->ibqp.qp_type;
 			IB_QPT_UD : qp->ibqp.qp_type;
 
 
 	if (dqptype != sqptype ||
 	if (dqptype != sqptype ||
-	    !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
-		ibp->n_pkt_drops++;
+	    !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+		ibp->rvp.n_pkt_drops++;
 		goto drop;
 		goto drop;
 	}
 	}
 
 
-	ah_attr = &to_iah(swqe->ud_wr.ah)->attr;
+	ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
 	ppd = ppd_from_ibp(ibp);
 	ppd = ppd_from_ibp(ibp);
 
 
 	if (qp->ibqp.qp_num > 1) {
 	if (qp->ibqp.qp_num > 1) {
@@ -140,8 +145,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 	/*
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 * Get the next work request entry to find where to put the data.
 	 */
 	 */
-	if (qp->r_flags & QIB_R_REUSE_SGE)
-		qp->r_flags &= ~QIB_R_REUSE_SGE;
+	if (qp->r_flags & RVT_R_REUSE_SGE)
+		qp->r_flags &= ~RVT_R_REUSE_SGE;
 	else {
 	else {
 		int ret;
 		int ret;
 
 
@@ -152,14 +157,14 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 		}
 		}
 		if (!ret) {
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
 			if (qp->ibqp.qp_num == 0)
-				ibp->n_vl15_dropped++;
+				ibp->rvp.n_vl15_dropped++;
 			goto bail_unlock;
 			goto bail_unlock;
 		}
 		}
 	}
 	}
 	/* Silently drop packets which are too big. */
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
 	if (unlikely(wc.byte_len > qp->r_len)) {
-		qp->r_flags |= QIB_R_REUSE_SGE;
-		ibp->n_pkt_drops++;
+		qp->r_flags |= RVT_R_REUSE_SGE;
+		ibp->rvp.n_pkt_drops++;
 		goto bail_unlock;
 		goto bail_unlock;
 	}
 	}
 
 
@@ -189,7 +194,7 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 			if (--ssge.num_sge)
 			if (--ssge.num_sge)
 				*sge = *ssge.sg_list++;
 				*sge = *ssge.sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 					break;
 				sge->n = 0;
 				sge->n = 0;
@@ -201,8 +206,8 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 		}
 		}
 		length -= len;
 		length -= len;
 	}
 	}
-	qib_put_ss(&qp->r_sge);
-	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+	rvt_put_ss(&qp->r_sge);
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		goto bail_unlock;
 		goto bail_unlock;
 	wc.wr_id = qp->r_wr_id;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
 	wc.status = IB_WC_SUCCESS;
@@ -216,30 +221,31 @@ static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
 	wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
 	wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
 	wc.port_num = qp->port_num;
 	wc.port_num = qp->port_num;
 	/* Signal completion event if the solicited bit is set. */
 	/* Signal completion event if the solicited bit is set. */
-	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 		     swqe->wr.send_flags & IB_SEND_SOLICITED);
 		     swqe->wr.send_flags & IB_SEND_SOLICITED);
-	ibp->n_loop_pkts++;
+	ibp->rvp.n_loop_pkts++;
 bail_unlock:
 bail_unlock:
 	spin_unlock_irqrestore(&qp->r_lock, flags);
 	spin_unlock_irqrestore(&qp->r_lock, flags);
 drop:
 drop:
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
+	rcu_read_unlock();
 }
 }
 
 
 /**
 /**
  * qib_make_ud_req - construct a UD request packet
  * qib_make_ud_req - construct a UD request packet
  * @qp: the QP
  * @qp: the QP
  *
  *
+ * Assumes the s_lock is held.
+ *
  * Return 1 if constructed; otherwise, return 0.
  * Return 1 if constructed; otherwise, return 0.
  */
  */
-int qib_make_ud_req(struct qib_qp *qp)
+int qib_make_ud_req(struct rvt_qp *qp)
 {
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_other_headers *ohdr;
 	struct qib_other_headers *ohdr;
 	struct ib_ah_attr *ah_attr;
 	struct ib_ah_attr *ah_attr;
 	struct qib_pportdata *ppd;
 	struct qib_pportdata *ppd;
 	struct qib_ibport *ibp;
 	struct qib_ibport *ibp;
-	struct qib_swqe *wqe;
-	unsigned long flags;
+	struct rvt_swqe *wqe;
 	u32 nwords;
 	u32 nwords;
 	u32 extra_bytes;
 	u32 extra_bytes;
 	u32 bth0;
 	u32 bth0;
@@ -248,28 +254,29 @@ int qib_make_ud_req(struct qib_qp *qp)
 	int ret = 0;
 	int ret = 0;
 	int next_cur;
 	int next_cur;
 
 
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
-		if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
 			goto bail;
 			goto bail;
 		/* We are in the error state, flush the work request. */
 		/* We are in the error state, flush the work request. */
-		if (qp->s_last == qp->s_head)
+		smp_read_barrier_depends(); /* see post_one_send */
+		if (qp->s_last == ACCESS_ONCE(qp->s_head))
 			goto bail;
 			goto bail;
 		/* If DMAs are in progress, we can't flush immediately. */
 		/* If DMAs are in progress, we can't flush immediately. */
-		if (atomic_read(&qp->s_dma_busy)) {
-			qp->s_flags |= QIB_S_WAIT_DMA;
+		if (atomic_read(&priv->s_dma_busy)) {
+			qp->s_flags |= RVT_S_WAIT_DMA;
 			goto bail;
 			goto bail;
 		}
 		}
-		wqe = get_swqe_ptr(qp, qp->s_last);
+		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
 		qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 		qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
 		goto done;
 		goto done;
 	}
 	}
 
 
-	if (qp->s_cur == qp->s_head)
+	/* see post_one_send() */
+	smp_read_barrier_depends();
+	if (qp->s_cur == ACCESS_ONCE(qp->s_head))
 		goto bail;
 		goto bail;
 
 
-	wqe = get_swqe_ptr(qp, qp->s_cur);
+	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
 	next_cur = qp->s_cur + 1;
 	next_cur = qp->s_cur + 1;
 	if (next_cur >= qp->s_size)
 	if (next_cur >= qp->s_size)
 		next_cur = 0;
 		next_cur = 0;
@@ -277,9 +284,9 @@ int qib_make_ud_req(struct qib_qp *qp)
 	/* Construct the header. */
 	/* Construct the header. */
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ibp = to_iport(qp->ibqp.device, qp->port_num);
 	ppd = ppd_from_ibp(ibp);
 	ppd = ppd_from_ibp(ibp);
-	ah_attr = &to_iah(wqe->ud_wr.ah)->attr;
-	if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
-		if (ah_attr->dlid != QIB_PERMISSIVE_LID)
+	ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
+	if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) {
+		if (ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE))
 			this_cpu_inc(ibp->pmastats->n_multicast_xmit);
 			this_cpu_inc(ibp->pmastats->n_multicast_xmit);
 		else
 		else
 			this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 			this_cpu_inc(ibp->pmastats->n_unicast_xmit);
@@ -287,6 +294,7 @@ int qib_make_ud_req(struct qib_qp *qp)
 		this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 		this_cpu_inc(ibp->pmastats->n_unicast_xmit);
 		lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
 		lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
 		if (unlikely(lid == ppd->lid)) {
 		if (unlikely(lid == ppd->lid)) {
+			unsigned long flags;
 			/*
 			/*
 			 * If DMAs are in progress, we can't generate
 			 * If DMAs are in progress, we can't generate
 			 * a completion for the loopback packet since
 			 * a completion for the loopback packet since
@@ -294,11 +302,12 @@ int qib_make_ud_req(struct qib_qp *qp)
 			 * XXX Instead of waiting, we could queue a
 			 * XXX Instead of waiting, we could queue a
 			 * zero length descriptor so we get a callback.
 			 * zero length descriptor so we get a callback.
 			 */
 			 */
-			if (atomic_read(&qp->s_dma_busy)) {
-				qp->s_flags |= QIB_S_WAIT_DMA;
+			if (atomic_read(&priv->s_dma_busy)) {
+				qp->s_flags |= RVT_S_WAIT_DMA;
 				goto bail;
 				goto bail;
 			}
 			}
 			qp->s_cur = next_cur;
 			qp->s_cur = next_cur;
+			local_irq_save(flags);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			spin_unlock_irqrestore(&qp->s_lock, flags);
 			qib_ud_loopback(qp, wqe);
 			qib_ud_loopback(qp, wqe);
 			spin_lock_irqsave(&qp->s_lock, flags);
 			spin_lock_irqsave(&qp->s_lock, flags);
@@ -324,11 +333,11 @@ int qib_make_ud_req(struct qib_qp *qp)
 
 
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 	if (ah_attr->ah_flags & IB_AH_GRH) {
 		/* Header size in 32-bit words. */
 		/* Header size in 32-bit words. */
-		qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr->u.l.grh,
+		qp->s_hdrwords += qib_make_grh(ibp, &priv->s_hdr->u.l.grh,
 					       &ah_attr->grh,
 					       &ah_attr->grh,
 					       qp->s_hdrwords, nwords);
 					       qp->s_hdrwords, nwords);
 		lrh0 = QIB_LRH_GRH;
 		lrh0 = QIB_LRH_GRH;
-		ohdr = &qp->s_hdr->u.l.oth;
+		ohdr = &priv->s_hdr->u.l.oth;
 		/*
 		/*
 		 * Don't worry about sending to locally attached multicast
 		 * Don't worry about sending to locally attached multicast
 		 * QPs.  It is unspecified by the spec. what happens.
 		 * QPs.  It is unspecified by the spec. what happens.
@@ -336,7 +345,7 @@ int qib_make_ud_req(struct qib_qp *qp)
 	} else {
 	} else {
 		/* Header size in 32-bit words. */
 		/* Header size in 32-bit words. */
 		lrh0 = QIB_LRH_BTH;
 		lrh0 = QIB_LRH_BTH;
-		ohdr = &qp->s_hdr->u.oth;
+		ohdr = &priv->s_hdr->u.oth;
 	}
 	}
 	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
 		qp->s_hdrwords++;
 		qp->s_hdrwords++;
@@ -349,15 +358,16 @@ int qib_make_ud_req(struct qib_qp *qp)
 		lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
 		lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
 	else
 	else
 		lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
 		lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
-	qp->s_hdr->lrh[0] = cpu_to_be16(lrh0);
-	qp->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
-	qp->s_hdr->lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+	priv->s_hdr->lrh[0] = cpu_to_be16(lrh0);
+	priv->s_hdr->lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
+	priv->s_hdr->lrh[2] =
+			cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
 	lid = ppd->lid;
 	lid = ppd->lid;
 	if (lid) {
 	if (lid) {
 		lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
 		lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
-		qp->s_hdr->lrh[3] = cpu_to_be16(lid);
+		priv->s_hdr->lrh[3] = cpu_to_be16(lid);
 	} else
 	} else
-		qp->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
+		priv->s_hdr->lrh[3] = IB_LID_PERMISSIVE;
 	if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 	if (wqe->wr.send_flags & IB_SEND_SOLICITED)
 		bth0 |= IB_BTH_SOLICITED;
 		bth0 |= IB_BTH_SOLICITED;
 	bth0 |= extra_bytes << 20;
 	bth0 |= extra_bytes << 20;
@@ -368,11 +378,11 @@ int qib_make_ud_req(struct qib_qp *qp)
 	/*
 	/*
 	 * Use the multicast QP if the destination LID is a multicast LID.
 	 * Use the multicast QP if the destination LID is a multicast LID.
 	 */
 	 */
-	ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
-		ah_attr->dlid != QIB_PERMISSIVE_LID ?
+	ohdr->bth[1] = ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
+		ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) ?
 		cpu_to_be32(QIB_MULTICAST_QPN) :
 		cpu_to_be32(QIB_MULTICAST_QPN) :
 		cpu_to_be32(wqe->ud_wr.remote_qpn);
 		cpu_to_be32(wqe->ud_wr.remote_qpn);
-	ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
+	ohdr->bth[2] = cpu_to_be32(wqe->psn & QIB_PSN_MASK);
 	/*
 	/*
 	 * Qkeys with the high order bit set mean use the
 	 * Qkeys with the high order bit set mean use the
 	 * qkey from the QP context instead of the WR (see 10.2.5).
 	 * qkey from the QP context instead of the WR (see 10.2.5).
@@ -382,13 +392,9 @@ int qib_make_ud_req(struct qib_qp *qp)
 	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
 
 
 done:
 done:
-	ret = 1;
-	goto unlock;
-
+	return 1;
 bail:
 bail:
-	qp->s_flags &= ~QIB_S_BUSY;
-unlock:
-	spin_unlock_irqrestore(&qp->s_lock, flags);
+	qp->s_flags &= ~RVT_S_BUSY;
 	return ret;
 	return ret;
 }
 }
 
 
@@ -426,7 +432,7 @@ static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey)
  * Called at interrupt level.
  * Called at interrupt level.
  */
  */
 void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 {
 	struct qib_other_headers *ohdr;
 	struct qib_other_headers *ohdr;
 	int opcode;
 	int opcode;
@@ -446,7 +452,7 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 		hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
 		hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
 	}
 	}
 	qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
 	qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
-	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK;
+	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
 
 
 	/*
 	/*
 	 * Get the number of bytes the message was padded by
 	 * Get the number of bytes the message was padded by
@@ -531,8 +537,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	/*
 	/*
 	 * Get the next work request entry to find where to put the data.
 	 * Get the next work request entry to find where to put the data.
 	 */
 	 */
-	if (qp->r_flags & QIB_R_REUSE_SGE)
-		qp->r_flags &= ~QIB_R_REUSE_SGE;
+	if (qp->r_flags & RVT_R_REUSE_SGE)
+		qp->r_flags &= ~RVT_R_REUSE_SGE;
 	else {
 	else {
 		int ret;
 		int ret;
 
 
@@ -543,13 +549,13 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 		}
 		}
 		if (!ret) {
 		if (!ret) {
 			if (qp->ibqp.qp_num == 0)
 			if (qp->ibqp.qp_num == 0)
-				ibp->n_vl15_dropped++;
+				ibp->rvp.n_vl15_dropped++;
 			return;
 			return;
 		}
 		}
 	}
 	}
 	/* Silently drop packets which are too big. */
 	/* Silently drop packets which are too big. */
 	if (unlikely(wc.byte_len > qp->r_len)) {
 	if (unlikely(wc.byte_len > qp->r_len)) {
-		qp->r_flags |= QIB_R_REUSE_SGE;
+		qp->r_flags |= RVT_R_REUSE_SGE;
 		goto drop;
 		goto drop;
 	}
 	}
 	if (has_grh) {
 	if (has_grh) {
@@ -559,8 +565,8 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	} else
 	} else
 		qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
 		qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
 	qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
 	qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
-	qib_put_ss(&qp->r_sge);
-	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+	rvt_put_ss(&qp->r_sge);
+	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
 		return;
 		return;
 	wc.wr_id = qp->r_wr_id;
 	wc.wr_id = qp->r_wr_id;
 	wc.status = IB_WC_SUCCESS;
 	wc.status = IB_WC_SUCCESS;
@@ -576,15 +582,15 @@ void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
 	/*
 	/*
 	 * Save the LMC lower bits if the destination LID is a unicast LID.
 	 * Save the LMC lower bits if the destination LID is a unicast LID.
 	 */
 	 */
-	wc.dlid_path_bits = dlid >= QIB_MULTICAST_LID_BASE ? 0 :
+	wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 :
 		dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
 		dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
 	wc.port_num = qp->port_num;
 	wc.port_num = qp->port_num;
 	/* Signal completion event if the solicited bit is set. */
 	/* Signal completion event if the solicited bit is set. */
-	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
 		     (ohdr->bth[0] &
 		     (ohdr->bth[0] &
 			cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 			cpu_to_be32(IB_BTH_SOLICITED)) != 0);
 	return;
 	return;
 
 
 drop:
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 }
 }

+ 327 - 896
drivers/infiniband/hw/qib/qib_verbs.c

@@ -41,6 +41,7 @@
 #include <linux/mm.h>
 #include <linux/random.h>
 #include <linux/vmalloc.h>
+#include <rdma/rdma_vt.h>
 
 #include "qib.h"
 #include "qib_common.h"
@@ -49,8 +50,8 @@ static unsigned int ib_qib_qp_table_size = 256;
 module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
 MODULE_PARM_DESC(qp_table_size, "QP table size");
 
-unsigned int ib_qib_lkey_table_size = 16;
-module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint,
+static unsigned int qib_lkey_table_size = 16;
+module_param_named(lkey_table_size, qib_lkey_table_size, uint,
 		   S_IRUGO);
 MODULE_PARM_DESC(lkey_table_size,
 		 "LKEY table size in bits (2^n, 1 <= n <= 23)");
@@ -112,36 +113,6 @@ static unsigned int ib_qib_disable_sma;
 module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
 module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
 MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 MODULE_PARM_DESC(disable_sma, "Disable the SMA");
 
 
-/*
- * Note that it is OK to post send work requests in the SQE and ERR
- * states; qib_do_send() will process them and generate error
- * completions as per IB 1.2 C10-96.
- */
-const int ib_qib_state_ops[IB_QPS_ERR + 1] = {
-	[IB_QPS_RESET] = 0,
-	[IB_QPS_INIT] = QIB_POST_RECV_OK,
-	[IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK,
-	[IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
-	    QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK |
-	    QIB_PROCESS_NEXT_SEND_OK,
-	[IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
-	    QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK,
-	[IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
-	    QIB_POST_SEND_OK | QIB_FLUSH_SEND,
-	[IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV |
-	    QIB_POST_SEND_OK | QIB_FLUSH_SEND,
-};
-
-struct qib_ucontext {
-	struct ib_ucontext ibucontext;
-};
-
-static inline struct qib_ucontext *to_iucontext(struct ib_ucontext
-						  *ibucontext)
-{
-	return container_of(ibucontext, struct qib_ucontext, ibucontext);
-}
-
 /*
 /*
  * Translate ib_wr_opcode into ib_wc_opcode.
  * Translate ib_wr_opcode into ib_wc_opcode.
  */
  */
@@ -166,9 +137,9 @@ __be64 ib_qib_sys_image_guid;
  * @data: the data to copy
  * @data: the data to copy
  * @length: the length of the data
  * @length: the length of the data
  */
  */
-void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
+void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length, int release)
 {
 {
-	struct qib_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 
 	while (length) {
 	while (length) {
 		u32 len = sge->length;
 		u32 len = sge->length;
@@ -184,11 +155,11 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
 		sge->sge_length -= len;
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 		if (sge->sge_length == 0) {
 			if (release)
 			if (release)
-				qib_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--ss->num_sge)
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 					break;
 				sge->n = 0;
 				sge->n = 0;
@@ -208,9 +179,9 @@ void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
  * @ss: the SGE state
  * @ss: the SGE state
  * @length: the number of bytes to skip
  * @length: the number of bytes to skip
  */
  */
-void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
+void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release)
 {
 {
-	struct qib_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 
 	while (length) {
 	while (length) {
 		u32 len = sge->length;
 		u32 len = sge->length;
@@ -225,11 +196,11 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
 		sge->sge_length -= len;
 		sge->sge_length -= len;
 		if (sge->sge_length == 0) {
 		if (sge->sge_length == 0) {
 			if (release)
 			if (release)
-				qib_put_mr(sge->mr);
+				rvt_put_mr(sge->mr);
 			if (--ss->num_sge)
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 					break;
 				sge->n = 0;
 				sge->n = 0;
@@ -248,10 +219,10 @@ void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
  * Don't modify the qib_sge_state to get the count.
  * Don't modify the qib_sge_state to get the count.
  * Return zero if any of the segments is not aligned.
  * Return zero if any of the segments is not aligned.
  */
  */
-static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
+static u32 qib_count_sge(struct rvt_sge_state *ss, u32 length)
 {
 {
-	struct qib_sge *sg_list = ss->sg_list;
-	struct qib_sge sge = ss->sge;
+	struct rvt_sge *sg_list = ss->sg_list;
+	struct rvt_sge sge = ss->sge;
 	u8 num_sge = ss->num_sge;
 	u8 num_sge = ss->num_sge;
 	u32 ndesc = 1;  /* count the header */
 	u32 ndesc = 1;  /* count the header */
 
 
@@ -276,7 +247,7 @@ static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
 			if (--num_sge)
 			if (--num_sge)
 				sge = *sg_list++;
 				sge = *sg_list++;
 		} else if (sge.length == 0 && sge.mr->lkey) {
 		} else if (sge.length == 0 && sge.mr->lkey) {
-			if (++sge.n >= QIB_SEGSZ) {
+			if (++sge.n >= RVT_SEGSZ) {
 				if (++sge.m >= sge.mr->mapsz)
 				if (++sge.m >= sge.mr->mapsz)
 					break;
 					break;
 				sge.n = 0;
 				sge.n = 0;
@@ -294,9 +265,9 @@ static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
 /*
 /*
  * Copy from the SGEs to the data buffer.
  * Copy from the SGEs to the data buffer.
  */
  */
-static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
+static void qib_copy_from_sge(void *data, struct rvt_sge_state *ss, u32 length)
 {
 {
-	struct qib_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 
 	while (length) {
 	while (length) {
 		u32 len = sge->length;
 		u32 len = sge->length;
@@ -314,7 +285,7 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
 			if (--ss->num_sge)
 			if (--ss->num_sge)
 				*sge = *ss->sg_list++;
 				*sge = *ss->sg_list++;
 		} else if (sge->length == 0 && sge->mr->lkey) {
 		} else if (sge->length == 0 && sge->mr->lkey) {
-			if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->n >= RVT_SEGSZ) {
 				if (++sge->m >= sge->mr->mapsz)
 				if (++sge->m >= sge->mr->mapsz)
 					break;
 					break;
 				sge->n = 0;
 				sge->n = 0;
@@ -329,242 +300,6 @@ static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
 	}
 	}
 }
 }
 
 
-/**
- * qib_post_one_send - post one RC, UC, or UD send work request
- * @qp: the QP to post on
- * @wr: the work request to send
- */
-static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr,
-	int *scheduled)
-{
-	struct qib_swqe *wqe;
-	u32 next;
-	int i;
-	int j;
-	int acc;
-	int ret;
-	unsigned long flags;
-	struct qib_lkey_table *rkt;
-	struct qib_pd *pd;
-	int avoid_schedule = 0;
-
-	spin_lock_irqsave(&qp->s_lock, flags);
-
-	/* Check that state is OK to post send. */
-	if (unlikely(!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK)))
-		goto bail_inval;
-
-	/* IB spec says that num_sge == 0 is OK. */
-	if (wr->num_sge > qp->s_max_sge)
-		goto bail_inval;
-
-	/*
-	 * Don't allow RDMA reads or atomic operations on UC or
-	 * undefined operations.
-	 * Make sure buffer is large enough to hold the result for atomics.
-	 */
-	if (wr->opcode == IB_WR_REG_MR) {
-		if (qib_reg_mr(qp, reg_wr(wr)))
-			goto bail_inval;
-	} else if (qp->ibqp.qp_type == IB_QPT_UC) {
-		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
-			goto bail_inval;
-	} else if (qp->ibqp.qp_type != IB_QPT_RC) {
-		/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
-		if (wr->opcode != IB_WR_SEND &&
-		    wr->opcode != IB_WR_SEND_WITH_IMM)
-			goto bail_inval;
-		/* Check UD destination address PD */
-		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
-			goto bail_inval;
-	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
-		goto bail_inval;
-	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
-		   (wr->num_sge == 0 ||
-		    wr->sg_list[0].length < sizeof(u64) ||
-		    wr->sg_list[0].addr & (sizeof(u64) - 1)))
-		goto bail_inval;
-	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
-		goto bail_inval;
-
-	next = qp->s_head + 1;
-	if (next >= qp->s_size)
-		next = 0;
-	if (next == qp->s_last) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-
-	rkt = &to_idev(qp->ibqp.device)->lk_table;
-	pd = to_ipd(qp->ibqp.pd);
-	wqe = get_swqe_ptr(qp, qp->s_head);
-
-	if (qp->ibqp.qp_type != IB_QPT_UC &&
-	    qp->ibqp.qp_type != IB_QPT_RC)
-		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
-	else if (wr->opcode == IB_WR_REG_MR)
-		memcpy(&wqe->reg_wr, reg_wr(wr),
-			sizeof(wqe->reg_wr));
-	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
-		 wr->opcode == IB_WR_RDMA_WRITE ||
-		 wr->opcode == IB_WR_RDMA_READ)
-		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
-	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
-		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
-		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
-	else
-		memcpy(&wqe->wr, wr, sizeof(wqe->wr));
-
-	wqe->length = 0;
-	j = 0;
-	if (wr->num_sge) {
-		acc = wr->opcode >= IB_WR_RDMA_READ ?
-			IB_ACCESS_LOCAL_WRITE : 0;
-		for (i = 0; i < wr->num_sge; i++) {
-			u32 length = wr->sg_list[i].length;
-			int ok;
-
-			if (length == 0)
-				continue;
-			ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j],
-					 &wr->sg_list[i], acc);
-			if (!ok)
-				goto bail_inval_free;
-			wqe->length += length;
-			j++;
-		}
-		wqe->wr.num_sge = j;
-	}
-	if (qp->ibqp.qp_type == IB_QPT_UC ||
-	    qp->ibqp.qp_type == IB_QPT_RC) {
-		if (wqe->length > 0x80000000U)
-			goto bail_inval_free;
-		if (wqe->length <= qp->pmtu)
-			avoid_schedule = 1;
-	} else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
-				  qp->port_num - 1)->ibmtu) {
-		goto bail_inval_free;
-	} else {
-		atomic_inc(&to_iah(ud_wr(wr)->ah)->refcount);
-		avoid_schedule = 1;
-	}
-	wqe->ssn = qp->s_ssn++;
-	qp->s_head = next;
-
-	ret = 0;
-	goto bail;
-
-bail_inval_free:
-	while (j) {
-		struct qib_sge *sge = &wqe->sg_list[--j];
-
-		qib_put_mr(sge->mr);
-	}
-bail_inval:
-	ret = -EINVAL;
-bail:
-	if (!ret && !wr->next && !avoid_schedule &&
-	 !qib_sdma_empty(
-	   dd_from_ibdev(qp->ibqp.device)->pport + qp->port_num - 1)) {
-		qib_schedule_send(qp);
-		*scheduled = 1;
-	}
-	spin_unlock_irqrestore(&qp->s_lock, flags);
-	return ret;
-}
-
-/**
- * qib_post_send - post a send on a QP
- * @ibqp: the QP to post the send on
- * @wr: the list of work requests to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
-			 struct ib_send_wr **bad_wr)
-{
-	struct qib_qp *qp = to_iqp(ibqp);
-	int err = 0;
-	int scheduled = 0;
-
-	for (; wr; wr = wr->next) {
-		err = qib_post_one_send(qp, wr, &scheduled);
-		if (err) {
-			*bad_wr = wr;
-			goto bail;
-		}
-	}
-
-	/* Try to do the send work in the caller's context. */
-	if (!scheduled)
-		qib_do_send(&qp->s_work);
-
-bail:
-	return err;
-}
-
-/**
- * qib_post_receive - post a receive on a QP
- * @ibqp: the QP to post the receive on
- * @wr: the WR to post
- * @bad_wr: the first bad WR is put here
- *
- * This may be called from interrupt context.
- */
-static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
-			    struct ib_recv_wr **bad_wr)
-{
-	struct qib_qp *qp = to_iqp(ibqp);
-	struct qib_rwq *wq = qp->r_rq.wq;
-	unsigned long flags;
-	int ret;
-
-	/* Check that state is OK to post receive. */
-	if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) {
-		*bad_wr = wr;
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	for (; wr; wr = wr->next) {
-		struct qib_rwqe *wqe;
-		u32 next;
-		int i;
-
-		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
-			*bad_wr = wr;
-			ret = -EINVAL;
-			goto bail;
-		}
-
-		spin_lock_irqsave(&qp->r_rq.lock, flags);
-		next = wq->head + 1;
-		if (next >= qp->r_rq.size)
-			next = 0;
-		if (next == wq->tail) {
-			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-			*bad_wr = wr;
-			ret = -ENOMEM;
-			goto bail;
-		}
-
-		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
-		wqe->wr_id = wr->wr_id;
-		wqe->num_sge = wr->num_sge;
-		for (i = 0; i < wr->num_sge; i++)
-			wqe->sg_list[i] = wr->sg_list[i];
-		/* Make sure queue entry is written before the head index. */
-		smp_wmb();
-		wq->head = next;
-		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
-	}
-	ret = 0;
-
-bail:
-	return ret;
-}
-
 /**
 /**
  * qib_qp_rcv - processing an incoming packet on a QP
  * qib_qp_rcv - processing an incoming packet on a QP
  * @rcd: the context pointer
  * @rcd: the context pointer
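
The deleted qib_post_one_send()/qib_post_receive() bodies (their replacements live in rdmavt) are the producer side of the head/tail rings that the "see post_one_send()" barrier comments elsewhere in this series refer to: the WQE is filled in first, and smp_wmb() orders those stores before the new head is published. A single-producer C11 sketch of that half, matching the acquire-load consumer sketch shown earlier (illustrative only, not the rdmavt code; the real driver also holds r_rq.lock here):

#include <stdatomic.h>

#define RING_SIZE 256

struct rwqe {
	unsigned long wr_id;
	int num_sge;
};

struct recv_queue {
	struct rwqe wq[RING_SIZE];
	_Atomic unsigned head;	/* next slot the single producer will fill */
	_Atomic unsigned tail;	/* advanced by the consumer */
};

/* Returns 0 on success, -1 if the ring is full. */
static int post_recv(struct recv_queue *rq, unsigned long wr_id, int num_sge)
{
	unsigned head = atomic_load_explicit(&rq->head, memory_order_relaxed);
	unsigned next = (head + 1) % RING_SIZE;

	/* A stale tail can only make the ring look fuller than it is. */
	if (next == atomic_load_explicit(&rq->tail, memory_order_relaxed))
		return -1;

	rq->wq[head].wr_id = wr_id;	/* fill the entry first ... */
	rq->wq[head].num_sge = num_sge;
	/* ... then publish it; release ordering plays the role of smp_wmb(). */
	atomic_store_explicit(&rq->head, next, memory_order_release);
	return 0;
}

int main(void)
{
	static struct recv_queue rq;

	return post_recv(&rq, 1, 1) == 0 ? 0 : 1;
}
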
@@ -579,15 +314,15 @@ bail:
  * Called at interrupt level.
  * Called at interrupt level.
  */
  */
 static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
 static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
-		       int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+		       int has_grh, void *data, u32 tlen, struct rvt_qp *qp)
 {
 {
 	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
 
 
 	spin_lock(&qp->r_lock);
 	spin_lock(&qp->r_lock);
 
 
 	/* Check for valid receive state. */
 	/* Check for valid receive state. */
-	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
-		ibp->n_pkt_drops++;
+	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
+		ibp->rvp.n_pkt_drops++;
 		goto unlock;
 		goto unlock;
 	}
 	}
 
 
@@ -632,8 +367,10 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
 	struct qib_pportdata *ppd = rcd->ppd;
 	struct qib_pportdata *ppd = rcd->ppd;
 	struct qib_ibport *ibp = &ppd->ibport_data;
 	struct qib_ibport *ibp = &ppd->ibport_data;
 	struct qib_ib_header *hdr = rhdr;
 	struct qib_ib_header *hdr = rhdr;
+	struct qib_devdata *dd = ppd->dd;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 	struct qib_other_headers *ohdr;
 	struct qib_other_headers *ohdr;
-	struct qib_qp *qp;
+	struct rvt_qp *qp;
 	u32 qp_num;
 	u32 qp_num;
 	int lnh;
 	int lnh;
 	u8 opcode;
 	u8 opcode;
@@ -645,7 +382,7 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
 
 
 	/* Check for a valid destination LID (see ch. 7.11.1). */
 	/* Check for a valid destination LID (see ch. 7.11.1). */
 	lid = be16_to_cpu(hdr->lrh[1]);
 	lid = be16_to_cpu(hdr->lrh[1]);
-	if (lid < QIB_MULTICAST_LID_BASE) {
+	if (lid < be16_to_cpu(IB_MULTICAST_LID_BASE)) {
 		lid &= ~((1 << ppd->lmc) - 1);
 		lid &= ~((1 << ppd->lmc) - 1);
 		if (unlikely(lid != ppd->lid))
 		if (unlikely(lid != ppd->lid))
 			goto drop;
 			goto drop;
@@ -674,50 +411,40 @@ void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
 #endif
 #endif
 
 
 	/* Get the destination QP number. */
 	/* Get the destination QP number. */
-	qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+	qp_num = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;
 	if (qp_num == QIB_MULTICAST_QPN) {
 	if (qp_num == QIB_MULTICAST_QPN) {
-		struct qib_mcast *mcast;
-		struct qib_mcast_qp *p;
+		struct rvt_mcast *mcast;
+		struct rvt_mcast_qp *p;
 
 
 		if (lnh != QIB_LRH_GRH)
 		if (lnh != QIB_LRH_GRH)
 			goto drop;
 			goto drop;
-		mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
+		mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid);
 		if (mcast == NULL)
 		if (mcast == NULL)
 			goto drop;
 			goto drop;
 		this_cpu_inc(ibp->pmastats->n_multicast_rcv);
 		this_cpu_inc(ibp->pmastats->n_multicast_rcv);
 		list_for_each_entry_rcu(p, &mcast->qp_list, list)
 		list_for_each_entry_rcu(p, &mcast->qp_list, list)
 			qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
 			qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
 		/*
 		/*
-		 * Notify qib_multicast_detach() if it is waiting for us
+		 * Notify rvt_multicast_detach() if it is waiting for us
 		 * to finish.
 		 * to finish.
 		 */
 		 */
 		if (atomic_dec_return(&mcast->refcount) <= 1)
 		if (atomic_dec_return(&mcast->refcount) <= 1)
 			wake_up(&mcast->wait);
 			wake_up(&mcast->wait);
 	} else {
 	} else {
-		if (rcd->lookaside_qp) {
-			if (rcd->lookaside_qpn != qp_num) {
-				if (atomic_dec_and_test(
-					&rcd->lookaside_qp->refcount))
-					wake_up(
-					 &rcd->lookaside_qp->wait);
-				rcd->lookaside_qp = NULL;
-			}
+		rcu_read_lock();
+		qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num);
+		if (!qp) {
+			rcu_read_unlock();
+			goto drop;
 		}
 		}
-		if (!rcd->lookaside_qp) {
-			qp = qib_lookup_qpn(ibp, qp_num);
-			if (!qp)
-				goto drop;
-			rcd->lookaside_qp = qp;
-			rcd->lookaside_qpn = qp_num;
-		} else
-			qp = rcd->lookaside_qp;
 		this_cpu_inc(ibp->pmastats->n_unicast_rcv);
 		this_cpu_inc(ibp->pmastats->n_unicast_rcv);
 		qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
 		qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
+		rcu_read_unlock();
 	}
 	}
 	return;
 	return;
 
 
 drop:
 drop:
-	ibp->n_pkt_drops++;
+	ibp->rvp.n_pkt_drops++;
 }
 }
 
 
 /*
 /*
@@ -728,23 +455,25 @@ static void mem_timer(unsigned long data)
 {
 {
 	struct qib_ibdev *dev = (struct qib_ibdev *) data;
 	struct qib_ibdev *dev = (struct qib_ibdev *) data;
 	struct list_head *list = &dev->memwait;
 	struct list_head *list = &dev->memwait;
-	struct qib_qp *qp = NULL;
+	struct rvt_qp *qp = NULL;
+	struct qib_qp_priv *priv = NULL;
 	unsigned long flags;
 	unsigned long flags;
 
 
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 	if (!list_empty(list)) {
 	if (!list_empty(list)) {
-		qp = list_entry(list->next, struct qib_qp, iowait);
-		list_del_init(&qp->iowait);
+		priv = list_entry(list->next, struct qib_qp_priv, iowait);
+		qp = priv->owner;
+		list_del_init(&priv->iowait);
 		atomic_inc(&qp->refcount);
 		atomic_inc(&qp->refcount);
 		if (!list_empty(list))
 		if (!list_empty(list))
 			mod_timer(&dev->mem_timer, jiffies + 1);
 			mod_timer(&dev->mem_timer, jiffies + 1);
 	}
 	}
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
+	spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 
 
 	if (qp) {
 	if (qp) {
 		spin_lock_irqsave(&qp->s_lock, flags);
 		spin_lock_irqsave(&qp->s_lock, flags);
-		if (qp->s_flags & QIB_S_WAIT_KMEM) {
-			qp->s_flags &= ~QIB_S_WAIT_KMEM;
+		if (qp->s_flags & RVT_S_WAIT_KMEM) {
+			qp->s_flags &= ~RVT_S_WAIT_KMEM;
 			qib_schedule_send(qp);
 			qib_schedule_send(qp);
 		}
 		}
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -753,9 +482,9 @@ static void mem_timer(unsigned long data)
 	}
 	}
 }
 }
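
mem_timer() above also shows the locking shape used throughout the rework: an entry is removed from the device-wide wait list under rdi.pending_lock, the list lock is dropped, and only then is the per-QP s_lock taken to re-check and clear the wait flag before rescheduling (the refcount the real code takes to keep the QP alive across that window is elided below). A userspace sketch of the two-lock discipline with pthread mutexes standing in for the spinlocks (names invented):

#include <pthread.h>
#include <stddef.h>

struct waiter {
	struct waiter *next;
	int wait_flag;			/* protected by ->lock */
	pthread_mutex_t lock;		/* plays the role of the QP s_lock */
};

static struct waiter *wait_list;	/* protected by list_lock */
static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void wake_first(void)
{
	struct waiter *w;

	pthread_mutex_lock(&list_lock);		/* device pending_lock */
	w = wait_list;
	if (w)
		wait_list = w->next;
	pthread_mutex_unlock(&list_lock);	/* drop before taking w->lock */

	if (!w)
		return;

	pthread_mutex_lock(&w->lock);		/* per-entry lock */
	if (w->wait_flag) {			/* re-check under the lock */
		w->wait_flag = 0;
		/* ...reschedule the entry's send engine here... */
	}
	pthread_mutex_unlock(&w->lock);
}

int main(void)
{
	struct waiter w = { .next = NULL, .wait_flag = 1,
			    .lock = PTHREAD_MUTEX_INITIALIZER };

	pthread_mutex_lock(&list_lock);
	wait_list = &w;
	pthread_mutex_unlock(&list_lock);
	wake_first();
	return w.wait_flag;	/* 0 if the flag was cleared as expected */
}
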
 
 
-static void update_sge(struct qib_sge_state *ss, u32 length)
+static void update_sge(struct rvt_sge_state *ss, u32 length)
 {
 {
-	struct qib_sge *sge = &ss->sge;
+	struct rvt_sge *sge = &ss->sge;
 
 
 	sge->vaddr += length;
 	sge->vaddr += length;
 	sge->length -= length;
 	sge->length -= length;
@@ -764,7 +493,7 @@ static void update_sge(struct qib_sge_state *ss, u32 length)
 		if (--ss->num_sge)
 		if (--ss->num_sge)
 			*sge = *ss->sg_list++;
 			*sge = *ss->sg_list++;
 	} else if (sge->length == 0 && sge->mr->lkey) {
 	} else if (sge->length == 0 && sge->mr->lkey) {
-		if (++sge->n >= QIB_SEGSZ) {
+		if (++sge->n >= RVT_SEGSZ) {
 			if (++sge->m >= sge->mr->mapsz)
 			if (++sge->m >= sge->mr->mapsz)
 				return;
 				return;
 			sge->n = 0;
 			sge->n = 0;
@@ -810,7 +539,7 @@ static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
 }
 }
 #endif
 #endif
 
 
-static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
+static void copy_io(u32 __iomem *piobuf, struct rvt_sge_state *ss,
 		    u32 length, unsigned flush_wc)
 		    u32 length, unsigned flush_wc)
 {
 {
 	u32 extra = 0;
 	u32 extra = 0;
@@ -947,30 +676,31 @@ static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
 }
 }
 
 
 static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
 static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
-					   struct qib_qp *qp)
+					   struct rvt_qp *qp)
 {
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_verbs_txreq *tx;
 	struct qib_verbs_txreq *tx;
 	unsigned long flags;
 	unsigned long flags;
 
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 	spin_lock_irqsave(&qp->s_lock, flags);
-	spin_lock(&dev->pending_lock);
+	spin_lock(&dev->rdi.pending_lock);
 
 
 	if (!list_empty(&dev->txreq_free)) {
 	if (!list_empty(&dev->txreq_free)) {
 		struct list_head *l = dev->txreq_free.next;
 		struct list_head *l = dev->txreq_free.next;
 
 
 		list_del(l);
 		list_del(l);
-		spin_unlock(&dev->pending_lock);
+		spin_unlock(&dev->rdi.pending_lock);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 	} else {
 	} else {
-		if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK &&
-		    list_empty(&qp->iowait)) {
+		if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK &&
+		    list_empty(&priv->iowait)) {
 			dev->n_txwait++;
 			dev->n_txwait++;
-			qp->s_flags |= QIB_S_WAIT_TX;
-			list_add_tail(&qp->iowait, &dev->txwait);
+			qp->s_flags |= RVT_S_WAIT_TX;
+			list_add_tail(&priv->iowait, &dev->txwait);
 		}
 		}
-		qp->s_flags &= ~QIB_S_BUSY;
-		spin_unlock(&dev->pending_lock);
+		qp->s_flags &= ~RVT_S_BUSY;
+		spin_unlock(&dev->rdi.pending_lock);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		tx = ERR_PTR(-EBUSY);
 		tx = ERR_PTR(-EBUSY);
 	}
 	}
@@ -978,22 +708,22 @@ static noinline struct qib_verbs_txreq *__get_txreq(struct qib_ibdev *dev,
 }
 }
 
 
 static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
 static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
-					 struct qib_qp *qp)
+					 struct rvt_qp *qp)
 {
 {
 	struct qib_verbs_txreq *tx;
 	struct qib_verbs_txreq *tx;
 	unsigned long flags;
 	unsigned long flags;
 
 
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 	/* assume the list non empty */
 	/* assume the list non empty */
 	if (likely(!list_empty(&dev->txreq_free))) {
 	if (likely(!list_empty(&dev->txreq_free))) {
 		struct list_head *l = dev->txreq_free.next;
 		struct list_head *l = dev->txreq_free.next;
 
 
 		list_del(l);
 		list_del(l);
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
+		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
 	} else {
 	} else {
 		/* call slow path to get the extra lock */
 		/* call slow path to get the extra lock */
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
+		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 		tx =  __get_txreq(dev, qp);
 		tx =  __get_txreq(dev, qp);
 	}
 	}
 	return tx;
 	return tx;
@@ -1002,16 +732,15 @@ static inline struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
 void qib_put_txreq(struct qib_verbs_txreq *tx)
 void qib_put_txreq(struct qib_verbs_txreq *tx)
 {
 {
 	struct qib_ibdev *dev;
 	struct qib_ibdev *dev;
-	struct qib_qp *qp;
+	struct rvt_qp *qp;
+	struct qib_qp_priv *priv;
 	unsigned long flags;
 	unsigned long flags;
 
 
 	qp = tx->qp;
 	qp = tx->qp;
 	dev = to_idev(qp->ibqp.device);
 	dev = to_idev(qp->ibqp.device);
 
 
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
 	if (tx->mr) {
 	if (tx->mr) {
-		qib_put_mr(tx->mr);
+		rvt_put_mr(tx->mr);
 		tx->mr = NULL;
 		tx->mr = NULL;
 	}
 	}
 	if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
 	if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
@@ -1022,21 +751,23 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
 		kfree(tx->align_buf);
 		kfree(tx->align_buf);
 	}
 	}
 
 
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 
 
 	/* Put struct back on free list */
 	/* Put struct back on free list */
 	list_add(&tx->txreq.list, &dev->txreq_free);
 	list_add(&tx->txreq.list, &dev->txreq_free);
 
 
 	if (!list_empty(&dev->txwait)) {
 	if (!list_empty(&dev->txwait)) {
 		/* Wake up first QP wanting a free struct */
 		/* Wake up first QP wanting a free struct */
-		qp = list_entry(dev->txwait.next, struct qib_qp, iowait);
-		list_del_init(&qp->iowait);
+		priv = list_entry(dev->txwait.next, struct qib_qp_priv,
+				  iowait);
+		qp = priv->owner;
+		list_del_init(&priv->iowait);
 		atomic_inc(&qp->refcount);
 		atomic_inc(&qp->refcount);
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
+		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 
 
 		spin_lock_irqsave(&qp->s_lock, flags);
 		spin_lock_irqsave(&qp->s_lock, flags);
-		if (qp->s_flags & QIB_S_WAIT_TX) {
-			qp->s_flags &= ~QIB_S_WAIT_TX;
+		if (qp->s_flags & RVT_S_WAIT_TX) {
+			qp->s_flags &= ~RVT_S_WAIT_TX;
 			qib_schedule_send(qp);
 			qib_schedule_send(qp);
 		}
 		}
 		spin_unlock_irqrestore(&qp->s_lock, flags);
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1044,7 +775,7 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
 		if (atomic_dec_and_test(&qp->refcount))
 		if (atomic_dec_and_test(&qp->refcount))
 			wake_up(&qp->wait);
 			wake_up(&qp->wait);
 	} else
 	} else
-		spin_unlock_irqrestore(&dev->pending_lock, flags);
+		spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 }
 }
 
 
 /*
 /*
@@ -1055,36 +786,39 @@ void qib_put_txreq(struct qib_verbs_txreq *tx)
  */
  */
 void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
 void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
 {
 {
-	struct qib_qp *qp, *nqp;
-	struct qib_qp *qps[20];
+	struct rvt_qp *qp, *nqp;
+	struct qib_qp_priv *qpp, *nqpp;
+	struct rvt_qp *qps[20];
 	struct qib_ibdev *dev;
 	struct qib_ibdev *dev;
 	unsigned i, n;
 	unsigned i, n;
 
 
 	n = 0;
 	n = 0;
 	dev = &ppd->dd->verbs_dev;
 	dev = &ppd->dd->verbs_dev;
-	spin_lock(&dev->pending_lock);
+	spin_lock(&dev->rdi.pending_lock);
 
 
 	/* Search wait list for first QP wanting DMA descriptors. */
 	/* Search wait list for first QP wanting DMA descriptors. */
-	list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) {
+	list_for_each_entry_safe(qpp, nqpp, &dev->dmawait, iowait) {
+		qp = qpp->owner;
+		nqp = nqpp->owner;
 		if (qp->port_num != ppd->port)
 			continue;
 		if (n == ARRAY_SIZE(qps))
 			break;
-		if (qp->s_tx->txreq.sg_count > avail)
+		if (qpp->s_tx->txreq.sg_count > avail)
 			break;
-		avail -= qp->s_tx->txreq.sg_count;
-		list_del_init(&qp->iowait);
+		avail -= qpp->s_tx->txreq.sg_count;
+		list_del_init(&qpp->iowait);
 		atomic_inc(&qp->refcount);
 		qps[n++] = qp;
 	}
 
-	spin_unlock(&dev->pending_lock);
+	spin_unlock(&dev->rdi.pending_lock);
 
 	for (i = 0; i < n; i++) {
 		qp = qps[i];
 		spin_lock(&qp->s_lock);
-		if (qp->s_flags & QIB_S_WAIT_DMA_DESC) {
-			qp->s_flags &= ~QIB_S_WAIT_DMA_DESC;
+		if (qp->s_flags & RVT_S_WAIT_DMA_DESC) {
+			qp->s_flags &= ~RVT_S_WAIT_DMA_DESC;
 			qib_schedule_send(qp);
 		}
 		spin_unlock(&qp->s_lock);
@@ -1100,7 +834,8 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
 {
 	struct qib_verbs_txreq *tx =
 		container_of(cookie, struct qib_verbs_txreq, txreq);
-	struct qib_qp *qp = tx->qp;
+	struct rvt_qp *qp = tx->qp;
+	struct qib_qp_priv *priv = qp->priv;
 
 	spin_lock(&qp->s_lock);
 	if (tx->wqe)
@@ -1117,11 +852,11 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
 		}
 		qib_rc_send_complete(qp, hdr);
 	}
-	if (atomic_dec_and_test(&qp->s_dma_busy)) {
+	if (atomic_dec_and_test(&priv->s_dma_busy)) {
 		if (qp->state == IB_QPS_RESET)
-			wake_up(&qp->wait_dma);
-		else if (qp->s_flags & QIB_S_WAIT_DMA) {
-			qp->s_flags &= ~QIB_S_WAIT_DMA;
+			wake_up(&priv->wait_dma);
+		else if (qp->s_flags & RVT_S_WAIT_DMA) {
+			qp->s_flags &= ~RVT_S_WAIT_DMA;
 			qib_schedule_send(qp);
 		}
 	}
@@ -1130,22 +865,23 @@ static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
 	qib_put_txreq(tx);
 }
 
-static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
+static int wait_kmem(struct qib_ibdev *dev, struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	unsigned long flags;
 	int ret = 0;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->iowait)) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+		spin_lock(&dev->rdi.pending_lock);
+		if (list_empty(&priv->iowait)) {
 			if (list_empty(&dev->memwait))
 				mod_timer(&dev->mem_timer, jiffies + 1);
-			qp->s_flags |= QIB_S_WAIT_KMEM;
-			list_add_tail(&qp->iowait, &dev->memwait);
+			qp->s_flags |= RVT_S_WAIT_KMEM;
+			list_add_tail(&priv->iowait, &dev->memwait);
 		}
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~QIB_S_BUSY;
+		spin_unlock(&dev->rdi.pending_lock);
+		qp->s_flags &= ~RVT_S_BUSY;
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1153,10 +889,11 @@ static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
 	return ret;
 }
 
-static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
-			      u32 hdrwords, struct qib_sge_state *ss, u32 len,
+static int qib_verbs_send_dma(struct rvt_qp *qp, struct qib_ib_header *hdr,
+			      u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 			      u32 plen, u32 dwords)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 	struct qib_devdata *dd = dd_from_dev(dev);
 	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
@@ -1167,9 +904,9 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
 	u32 ndesc;
 	int ret;
 
-	tx = qp->s_tx;
+	tx = priv->s_tx;
 	if (tx) {
-		qp->s_tx = NULL;
+		priv->s_tx = NULL;
 		/* resend previously constructed packet */
 		ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
 		goto bail;
@@ -1182,7 +919,6 @@ static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
 	control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
 				       be16_to_cpu(hdr->lrh[0]) >> 12);
 	tx->qp = qp;
-	atomic_inc(&qp->refcount);
 	tx->wqe = qp->s_wqe;
 	tx->mr = qp->s_rdma_mr;
 	if (qp->s_rdma_mr)
@@ -1245,7 +981,7 @@ err_tx:
 	qib_put_txreq(tx);
 	ret = wait_kmem(dev, qp);
 unaligned:
-	ibp->n_unaligned++;
+	ibp->rvp.n_unaligned++;
 bail:
 	return ret;
 bail_tx:
@@ -1257,8 +993,9 @@ bail_tx:
  * If we are now in the error state, return zero to flush the
  * send work request.
  */
-static int no_bufs_available(struct qib_qp *qp)
+static int no_bufs_available(struct rvt_qp *qp)
 {
+	struct qib_qp_priv *priv = qp->priv;
 	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
 	struct qib_devdata *dd;
 	unsigned long flags;
@@ -1271,25 +1008,25 @@ static int no_bufs_available(struct qib_qp *qp)
 	 * enabling the PIO avail interrupt.
 	 */
 	spin_lock_irqsave(&qp->s_lock, flags);
-	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
-		spin_lock(&dev->pending_lock);
-		if (list_empty(&qp->iowait)) {
+	if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) {
+		spin_lock(&dev->rdi.pending_lock);
+		if (list_empty(&priv->iowait)) {
 			dev->n_piowait++;
-			qp->s_flags |= QIB_S_WAIT_PIO;
-			list_add_tail(&qp->iowait, &dev->piowait);
+			qp->s_flags |= RVT_S_WAIT_PIO;
+			list_add_tail(&priv->iowait, &dev->piowait);
 			dd = dd_from_dev(dev);
 			dd->f_wantpiobuf_intr(dd, 1);
 		}
-		spin_unlock(&dev->pending_lock);
-		qp->s_flags &= ~QIB_S_BUSY;
+		spin_unlock(&dev->rdi.pending_lock);
+		qp->s_flags &= ~RVT_S_BUSY;
 		ret = -EBUSY;
 	}
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 	return ret;
 }
 
-static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
-			      u32 hdrwords, struct qib_sge_state *ss, u32 len,
+static int qib_verbs_send_pio(struct rvt_qp *qp, struct qib_ib_header *ibhdr,
+			      u32 hdrwords, struct rvt_sge_state *ss, u32 len,
 			      u32 plen, u32 dwords)
 {
 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
@@ -1370,7 +1107,7 @@ done:
 	}
 	qib_sendbuf_done(dd, pbufn);
 	if (qp->s_rdma_mr) {
-		qib_put_mr(qp->s_rdma_mr);
+		rvt_put_mr(qp->s_rdma_mr);
 		qp->s_rdma_mr = NULL;
 	}
 	if (qp->s_wqe) {
@@ -1394,10 +1131,10 @@ done:
  * @len: the length of the packet in bytes
  *
  * Return zero if packet is sent or queued OK.
- * Return non-zero and clear qp->s_flags QIB_S_BUSY otherwise.
+ * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise.
  */
-int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
-		   u32 hdrwords, struct qib_sge_state *ss, u32 len)
+int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+		   u32 hdrwords, struct rvt_sge_state *ss, u32 len)
 {
 	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
 	u32 plen;
@@ -1529,10 +1266,11 @@ void qib_ib_piobufavail(struct qib_devdata *dd)
 {
 	struct qib_ibdev *dev = &dd->verbs_dev;
 	struct list_head *list;
-	struct qib_qp *qps[5];
-	struct qib_qp *qp;
+	struct rvt_qp *qps[5];
+	struct rvt_qp *qp;
 	unsigned long flags;
 	unsigned i, n;
+	struct qib_qp_priv *priv;
 
 	list = &dev->piowait;
 	n = 0;
@@ -1543,25 +1281,26 @@ void qib_ib_piobufavail(struct qib_devdata *dd)
 	 * could end up with QPs on the wait list with the interrupt
 	 * disabled.
 	 */
-	spin_lock_irqsave(&dev->pending_lock, flags);
+	spin_lock_irqsave(&dev->rdi.pending_lock, flags);
 	while (!list_empty(list)) {
 		if (n == ARRAY_SIZE(qps))
 			goto full;
-		qp = list_entry(list->next, struct qib_qp, iowait);
-		list_del_init(&qp->iowait);
+		priv = list_entry(list->next, struct qib_qp_priv, iowait);
+		qp = priv->owner;
+		list_del_init(&priv->iowait);
 		atomic_inc(&qp->refcount);
 		qps[n++] = qp;
 	}
 	dd->f_wantpiobuf_intr(dd, 0);
 full:
-	spin_unlock_irqrestore(&dev->pending_lock, flags);
+	spin_unlock_irqrestore(&dev->rdi.pending_lock, flags);
 
 	for (i = 0; i < n; i++) {
 		qp = qps[i];
 
 		spin_lock_irqsave(&qp->s_lock, flags);
-		if (qp->s_flags & QIB_S_WAIT_PIO) {
-			qp->s_flags &= ~QIB_S_WAIT_PIO;
+		if (qp->s_flags & RVT_S_WAIT_PIO) {
+			qp->s_flags &= ~RVT_S_WAIT_PIO;
 			qib_schedule_send(qp);
 		}
 		spin_unlock_irqrestore(&qp->s_lock, flags);
@@ -1572,82 +1311,24 @@ full:
 	}
 }
 
-static int qib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
-			    struct ib_udata *uhw)
-{
-	struct qib_devdata *dd = dd_from_ibdev(ibdev);
-	struct qib_ibdev *dev = to_idev(ibdev);
-
-	if (uhw->inlen || uhw->outlen)
-		return -EINVAL;
-	memset(props, 0, sizeof(*props));
-
-	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
-		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
-		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
-		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
-	props->page_size_cap = PAGE_SIZE;
-	props->vendor_id =
-		QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
-	props->vendor_part_id = dd->deviceid;
-	props->hw_ver = dd->minrev;
-	props->sys_image_guid = ib_qib_sys_image_guid;
-	props->max_mr_size = ~0ULL;
-	props->max_qp = ib_qib_max_qps;
-	props->max_qp_wr = ib_qib_max_qp_wrs;
-	props->max_sge = ib_qib_max_sges;
-	props->max_sge_rd = ib_qib_max_sges;
-	props->max_cq = ib_qib_max_cqs;
-	props->max_ah = ib_qib_max_ahs;
-	props->max_cqe = ib_qib_max_cqes;
-	props->max_mr = dev->lk_table.max;
-	props->max_fmr = dev->lk_table.max;
-	props->max_map_per_fmr = 32767;
-	props->max_pd = ib_qib_max_pds;
-	props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
-	props->max_qp_init_rd_atom = 255;
-	/* props->max_res_rd_atom */
-	props->max_srq = ib_qib_max_srqs;
-	props->max_srq_wr = ib_qib_max_srq_wrs;
-	props->max_srq_sge = ib_qib_max_srq_sges;
-	/* props->local_ca_ack_delay */
-	props->atomic_cap = IB_ATOMIC_GLOB;
-	props->max_pkeys = qib_get_npkeys(dd);
-	props->max_mcast_grp = ib_qib_max_mcast_grps;
-	props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
-	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
-		props->max_mcast_grp;
-
-	return 0;
-}
-
-static int qib_query_port(struct ib_device *ibdev, u8 port,
+static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
 			  struct ib_port_attr *props)
 {
-	struct qib_devdata *dd = dd_from_ibdev(ibdev);
-	struct qib_ibport *ibp = to_iport(ibdev, port);
-	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = dd_from_dev(ibdev);
+	struct qib_pportdata *ppd = &dd->pport[port_num - 1];
 	enum ib_mtu mtu;
 	u16 lid = ppd->lid;
 
-	memset(props, 0, sizeof(*props));
 	props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
 	props->lmc = ppd->lmc;
-	props->sm_lid = ibp->sm_lid;
-	props->sm_sl = ibp->sm_sl;
 	props->state = dd->f_iblink_state(ppd->lastibcstat);
 	props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
-	props->port_cap_flags = ibp->port_cap_flags;
 	props->gid_tbl_len = QIB_GUIDS_PER_PORT;
-	props->max_msg_sz = 0x80000000;
-	props->pkey_tbl_len = qib_get_npkeys(dd);
-	props->bad_pkey_cntr = ibp->pkey_violations;
-	props->qkey_viol_cntr = ibp->qkey_violations;
 	props->active_width = ppd->link_width_active;
 	/* See rate_show() */
 	props->active_speed = ppd->link_speed_active;
 	props->max_vl_num = qib_num_vls(ppd->vls_supported);
-	props->init_type_reply = 0;
 
 	props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
 	switch (ppd->ibmtu) {
@@ -1670,7 +1351,6 @@ static int qib_query_port(struct ib_device *ibdev, u8 port,
 		mtu = IB_MTU_2048;
 	}
 	props->active_mtu = mtu;
-	props->subnet_timeout = ibp->subnet_timeout;
 
 	return 0;
 }
@@ -1714,236 +1394,76 @@ bail:
 	return ret;
 }
 
-static int qib_modify_port(struct ib_device *ibdev, u8 port,
-			   int port_modify_mask, struct ib_port_modify *props)
+static int qib_shut_down_port(struct rvt_dev_info *rdi, u8 port_num)
 {
-	struct qib_ibport *ibp = to_iport(ibdev, port);
-	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-
-	ibp->port_cap_flags |= props->set_port_cap_mask;
-	ibp->port_cap_flags &= ~props->clr_port_cap_mask;
-	if (props->set_port_cap_mask || props->clr_port_cap_mask)
-		qib_cap_mask_chg(ibp);
-	if (port_modify_mask & IB_PORT_SHUTDOWN)
-		qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
-	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
-		ibp->qkey_violations = 0;
-	return 0;
-}
-
-static int qib_query_gid(struct ib_device *ibdev, u8 port,
-			 int index, union ib_gid *gid)
-{
-	struct qib_devdata *dd = dd_from_ibdev(ibdev);
-	int ret = 0;
-
-	if (!port || port > dd->num_pports)
-		ret = -EINVAL;
-	else {
-		struct qib_ibport *ibp = to_iport(ibdev, port);
-		struct qib_pportdata *ppd = ppd_from_ibp(ibp);
-
-		gid->global.subnet_prefix = ibp->gid_prefix;
-		if (index == 0)
-			gid->global.interface_id = ppd->guid;
-		else if (index < QIB_GUIDS_PER_PORT)
-			gid->global.interface_id = ibp->guids[index - 1];
-		else
-			ret = -EINVAL;
-	}
-
-	return ret;
-}
+	struct qib_ibdev *ibdev = container_of(rdi, struct qib_ibdev, rdi);
+	struct qib_devdata *dd = dd_from_dev(ibdev);
+	struct qib_pportdata *ppd = &dd->pport[port_num - 1];
 
-static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev,
-				  struct ib_ucontext *context,
-				  struct ib_udata *udata)
-{
-	struct qib_ibdev *dev = to_idev(ibdev);
-	struct qib_pd *pd;
-	struct ib_pd *ret;
+	qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
 
-	/*
-	 * This is actually totally arbitrary.  Some correctness tests
-	 * assume there's a maximum number of PDs that can be allocated.
-	 * We don't actually have this limit, but we fail the test if
-	 * we allow allocations of more than we report for this value.
-	 */
-
-	pd = kmalloc(sizeof(*pd), GFP_KERNEL);
-	if (!pd) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock(&dev->n_pds_lock);
-	if (dev->n_pds_allocated == ib_qib_max_pds) {
-		spin_unlock(&dev->n_pds_lock);
-		kfree(pd);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_pds_allocated++;
-	spin_unlock(&dev->n_pds_lock);
-
-	/* ib_alloc_pd() will initialize pd->ibpd. */
-	pd->user = udata != NULL;
-
-	ret = &pd->ibpd;
-
-bail:
-	return ret;
+	return 0;
 }
 
-static int qib_dealloc_pd(struct ib_pd *ibpd)
+static int qib_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp,
+			   int guid_index, __be64 *guid)
 {
-	struct qib_pd *pd = to_ipd(ibpd);
-	struct qib_ibdev *dev = to_idev(ibpd->device);
-
-	spin_lock(&dev->n_pds_lock);
-	dev->n_pds_allocated--;
-	spin_unlock(&dev->n_pds_lock);
+	struct qib_ibport *ibp = container_of(rvp, struct qib_ibport, rvp);
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
 
-	kfree(pd);
+	if (guid_index == 0)
+		*guid = ppd->guid;
+	else if (guid_index < QIB_GUIDS_PER_PORT)
+		*guid = ibp->guids[guid_index - 1];
+	else
+		return -EINVAL;
 
 	return 0;
 }
 
 int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
 {
-	/* A multicast address requires a GRH (see ch. 8.4.1). */
-	if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
-	    ah_attr->dlid != QIB_PERMISSIVE_LID &&
-	    !(ah_attr->ah_flags & IB_AH_GRH))
-		goto bail;
-	if ((ah_attr->ah_flags & IB_AH_GRH) &&
-	    ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT)
-		goto bail;
-	if (ah_attr->dlid == 0)
-		goto bail;
-	if (ah_attr->port_num < 1 ||
-	    ah_attr->port_num > ibdev->phys_port_cnt)
-		goto bail;
-	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
-	    ib_rate_to_mult(ah_attr->static_rate) < 0)
-		goto bail;
 	if (ah_attr->sl > 15)
-		goto bail;
+		return -EINVAL;
+
 	return 0;
-bail:
-	return -EINVAL;
 }
 
-/**
- * qib_create_ah - create an address handle
- * @pd: the protection domain
- * @ah_attr: the attributes of the AH
- *
- * This may be called from interrupt context.
- */
-static struct ib_ah *qib_create_ah(struct ib_pd *pd,
-				   struct ib_ah_attr *ah_attr)
+static void qib_notify_new_ah(struct ib_device *ibdev,
+			      struct ib_ah_attr *ah_attr,
+			      struct rvt_ah *ah)
 {
-	struct qib_ah *ah;
-	struct ib_ah *ret;
-	struct qib_ibdev *dev = to_idev(pd->device);
-	unsigned long flags;
+	struct qib_ibport *ibp;
+	struct qib_pportdata *ppd;
 
-	if (qib_check_ah(pd->device, ah_attr)) {
-		ret = ERR_PTR(-EINVAL);
-		goto bail;
-	}
-
-	ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
-	if (!ah) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	if (dev->n_ahs_allocated == ib_qib_max_ahs) {
-		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-		kfree(ah);
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	dev->n_ahs_allocated++;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	/* ib_create_ah() will initialize ah->ibah. */
-	ah->attr = *ah_attr;
-	atomic_set(&ah->refcount, 0);
-
-	ret = &ah->ibah;
+	/*
+	 * Do not trust reading anything from rvt_ah at this point as it is not
+	 * done being setup. We can however modify things which we need to set.
+	 */
 
-bail:
-	return ret;
+	ibp = to_iport(ibdev, ah_attr->port_num);
+	ppd = ppd_from_ibp(ibp);
+	ah->vl = ibp->sl_to_vl[ah->attr.sl];
+	ah->log_pmtu = ilog2(ppd->ibmtu);
 }
 
 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid)
 {
 	struct ib_ah_attr attr;
 	struct ib_ah *ah = ERR_PTR(-EINVAL);
-	struct qib_qp *qp0;
+	struct rvt_qp *qp0;
 
 	memset(&attr, 0, sizeof(attr));
 	attr.dlid = dlid;
 	attr.port_num = ppd_from_ibp(ibp)->port;
 	rcu_read_lock();
-	qp0 = rcu_dereference(ibp->qp0);
+	qp0 = rcu_dereference(ibp->rvp.qp[0]);
 	if (qp0)
 		ah = ib_create_ah(qp0->ibqp.pd, &attr);
 	rcu_read_unlock();
 	return ah;
 }
 
-/**
- * qib_destroy_ah - destroy an address handle
- * @ibah: the AH to destroy
- *
- * This may be called from interrupt context.
- */
-static int qib_destroy_ah(struct ib_ah *ibah)
-{
-	struct qib_ibdev *dev = to_idev(ibah->device);
-	struct qib_ah *ah = to_iah(ibah);
-	unsigned long flags;
-
-	if (atomic_read(&ah->refcount) != 0)
-		return -EBUSY;
-
-	spin_lock_irqsave(&dev->n_ahs_lock, flags);
-	dev->n_ahs_allocated--;
-	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
-
-	kfree(ah);
-
-	return 0;
-}
-
-static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct qib_ah *ah = to_iah(ibah);
-
-	if (qib_check_ah(ibah->device, ah_attr))
-		return -EINVAL;
-
-	ah->attr = *ah_attr;
-
-	return 0;
-}
-
-static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
-{
-	struct qib_ah *ah = to_iah(ibah);
-
-	*ah_attr = ah->attr;
-
-	return 0;
-}
-
 /**
  * qib_get_npkeys - return the size of the PKEY table for context 0
  * @dd: the qlogic_ib device
@@ -1973,75 +1493,27 @@ unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
 	return ret;
 }
 
-static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
-			  u16 *pkey)
-{
-	struct qib_devdata *dd = dd_from_ibdev(ibdev);
-	int ret;
-
-	if (index >= qib_get_npkeys(dd)) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	*pkey = qib_get_pkey(to_iport(ibdev, port), index);
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-/**
- * qib_alloc_ucontext - allocate a ucontest
- * @ibdev: the infiniband device
- * @udata: not used by the QLogic_IB driver
- */
-
-static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev,
-					      struct ib_udata *udata)
-{
-	struct qib_ucontext *context;
-	struct ib_ucontext *ret;
-
-	context = kmalloc(sizeof(*context), GFP_KERNEL);
-	if (!context) {
-		ret = ERR_PTR(-ENOMEM);
-		goto bail;
-	}
-
-	ret = &context->ibucontext;
-
-bail:
-	return ret;
-}
-
-static int qib_dealloc_ucontext(struct ib_ucontext *context)
-{
-	kfree(to_iucontext(context));
-	return 0;
-}
-
 static void init_ibport(struct qib_pportdata *ppd)
 {
 	struct qib_verbs_counters cntrs;
 	struct qib_ibport *ibp = &ppd->ibport_data;
 
-	spin_lock_init(&ibp->lock);
+	spin_lock_init(&ibp->rvp.lock);
 	/* Set the prefix to the default value (see ch. 4.1.1) */
-	ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
-	ibp->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
-	ibp->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
+	ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX;
+	ibp->rvp.sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
+	ibp->rvp.port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
 		IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
 		IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
 		IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
 		IB_PORT_OTHER_LOCAL_CHANGES_SUP;
 	if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
-		ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
-	ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
-	ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
-	ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
-	ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
-	ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
+		ibp->rvp.port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
+	ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
+	ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
+	ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
+	ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
+	ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
 
 	/* Snapshot current HW counters to "clear" them. */
 	qib_get_counters(ppd, &cntrs);
@@ -2061,26 +1533,55 @@ static void init_ibport(struct qib_pportdata *ppd)
 	ibp->z_excessive_buffer_overrun_errors =
 		cntrs.excessive_buffer_overrun_errors;
 	ibp->z_vl15_dropped = cntrs.vl15_dropped;
-	RCU_INIT_POINTER(ibp->qp0, NULL);
-	RCU_INIT_POINTER(ibp->qp1, NULL);
+	RCU_INIT_POINTER(ibp->rvp.qp[0], NULL);
+	RCU_INIT_POINTER(ibp->rvp.qp[1], NULL);
 }
 
-static int qib_port_immutable(struct ib_device *ibdev, u8 port_num,
-			      struct ib_port_immutable *immutable)
+/**
+ * qib_fill_device_attr - Fill in rvt dev info device attributes.
+ * @dd: the device data structure
+ */
+static void qib_fill_device_attr(struct qib_devdata *dd)
 {
-	struct ib_port_attr attr;
-	int err;
-
-	err = qib_query_port(ibdev, port_num, &attr);
-	if (err)
-		return err;
+	struct rvt_dev_info *rdi = &dd->verbs_dev.rdi;
 
-	immutable->pkey_tbl_len = attr.pkey_tbl_len;
-	immutable->gid_tbl_len = attr.gid_tbl_len;
-	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
-	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+	memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props));
 
-	return 0;
+	rdi->dparms.props.max_pd = ib_qib_max_pds;
+	rdi->dparms.props.max_ah = ib_qib_max_ahs;
+	rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
+		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
+		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+	rdi->dparms.props.page_size_cap = PAGE_SIZE;
+	rdi->dparms.props.vendor_id =
+		QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
+	rdi->dparms.props.vendor_part_id = dd->deviceid;
+	rdi->dparms.props.hw_ver = dd->minrev;
+	rdi->dparms.props.sys_image_guid = ib_qib_sys_image_guid;
+	rdi->dparms.props.max_mr_size = ~0ULL;
+	rdi->dparms.props.max_qp = ib_qib_max_qps;
+	rdi->dparms.props.max_qp_wr = ib_qib_max_qp_wrs;
+	rdi->dparms.props.max_sge = ib_qib_max_sges;
+	rdi->dparms.props.max_sge_rd = ib_qib_max_sges;
+	rdi->dparms.props.max_cq = ib_qib_max_cqs;
+	rdi->dparms.props.max_cqe = ib_qib_max_cqes;
+	rdi->dparms.props.max_ah = ib_qib_max_ahs;
+	rdi->dparms.props.max_mr = rdi->lkey_table.max;
+	rdi->dparms.props.max_fmr = rdi->lkey_table.max;
+	rdi->dparms.props.max_map_per_fmr = 32767;
+	rdi->dparms.props.max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
+	rdi->dparms.props.max_qp_init_rd_atom = 255;
+	rdi->dparms.props.max_srq = ib_qib_max_srqs;
+	rdi->dparms.props.max_srq_wr = ib_qib_max_srq_wrs;
+	rdi->dparms.props.max_srq_sge = ib_qib_max_srq_sges;
+	rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB;
+	rdi->dparms.props.max_pkeys = qib_get_npkeys(dd);
+	rdi->dparms.props.max_mcast_grp = ib_qib_max_mcast_grps;
+	rdi->dparms.props.max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
+	rdi->dparms.props.max_total_mcast_qp_attach =
+					rdi->dparms.props.max_mcast_qp_attach *
+					rdi->dparms.props.max_mcast_grp;
 }
 }
 
 /**
 int qib_register_ib_device(struct qib_devdata *dd)
 int qib_register_ib_device(struct qib_devdata *dd)
 {
 	struct qib_ibdev *dev = &dd->verbs_dev;
+	struct ib_device *ibdev = &dev->rdi.ibdev;
 	struct qib_pportdata *ppd = dd->pport;
 	struct qib_pportdata *ppd = dd->pport;
+	unsigned i, ctxt;
 	int ret;
 	int ret;
 
 	get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
 	get_random_bytes(&dev->qp_rnd, sizeof(dev->qp_rnd));
-				dev->qp_table_size,
-				sizeof(*dev->qp_table),
-				GFP_KERNEL);
-	if (!dev->qp_table) {
-		ret = -ENOMEM;
-		goto err_qpt;
-	}
-	for (i = 0; i < dev->qp_table_size; i++)
-		RCU_INIT_POINTER(dev->qp_table[i], NULL);
-
 	for (i = 0; i < dd->num_pports; i++)
 		init_ibport(ppd + i);
 
 	/* Only need to initialize non-zero fields. */
-	spin_lock_init(&dev->qpt_lock);
-	spin_lock_init(&dev->n_pds_lock);
-	spin_lock_init(&dev->n_ahs_lock);
-	spin_lock_init(&dev->n_cqs_lock);
-	spin_lock_init(&dev->n_qps_lock);
-	spin_lock_init(&dev->n_srqs_lock);
-	spin_lock_init(&dev->n_mcast_grps_lock);
-	init_timer(&dev->mem_timer);
-	dev->mem_timer.function = mem_timer;
-	dev->mem_timer.data = (unsigned long) dev;
-
-	qib_init_qpn_table(dd, &dev->qpn_table);
+	setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev);
+
+	qpt_mask = dd->qpn_mask;
 
-	/*
-	 * The top ib_qib_lkey_table_size bits are used to index the
-	 * table.  The lower 8 bits can be owned by the user (copied from
-	 * the LKEY).  The remaining bits act as a generation number or tag.
-	 */
-	spin_lock_init(&dev->lk_table.lock);
-	/* insure generation is at least 4 bits see keys.c */
-	if (ib_qib_lkey_table_size > MAX_LKEY_TABLE_BITS) {
-		qib_dev_warn(dd, "lkey bits %u too large, reduced to %u\n",
-			ib_qib_lkey_table_size, MAX_LKEY_TABLE_BITS);
-		ib_qib_lkey_table_size = MAX_LKEY_TABLE_BITS;
-	}
-	dev->lk_table.max = 1 << ib_qib_lkey_table_size;
-	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
-	dev->lk_table.table = (struct qib_mregion __rcu **)
-		vmalloc(lk_tab_size);
-	if (dev->lk_table.table == NULL) {
-		ret = -ENOMEM;
-		goto err_lk;
-	}
-	RCU_INIT_POINTER(dev->dma_mr, NULL);
-	for (i = 0; i < dev->lk_table.max; i++)
-		RCU_INIT_POINTER(dev->lk_table.table[i], NULL);
-	INIT_LIST_HEAD(&dev->pending_mmaps);
-	spin_lock_init(&dev->pending_lock);
-	dev->mmap_offset = PAGE_SIZE;
-	spin_lock_init(&dev->mmap_offset_lock);
 	INIT_LIST_HEAD(&dev->piowait);
 	INIT_LIST_HEAD(&dev->dmawait);
 	INIT_LIST_HEAD(&dev->txwait);
@@ -2194,110 +1647,91 @@ int qib_register_ib_device(struct qib_devdata *dd)
 	strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
 	ibdev->owner = THIS_MODULE;
 	ibdev->node_guid = ppd->guid;
-	ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION;
-	ibdev->uverbs_cmd_mask =
-		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
-		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
-		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
-		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
-		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
-		(1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_AH)           |
-		(1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
-		(1ull << IB_USER_VERBS_CMD_REG_MR)              |
-		(1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
-		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
-		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
-		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
-		(1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
-		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
-		(1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
-		(1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
-		(1ull << IB_USER_VERBS_CMD_POST_SEND)           |
-		(1ull << IB_USER_VERBS_CMD_POST_RECV)           |
-		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
-		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
-		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
-		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
-		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
-		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
-		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
-	ibdev->node_type = RDMA_NODE_IB_CA;
 	ibdev->phys_port_cnt = dd->num_pports;
-	ibdev->num_comp_vectors = 1;
 	ibdev->dma_device = &dd->pcidev->dev;
-	ibdev->query_device = qib_query_device;
 	ibdev->modify_device = qib_modify_device;
-	ibdev->query_port = qib_query_port;
-	ibdev->modify_port = qib_modify_port;
-	ibdev->query_pkey = qib_query_pkey;
-	ibdev->query_gid = qib_query_gid;
-	ibdev->alloc_ucontext = qib_alloc_ucontext;
-	ibdev->dealloc_ucontext = qib_dealloc_ucontext;
-	ibdev->alloc_pd = qib_alloc_pd;
-	ibdev->dealloc_pd = qib_dealloc_pd;
-	ibdev->create_ah = qib_create_ah;
-	ibdev->destroy_ah = qib_destroy_ah;
-	ibdev->modify_ah = qib_modify_ah;
-	ibdev->query_ah = qib_query_ah;
-	ibdev->create_srq = qib_create_srq;
-	ibdev->modify_srq = qib_modify_srq;
-	ibdev->query_srq = qib_query_srq;
-	ibdev->destroy_srq = qib_destroy_srq;
-	ibdev->create_qp = qib_create_qp;
-	ibdev->modify_qp = qib_modify_qp;
-	ibdev->query_qp = qib_query_qp;
-	ibdev->destroy_qp = qib_destroy_qp;
-	ibdev->post_send = qib_post_send;
-	ibdev->post_recv = qib_post_receive;
-	ibdev->post_srq_recv = qib_post_srq_receive;
-	ibdev->create_cq = qib_create_cq;
-	ibdev->destroy_cq = qib_destroy_cq;
-	ibdev->resize_cq = qib_resize_cq;
-	ibdev->poll_cq = qib_poll_cq;
-	ibdev->req_notify_cq = qib_req_notify_cq;
-	ibdev->get_dma_mr = qib_get_dma_mr;
-	ibdev->reg_user_mr = qib_reg_user_mr;
-	ibdev->dereg_mr = qib_dereg_mr;
-	ibdev->alloc_mr = qib_alloc_mr;
-	ibdev->map_mr_sg = qib_map_mr_sg;
-	ibdev->alloc_fmr = qib_alloc_fmr;
-	ibdev->map_phys_fmr = qib_map_phys_fmr;
-	ibdev->unmap_fmr = qib_unmap_fmr;
-	ibdev->dealloc_fmr = qib_dealloc_fmr;
-	ibdev->attach_mcast = qib_multicast_attach;
-	ibdev->detach_mcast = qib_multicast_detach;
 	ibdev->process_mad = qib_process_mad;
-	ibdev->mmap = qib_mmap;
-	ibdev->dma_ops = &qib_dma_mapping_ops;
-	ibdev->get_port_immutable = qib_port_immutable;
 
 	snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
 		 "Intel Infiniband HCA %s", init_utsname()->nodename);
 
-	ret = ib_register_device(ibdev, qib_create_port_files);
-	if (ret)
-		goto err_reg;
+	/*
+	 * Fill in rvt info object.
+	 */
+	dd->verbs_dev.rdi.driver_f.port_callback = qib_create_port_files;
+	dd->verbs_dev.rdi.driver_f.get_card_name = qib_get_card_name;
+	dd->verbs_dev.rdi.driver_f.get_pci_dev = qib_get_pci_dev;
+	dd->verbs_dev.rdi.driver_f.check_ah = qib_check_ah;
+	dd->verbs_dev.rdi.driver_f.check_send_wqe = qib_check_send_wqe;
+	dd->verbs_dev.rdi.driver_f.notify_new_ah = qib_notify_new_ah;
+	dd->verbs_dev.rdi.driver_f.alloc_qpn = qib_alloc_qpn;
+	dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qib_qp_priv_alloc;
+	dd->verbs_dev.rdi.driver_f.qp_priv_free = qib_qp_priv_free;
+	dd->verbs_dev.rdi.driver_f.free_all_qps = qib_free_all_qps;
+	dd->verbs_dev.rdi.driver_f.notify_qp_reset = qib_notify_qp_reset;
+	dd->verbs_dev.rdi.driver_f.do_send = qib_do_send;
+	dd->verbs_dev.rdi.driver_f.schedule_send = qib_schedule_send;
+	dd->verbs_dev.rdi.driver_f.quiesce_qp = qib_quiesce_qp;
+	dd->verbs_dev.rdi.driver_f.stop_send_queue = qib_stop_send_queue;
+	dd->verbs_dev.rdi.driver_f.flush_qp_waiters = qib_flush_qp_waiters;
+	dd->verbs_dev.rdi.driver_f.notify_error_qp = qib_notify_error_qp;
+	dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = qib_mtu_to_path_mtu;
+	dd->verbs_dev.rdi.driver_f.mtu_from_qp = qib_mtu_from_qp;
+	dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = qib_get_pmtu_from_attr;
+	dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _qib_schedule_send;
+	dd->verbs_dev.rdi.driver_f.query_port_state = qib_query_port;
+	dd->verbs_dev.rdi.driver_f.shut_down_port = qib_shut_down_port;
+	dd->verbs_dev.rdi.driver_f.cap_mask_chg = qib_cap_mask_chg;
+	dd->verbs_dev.rdi.driver_f.notify_create_mad_agent =
+						qib_notify_create_mad_agent;
+	dd->verbs_dev.rdi.driver_f.notify_free_mad_agent =
+						qib_notify_free_mad_agent;
+
+	dd->verbs_dev.rdi.dparms.max_rdma_atomic = QIB_MAX_RDMA_ATOMIC;
+	dd->verbs_dev.rdi.driver_f.get_guid_be = qib_get_guid_be;
+	dd->verbs_dev.rdi.dparms.lkey_table_size = qib_lkey_table_size;
+	dd->verbs_dev.rdi.dparms.qp_table_size = ib_qib_qp_table_size;
+	dd->verbs_dev.rdi.dparms.qpn_start = 1;
+	dd->verbs_dev.rdi.dparms.qpn_res_start = QIB_KD_QP;
+	dd->verbs_dev.rdi.dparms.qpn_res_end = QIB_KD_QP; /* Reserve one QP */
+	dd->verbs_dev.rdi.dparms.qpn_inc = 1;
+	dd->verbs_dev.rdi.dparms.qos_shift = 1;
+	dd->verbs_dev.rdi.dparms.psn_mask = QIB_PSN_MASK;
+	dd->verbs_dev.rdi.dparms.psn_shift = QIB_PSN_SHIFT;
+	dd->verbs_dev.rdi.dparms.psn_modify_mask = QIB_PSN_MASK;
+	dd->verbs_dev.rdi.dparms.nports = dd->num_pports;
+	dd->verbs_dev.rdi.dparms.npkeys = qib_get_npkeys(dd);
+	dd->verbs_dev.rdi.dparms.node = dd->assigned_node_id;
+	dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+	dd->verbs_dev.rdi.dparms.max_mad_size = IB_MGMT_MAD_SIZE;
+
+	snprintf(dd->verbs_dev.rdi.dparms.cq_name,
+		 sizeof(dd->verbs_dev.rdi.dparms.cq_name),
+		 "qib_cq%d", dd->unit);
+
+	qib_fill_device_attr(dd);
+
+	ppd = dd->pport;
+	for (i = 0; i < dd->num_pports; i++, ppd++) {
+		ctxt = ppd->hw_pidx;
+		rvt_init_port(&dd->verbs_dev.rdi,
+			      &ppd->ibport_data.rvp,
+			      i,
+			      dd->rcd[ctxt]->pkeys);
+	}
 
-	ret = qib_create_agents(dev);
+	ret = rvt_register_device(&dd->verbs_dev.rdi);
 	if (ret)
-		goto err_agents;
+		goto err_tx;
 
 	ret = qib_verbs_register_sysfs(dd);
 	if (ret)
 		goto err_class;
 
-	goto bail;
+	return ret;
 
 err_class:
-	qib_free_agents(dev);
-err_agents:
-	ib_unregister_device(ibdev);
-err_reg:
+	rvt_unregister_device(&dd->verbs_dev.rdi);
 err_tx:
 	while (!list_empty(&dev->txreq_free)) {
 		struct list_head *l = dev->txreq_free.next;
@@ -2313,27 +1747,17 @@ err_tx:
 					sizeof(struct qib_pio_header),
 				  dev->pio_hdrs, dev->pio_hdrs_phys);
 err_hdrs:
-	vfree(dev->lk_table.table);
-err_lk:
-	kfree(dev->qp_table);
-err_qpt:
 	qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
-bail:
 	return ret;
 }
 
 void qib_unregister_ib_device(struct qib_devdata *dd)
 {
 	struct qib_ibdev *dev = &dd->verbs_dev;
-	struct ib_device *ibdev = &dev->ibdev;
-	u32 qps_inuse;
-	unsigned lk_tab_size;
 
 	qib_verbs_unregister_sysfs(dd);
 
-	qib_free_agents(dev);
-
-	ib_unregister_device(ibdev);
+	rvt_unregister_device(&dd->verbs_dev.rdi);
 
 	if (!list_empty(&dev->piowait))
 		qib_dev_err(dd, "piowait list not empty!\n");
@@ -2343,16 +1767,8 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
 		qib_dev_err(dd, "txwait list not empty!\n");
 	if (!list_empty(&dev->memwait))
 		qib_dev_err(dd, "memwait list not empty!\n");
-	if (dev->dma_mr)
-		qib_dev_err(dd, "DMA MR not NULL!\n");
-
-	qps_inuse = qib_free_all_qps(dd);
-	if (qps_inuse)
-		qib_dev_err(dd, "QP memory leak! %u still in use\n",
-			    qps_inuse);
 
 	del_timer_sync(&dev->mem_timer);
-	qib_free_qpn_table(&dev->qpn_table);
 	while (!list_empty(&dev->txreq_free)) {
 		struct list_head *l = dev->txreq_free.next;
 		struct qib_verbs_txreq *tx;
@@ -2366,21 +1782,36 @@ void qib_unregister_ib_device(struct qib_devdata *dd)
 				  dd->pport->sdma_descq_cnt *
 					sizeof(struct qib_pio_header),
 				  dev->pio_hdrs, dev->pio_hdrs_phys);
-	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
-	vfree(dev->lk_table.table);
-	kfree(dev->qp_table);
 }
 
-/*
- * This must be called with s_lock held.
+/**
+ * _qib_schedule_send - schedule progress
+ * @qp - the qp
+ *
+ * This schedules progress w/o regard to the s_flags.
+ *
+ * It is only used in post send, which doesn't hold
+ * the s_lock.
  */
-void qib_schedule_send(struct qib_qp *qp)
+void _qib_schedule_send(struct rvt_qp *qp)
 {
-	if (qib_send_ok(qp)) {
-		struct qib_ibport *ibp =
-			to_iport(qp->ibqp.device, qp->port_num);
-		struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_ibport *ibp =
+		to_iport(qp->ibqp.device, qp->port_num);
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_qp_priv *priv = qp->priv;
 
-		queue_work(ppd->qib_wq, &qp->s_work);
-	}
+	queue_work(ppd->qib_wq, &priv->s_work);
+}
+
+/**
+ * qib_schedule_send - schedule progress
+ * @qp - the qp
+ *
+ * This schedules qp progress.  The s_lock
+ * should be held.
+ */
+void qib_schedule_send(struct rvt_qp *qp)
+{
+	if (qib_send_ok(qp))
+		_qib_schedule_send(qp);
 }

+ 64 - 748	drivers/infiniband/hw/qib/qib_verbs.h

@@ -45,6 +45,8 @@
 #include <linux/completion.h>
 #include <rdma/ib_pack.h>
 #include <rdma/ib_user_verbs.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_cq.h>
 
 struct qib_ctxtdata;
 struct qib_pportdata;
@@ -53,9 +55,7 @@ struct qib_verbs_txreq;
 
 #define QIB_MAX_RDMA_ATOMIC     16
 #define QIB_GUIDS_PER_PORT	5
-
-#define QPN_MAX                 (1 << 24)
-#define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+#define QIB_PSN_SHIFT		8
 
 /*
  * Increment this value if any changes that break userspace ABI
@@ -63,12 +63,6 @@ struct qib_verbs_txreq;
  */
 #define QIB_UVERBS_ABI_VERSION       2
 
-/*
- * Define an ib_cq_notify value that is not valid so we know when CQ
- * notifications are armed.
- */
-#define IB_CQ_NONE      (IB_CQ_NEXT_COMP + 1)
-
 #define IB_SEQ_NAK	(3 << 29)
 
 /* AETH NAK opcode values */
@@ -79,17 +73,6 @@ struct qib_verbs_txreq;
 #define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
 #define IB_NAK_INVALID_RD_REQUEST       0x64
 
-/* Flags for checking QP state (see ib_qib_state_ops[]) */
-#define QIB_POST_SEND_OK                0x01
-#define QIB_POST_RECV_OK                0x02
-#define QIB_PROCESS_RECV_OK             0x04
-#define QIB_PROCESS_SEND_OK             0x08
-#define QIB_PROCESS_NEXT_SEND_OK        0x10
-#define QIB_FLUSH_SEND			0x20
-#define QIB_FLUSH_RECV			0x40
-#define QIB_PROCESS_OR_FLUSH_SEND \
-	(QIB_PROCESS_SEND_OK | QIB_FLUSH_SEND)
-
 /* IB Performance Manager status values */
 #define IB_PMA_SAMPLE_STATUS_DONE       0x00
 #define IB_PMA_SAMPLE_STATUS_STARTED    0x01
@@ -203,468 +186,21 @@ struct qib_pio_header {
 } __packed;
 
 /*
- * There is one struct qib_mcast for each multicast GID.
- * All attached QPs are then stored as a list of
- * struct qib_mcast_qp.
+ * qib specific data structure that will be hidden from rvt after the queue pair
+ * is made common.
  */
-struct qib_mcast_qp {
-	struct list_head list;
-	struct qib_qp *qp;
-};
-
-struct qib_mcast {
-	struct rb_node rb_node;
-	union ib_gid mgid;
-	struct list_head qp_list;
-	wait_queue_head_t wait;
-	atomic_t refcount;
-	int n_attached;
-};
-
-/* Protection domain */
-struct qib_pd {
-	struct ib_pd ibpd;
-	int user;               /* non-zero if created from user space */
-};
-
-/* Address Handle */
-struct qib_ah {
-	struct ib_ah ibah;
-	struct ib_ah_attr attr;
-	atomic_t refcount;
-};
-
-/*
- * This structure is used by qib_mmap() to validate an offset
- * when an mmap() request is made.  The vm_area_struct then uses
- * this as its vm_private_data.
- */
-struct qib_mmap_info {
-	struct list_head pending_mmaps;
-	struct ib_ucontext *context;
-	void *obj;
-	__u64 offset;
-	struct kref ref;
-	unsigned size;
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and completion queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- */
-struct qib_cq_wc {
-	u32 head;               /* index of next entry to fill */
-	u32 tail;               /* index of next ib_poll_cq() entry */
-	union {
-		/* these are actually size ibcq.cqe + 1 */
-		struct ib_uverbs_wc uqueue[0];
-		struct ib_wc kqueue[0];
-	};
-};
-
-/*
- * The completion queue structure.
- */
-struct qib_cq {
-	struct ib_cq ibcq;
-	struct kthread_work comptask;
-	struct qib_devdata *dd;
-	spinlock_t lock; /* protect changes in this struct */
-	u8 notify;
-	u8 triggered;
-	struct qib_cq_wc *queue;
-	struct qib_mmap_info *ip;
-};
-
-/*
- * A segment is a linear region of low physical memory.
- * XXX Maybe we should use phys addr here and kmap()/kunmap().
- * Used by the verbs layer.
- */
-struct qib_seg {
-	void *vaddr;
-	size_t length;
-};
-
-/* The number of qib_segs that fit in a page. */
-#define QIB_SEGSZ     (PAGE_SIZE / sizeof(struct qib_seg))
-
-struct qib_segarray {
-	struct qib_seg segs[QIB_SEGSZ];
-};
-
-struct qib_mregion {
-	struct ib_pd *pd;       /* shares refcnt of ibmr.pd */
-	u64 user_base;          /* User's address for this region */
-	u64 iova;               /* IB start address of this region */
-	size_t length;
-	u32 lkey;
-	u32 offset;             /* offset (bytes) to start of region */
-	int access_flags;
-	u32 max_segs;           /* number of qib_segs in all the arrays */
-	u32 mapsz;              /* size of the map array */
-	u8  page_shift;         /* 0 - non unform/non powerof2 sizes */
-	u8  lkey_published;     /* in global table */
-	struct completion comp; /* complete when refcount goes to zero */
-	struct rcu_head list;
-	atomic_t refcount;
-	struct qib_segarray *map[0];    /* the segments */
-};
-
-/*
- * These keep track of the copy progress within a memory region.
- * Used by the verbs layer.
- */
-struct qib_sge {
-	struct qib_mregion *mr;
-	void *vaddr;            /* kernel virtual address of segment */
-	u32 sge_length;         /* length of the SGE */
-	u32 length;             /* remaining length of the segment */
-	u16 m;                  /* current index: mr->map[m] */
-	u16 n;                  /* current index: mr->map[m]->segs[n] */
-};
-
-/* Memory region */
-struct qib_mr {
-	struct ib_mr ibmr;
-	struct ib_umem *umem;
-	u64 *pages;
-	u32 npages;
-	struct qib_mregion mr;  /* must be last */
-};
-
-/*
- * Send work request queue entry.
- * The size of the sg_list is determined when the QP is created and stored
- * in qp->s_max_sge.
- */
-struct qib_swqe {
-	union {
-		struct ib_send_wr wr;   /* don't use wr.sg_list */
-		struct ib_ud_wr ud_wr;
-		struct ib_reg_wr reg_wr;
-		struct ib_rdma_wr rdma_wr;
-		struct ib_atomic_wr atomic_wr;
-	};
-	u32 psn;                /* first packet sequence number */
-	u32 lpsn;               /* last packet sequence number */
-	u32 ssn;                /* send sequence number */
-	u32 length;             /* total length of data in sg_list */
-	struct qib_sge sg_list[0];
-};
-
-/*
- * Receive work request queue entry.
- * The size of the sg_list is determined when the QP (or SRQ) is created
- * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
- */
-struct qib_rwqe {
-	u64 wr_id;
-	u8 num_sge;
-	struct ib_sge sg_list[0];
-};
-
-/*
- * This structure is used to contain the head pointer, tail pointer,
- * and receive work queue entries as a single memory allocation so
- * it can be mmap'ed into user space.
- * Note that the wq array elements are variable size so you can't
- * just index into the array to get the N'th element;
- * use get_rwqe_ptr() instead.
- */
-struct qib_rwq {
-	u32 head;               /* new work requests posted to the head */
-	u32 tail;               /* receives pull requests from here. */
-	struct qib_rwqe wq[0];
-};
-
-struct qib_rq {
-	struct qib_rwq *wq;
-	u32 size;               /* size of RWQE array */
-	u8 max_sge;
-	spinlock_t lock /* protect changes in this struct */
-		____cacheline_aligned_in_smp;
-};
-
-struct qib_srq {
-	struct ib_srq ibsrq;
-	struct qib_rq rq;
-	struct qib_mmap_info *ip;
-	/* send signal when number of RWQEs < limit */
-	u32 limit;
-};
-
-struct qib_sge_state {
-	struct qib_sge *sg_list;      /* next SGE to be used if any */
-	struct qib_sge sge;   /* progress state for the current SGE */
-	u32 total_len;
-	u8 num_sge;
-};
-
-/*
- * This structure holds the information that the send tasklet needs
- * to send a RDMA read response or atomic operation.
- */
-struct qib_ack_entry {
-	u8 opcode;
-	u8 sent;
-	u32 psn;
-	u32 lpsn;
-	union {
-		struct qib_sge rdma_sge;
-		u64 atomic_data;
-	};
-};
-
-/*
- * Variables prefixed with s_ are for the requester (sender).
- * Variables prefixed with r_ are for the responder (receiver).
- * Variables prefixed with ack_ are for responder replies.
- *
- * Common variables are protected by both r_rq.lock and s_lock in that order
- * which only happens in modify_qp() or changing the QP 'state'.
- */
-struct qib_qp {
-	struct ib_qp ibqp;
-	/* read mostly fields above and below */
-	struct ib_ah_attr remote_ah_attr;
-	struct ib_ah_attr alt_ah_attr;
-	struct qib_qp __rcu *next;            /* link list for QPN hash table */
-	struct qib_swqe *s_wq;  /* send work queue */
-	struct qib_mmap_info *ip;
-	struct qib_ib_header *s_hdr;     /* next packet header to send */
-	unsigned long timeout_jiffies;  /* computed from timeout */
-
-	enum ib_mtu path_mtu;
-	u32 remote_qpn;
-	u32 pmtu;		/* decoded from path_mtu */
-	u32 qkey;               /* QKEY for this QP (for UD or RD) */
-	u32 s_size;             /* send work queue size */
-	u32 s_rnr_timeout;      /* number of milliseconds for RNR timeout */
-
-	u8 state;               /* QP state */
-	u8 qp_access_flags;
-	u8 alt_timeout;         /* Alternate path timeout for this QP */
-	u8 timeout;             /* Timeout for this QP */
-	u8 s_srate;
-	u8 s_mig_state;
-	u8 port_num;
-	u8 s_pkey_index;        /* PKEY index to use */
-	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
-	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
-	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
-	u8 s_retry_cnt;         /* number of times to retry */
-	u8 s_rnr_retry_cnt;
-	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
-	u8 s_max_sge;           /* size of s_wq->sg_list */
-	u8 s_draining;
-
-	/* start of read/write fields */
-
-	atomic_t refcount ____cacheline_aligned_in_smp;
-	wait_queue_head_t wait;
-
-
-	struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1]
-		____cacheline_aligned_in_smp;
-	struct qib_sge_state s_rdma_read_sge;
-
-	spinlock_t r_lock ____cacheline_aligned_in_smp;      /* used for APM */
-	unsigned long r_aflags;
-	u64 r_wr_id;            /* ID for current receive WQE */
-	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
-	u32 r_len;              /* total length of r_sge */
-	u32 r_rcv_len;          /* receive data len processed */
-	u32 r_psn;              /* expected rcv packet sequence number */
-	u32 r_msn;              /* message sequence number */
-
-	u8 r_state;             /* opcode of last packet received */
-	u8 r_flags;
-	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
-
-	struct list_head rspwait;       /* link for waititing to respond */
-
-	struct qib_sge_state r_sge;     /* current receive data */
-	struct qib_rq r_rq;             /* receive work queue */
-
-	spinlock_t s_lock ____cacheline_aligned_in_smp;
-	struct qib_sge_state *s_cur_sge;
-	u32 s_flags;
-	struct qib_verbs_txreq *s_tx;
-	struct qib_swqe *s_wqe;
-	struct qib_sge_state s_sge;     /* current send request data */
-	struct qib_mregion *s_rdma_mr;
-	atomic_t s_dma_busy;
-	u32 s_cur_size;         /* size of send packet in bytes */
-	u32 s_len;              /* total length of s_sge */
-	u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
-	u32 s_next_psn;         /* PSN for next request */
-	u32 s_last_psn;         /* last response PSN processed */
-	u32 s_sending_psn;      /* lowest PSN that is being sent */
-	u32 s_sending_hpsn;     /* highest PSN that is being sent */
-	u32 s_psn;              /* current packet sequence number */
-	u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
-	u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
-	u32 s_head;             /* new entries added here */
-	u32 s_tail;             /* next entry to process */
-	u32 s_cur;              /* current work queue entry */
-	u32 s_acked;            /* last un-ACK'ed entry */
-	u32 s_last;             /* last completed entry */
-	u32 s_ssn;              /* SSN of tail entry */
-	u32 s_lsn;              /* limit sequence number (credit) */
-	u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
-	u16 s_rdma_ack_cnt;
-	u8 s_state;             /* opcode of last packet sent */
-	u8 s_ack_state;         /* opcode of packet to ACK */
-	u8 s_nak_state;         /* non-zero if NAK is pending */
-	u8 r_nak_state;         /* non-zero if NAK is pending */
-	u8 s_retry;             /* requester retry counter */
-	u8 s_rnr_retry;         /* requester RNR retry counter */
-	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
-	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
-
-	struct qib_sge_state s_ack_rdma_sge;
-	struct timer_list s_timer;
+struct qib_qp_priv {
+	struct qib_ib_header *s_hdr;    /* next packet header to send */
 	struct list_head iowait;        /* link for wait PIO buf */
-
+	atomic_t s_dma_busy;
+	struct qib_verbs_txreq *s_tx;
 	struct work_struct s_work;
-
 	wait_queue_head_t wait_dma;
-
-	struct qib_sge r_sg_list[0] /* verified SGEs */
-		____cacheline_aligned_in_smp;
+	struct rvt_qp *owner;
 };
 
-/*
- * Atomic bit definitions for r_aflags.
- */
-#define QIB_R_WRID_VALID        0
-#define QIB_R_REWIND_SGE        1
-
-/*
- * Bit definitions for r_flags.
- */
-#define QIB_R_REUSE_SGE 0x01
-#define QIB_R_RDMAR_SEQ 0x02
-#define QIB_R_RSP_NAK   0x04
-#define QIB_R_RSP_SEND  0x08
-#define QIB_R_COMM_EST  0x10
-
-/*
- * Bit definitions for s_flags.
- *
- * QIB_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
- * QIB_S_BUSY - send tasklet is processing the QP
- * QIB_S_TIMER - the RC retry timer is active
- * QIB_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
- * QIB_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
- *                         before processing the next SWQE
- * QIB_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
- *                         before processing the next SWQE
- * QIB_S_WAIT_RNR - waiting for RNR timeout
- * QIB_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
- * QIB_S_WAIT_DMA - waiting for send DMA queue to drain before generating
- *                  next send completion entry not via send DMA
- * QIB_S_WAIT_PIO - waiting for a send buffer to be available
- * QIB_S_WAIT_TX - waiting for a struct qib_verbs_txreq to be available
- * QIB_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
- * QIB_S_WAIT_KMEM - waiting for kernel memory to be available
- * QIB_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
- * QIB_S_WAIT_ACK - waiting for an ACK packet before sending more requests
- * QIB_S_SEND_ONE - send one packet, request ACK, then wait for ACK
- */
-#define QIB_S_SIGNAL_REQ_WR	0x0001
-#define QIB_S_BUSY		0x0002
-#define QIB_S_TIMER		0x0004
-#define QIB_S_RESP_PENDING	0x0008
-#define QIB_S_ACK_PENDING	0x0010
-#define QIB_S_WAIT_FENCE	0x0020
-#define QIB_S_WAIT_RDMAR	0x0040
-#define QIB_S_WAIT_RNR		0x0080
-#define QIB_S_WAIT_SSN_CREDIT	0x0100
-#define QIB_S_WAIT_DMA		0x0200
-#define QIB_S_WAIT_PIO		0x0400
-#define QIB_S_WAIT_TX		0x0800
-#define QIB_S_WAIT_DMA_DESC	0x1000
-#define QIB_S_WAIT_KMEM		0x2000
-#define QIB_S_WAIT_PSN		0x4000
-#define QIB_S_WAIT_ACK		0x8000
-#define QIB_S_SEND_ONE		0x10000
-#define QIB_S_UNLIMITED_CREDIT	0x20000
-
-/*
- * Wait flags that would prevent any packet type from being sent.
- */
-#define QIB_S_ANY_WAIT_IO (QIB_S_WAIT_PIO | QIB_S_WAIT_TX | \
-	QIB_S_WAIT_DMA_DESC | QIB_S_WAIT_KMEM)
-
-/*
- * Wait flags that would prevent send work requests from making progress.
- */
-#define QIB_S_ANY_WAIT_SEND (QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | \
-	QIB_S_WAIT_RNR | QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_DMA | \
-	QIB_S_WAIT_PSN | QIB_S_WAIT_ACK)
-
-#define QIB_S_ANY_WAIT (QIB_S_ANY_WAIT_IO | QIB_S_ANY_WAIT_SEND)
-
 #define QIB_PSN_CREDIT  16
 
 
-/*
- * Since struct qib_swqe is not a fixed size, we can't simply index into
- * struct qib_qp.s_wq.  This function does the array index computation.
- */
-static inline struct qib_swqe *get_swqe_ptr(struct qib_qp *qp,
-					      unsigned n)
-{
-	return (struct qib_swqe *)((char *)qp->s_wq +
-				     (sizeof(struct qib_swqe) +
-				      qp->s_max_sge *
-				      sizeof(struct qib_sge)) * n);
-}
-
-/*
- * Since struct qib_rwqe is not a fixed size, we can't simply index into
- * struct qib_rwq.wq.  This function does the array index computation.
- */
-static inline struct qib_rwqe *get_rwqe_ptr(struct qib_rq *rq, unsigned n)
-{
-	return (struct qib_rwqe *)
-		((char *) rq->wq->wq +
-		 (sizeof(struct qib_rwqe) +
-		  rq->max_sge * sizeof(struct ib_sge)) * n);
-}
-
-/*
- * QPN-map pages start out as NULL, they get allocated upon
- * first use and are never deallocated. This way,
- * large bitmaps are not allocated unless large numbers of QPs are used.
- */
-struct qpn_map {
-	void *page;
-};
-
-struct qib_qpn_table {
-	spinlock_t lock; /* protect changes in this struct */
-	unsigned flags;         /* flags for QP0/1 allocated for each port */
-	u32 last;               /* last QP number allocated */
-	u32 nmaps;              /* size of the map table */
-	u16 limit;
-	u16 mask;
-	/* bit map of free QP numbers other than 0/1 */
-	struct qpn_map map[QPNMAP_ENTRIES];
-};
-
-#define MAX_LKEY_TABLE_BITS 23
-
-struct qib_lkey_table {
-	spinlock_t lock; /* protect changes in this struct */
-	u32 next;               /* next unused index (speeds search) */
-	u32 gen;                /* generation count */
-	u32 max;                /* size of the table */
-	struct qib_mregion __rcu **table;
-};
-
 struct qib_opcode_stats {
 	u64 n_packets;          /* number of packets */
 	u64 n_bytes;            /* total number of bytes */
@@ -682,21 +218,9 @@ struct qib_pma_counters {
 };
 
 
 struct qib_ibport {
-	struct qib_qp __rcu *qp0;
-	struct qib_qp __rcu *qp1;
-	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
-	struct qib_ah *sm_ah;
-	struct qib_ah *smi_ah;
-	struct rb_root mcast_tree;
-	spinlock_t lock;		/* protect changes in this struct */
-
-	/* non-zero when timer is set */
-	unsigned long mkey_lease_timeout;
-	unsigned long trap_timeout;
-	__be64 gid_prefix;      /* in network order */
-	__be64 mkey;
+	struct rvt_ibport rvp;
+	struct rvt_ah *smi_ah;
 	__be64 guids[QIB_GUIDS_PER_PORT	- 1];	/* writable GUIDs */
-	u64 tid;		/* TID for traps */
 	struct qib_pma_counters __percpu *pmastats;
 	u64 z_unicast_xmit;     /* starting count for PMA */
 	u64 z_unicast_rcv;      /* starting count for PMA */
@@ -715,82 +239,25 @@ struct qib_ibport {
 	u32 z_local_link_integrity_errors;      /* starting count for PMA */
 	u32 z_excessive_buffer_overrun_errors;  /* starting count for PMA */
 	u32 z_vl15_dropped;                     /* starting count for PMA */
-	u32 n_rc_resends;
-	u32 n_rc_acks;
-	u32 n_rc_qacks;
-	u32 n_rc_delayed_comp;
-	u32 n_seq_naks;
-	u32 n_rdma_seq;
-	u32 n_rnr_naks;
-	u32 n_other_naks;
-	u32 n_loop_pkts;
-	u32 n_pkt_drops;
-	u32 n_vl15_dropped;
-	u32 n_rc_timeouts;
-	u32 n_dmawait;
-	u32 n_unaligned;
-	u32 n_rc_dupreq;
-	u32 n_rc_seqnak;
-	u32 port_cap_flags;
-	u32 pma_sample_start;
-	u32 pma_sample_interval;
-	__be16 pma_counter_select[5];
-	u16 pma_tag;
-	u16 pkey_violations;
-	u16 qkey_violations;
-	u16 mkey_violations;
-	u16 mkey_lease_period;
-	u16 sm_lid;
-	u16 repress_traps;
-	u8 sm_sl;
-	u8 mkeyprot;
-	u8 subnet_timeout;
-	u8 vl_high_limit;
 	u8 sl_to_vl[16];
-
 };
 
 
-
 struct qib_ibdev {
-	struct ib_device ibdev;
-	struct list_head pending_mmaps;
-	spinlock_t mmap_offset_lock; /* protect mmap_offset */
-	u32 mmap_offset;
-	struct qib_mregion __rcu *dma_mr;
-
-	/* QP numbers are shared by all IB ports */
-	struct qib_qpn_table qpn_table;
-	struct qib_lkey_table lk_table;
+	struct rvt_dev_info rdi;
+
 	struct list_head piowait;       /* list for wait PIO buf */
 	struct list_head dmawait;	/* list for wait DMA */
 	struct list_head txwait;        /* list for wait qib_verbs_txreq */
 	struct list_head memwait;       /* list for wait kernel memory */
 	struct list_head txreq_free;
 	struct timer_list mem_timer;
-	struct qib_qp __rcu **qp_table;
 	struct qib_pio_header *pio_hdrs;
 	dma_addr_t pio_hdrs_phys;
-	/* list of QPs waiting for RNR timer */
-	spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */
-	u32 qp_table_size; /* size of the hash table */
 	u32 qp_rnd; /* random bytes for hash */
-	spinlock_t qpt_lock;
 
 
 	u32 n_piowait;
 	u32 n_txwait;
 
 
-	u32 n_pds_allocated;    /* number of PDs allocated for device */
-	spinlock_t n_pds_lock;
-	u32 n_ahs_allocated;    /* number of AHs allocated for device */
-	spinlock_t n_ahs_lock;
-	u32 n_cqs_allocated;    /* number of CQs allocated for device */
-	spinlock_t n_cqs_lock;
-	u32 n_qps_allocated;    /* number of QPs allocated for device */
-	spinlock_t n_qps_lock;
-	u32 n_srqs_allocated;   /* number of SRQs allocated for device */
-	spinlock_t n_srqs_lock;
-	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
-	spinlock_t n_mcast_grps_lock;
 #ifdef CONFIG_DEBUG_FS
 	/* per HCA debugfs */
 	struct dentry *qib_ibdev_dbg;
@@ -813,56 +280,27 @@ struct qib_verbs_counters {
 	u32 vl15_dropped;
 };
 
 
-static inline struct qib_mr *to_imr(struct ib_mr *ibmr)
-{
-	return container_of(ibmr, struct qib_mr, ibmr);
-}
-
-static inline struct qib_pd *to_ipd(struct ib_pd *ibpd)
-{
-	return container_of(ibpd, struct qib_pd, ibpd);
-}
-
-static inline struct qib_ah *to_iah(struct ib_ah *ibah)
-{
-	return container_of(ibah, struct qib_ah, ibah);
-}
-
-static inline struct qib_cq *to_icq(struct ib_cq *ibcq)
-{
-	return container_of(ibcq, struct qib_cq, ibcq);
-}
-
-static inline struct qib_srq *to_isrq(struct ib_srq *ibsrq)
-{
-	return container_of(ibsrq, struct qib_srq, ibsrq);
-}
-
-static inline struct qib_qp *to_iqp(struct ib_qp *ibqp)
-{
-	return container_of(ibqp, struct qib_qp, ibqp);
-}
-
 static inline struct qib_ibdev *to_idev(struct ib_device *ibdev)
 {
-	return container_of(ibdev, struct qib_ibdev, ibdev);
+	struct rvt_dev_info *rdi;
+
+	rdi = container_of(ibdev, struct rvt_dev_info, ibdev);
+	return container_of(rdi, struct qib_ibdev, rdi);
 }
 
 /*
  * Send if not busy or waiting for I/O and either
  * a RC response is pending or we can process send work requests.
  */
-static inline int qib_send_ok(struct qib_qp *qp)
+static inline int qib_send_ok(struct rvt_qp *qp)
 {
-	return !(qp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT_IO)) &&
-		(qp->s_hdrwords || (qp->s_flags & QIB_S_RESP_PENDING) ||
-		 !(qp->s_flags & QIB_S_ANY_WAIT_SEND));
+	return !(qp->s_flags & (RVT_S_BUSY | RVT_S_ANY_WAIT_IO)) &&
+		(qp->s_hdrwords || (qp->s_flags & RVT_S_RESP_PENDING) ||
+		 !(qp->s_flags & RVT_S_ANY_WAIT_SEND));
 }
 
 
-/*
- * This must be called with s_lock held.
- */
-void qib_schedule_send(struct qib_qp *qp);
+void _qib_schedule_send(struct rvt_qp *qp);
+void qib_schedule_send(struct rvt_qp *qp);
 
 
 static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
 {
@@ -878,7 +316,7 @@ static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
 
 
 void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
 		   u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
-void qib_cap_mask_chg(struct qib_ibport *ibp);
+void qib_cap_mask_chg(struct rvt_dev_info *rdi, u8 port_num);
 void qib_sys_guid_chg(struct qib_ibport *ibp);
 void qib_node_desc_chg(struct qib_ibport *ibp);
 int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
@@ -886,8 +324,8 @@ int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 		    const struct ib_mad_hdr *in, size_t in_mad_size,
 		    struct ib_mad_hdr *out, size_t *out_mad_size,
 		    u16 *out_mad_pkey_index);
-int qib_create_agents(struct qib_ibdev *dev);
-void qib_free_agents(struct qib_ibdev *dev);
+void qib_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx);
+void qib_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx);
 
 
 /*
  * Compare the lower 24 bits of the two values.
@@ -898,8 +336,6 @@ static inline int qib_cmp24(u32 a, u32 b)
 	return (((int) a) - ((int) b)) << 8;
 }
 
 
-struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid);
-
 int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
 			  u64 *rwords, u64 *spkts, u64 *rpkts,
 			  u64 *xmit_wait);
@@ -907,35 +343,17 @@ int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
 int qib_get_counters(struct qib_pportdata *ppd,
 		     struct qib_verbs_counters *cntrs);
 
 
-int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
-
-int qib_mcast_tree_empty(struct qib_ibport *ibp);
-
-__be32 qib_compute_aeth(struct qib_qp *qp);
-
-struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn);
-
-struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
-			    struct ib_qp_init_attr *init_attr,
-			    struct ib_udata *udata);
-
-int qib_destroy_qp(struct ib_qp *ibqp);
-
-int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err);
-
-int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		  int attr_mask, struct ib_udata *udata);
-
-int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
-		 int attr_mask, struct ib_qp_init_attr *init_attr);
-
-unsigned qib_free_all_qps(struct qib_devdata *dd);
+__be32 qib_compute_aeth(struct rvt_qp *qp);
 
 
-void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
-
-void qib_free_qpn_table(struct qib_qpn_table *qpt);
+/*
+ * Functions provided by qib driver for rdmavt to use
+ */
+unsigned qib_free_all_qps(struct rvt_dev_info *rdi);
+void *qib_qp_priv_alloc(struct rvt_dev_info *rdi, struct rvt_qp *qp, gfp_t gfp);
+void qib_qp_priv_free(struct rvt_dev_info *rdi, struct rvt_qp *qp);
+void qib_notify_qp_reset(struct rvt_qp *qp);
+int qib_alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
+		  enum ib_qp_type type, u8 port, gfp_t gfp);
 
 
 #ifdef CONFIG_DEBUG_FS
 
 
@@ -949,7 +367,7 @@ void qib_qp_iter_print(struct seq_file *s, struct qib_qp_iter *iter);
 
 
 #endif
 
 
-void qib_get_credit(struct qib_qp *qp, u32 aeth);
+void qib_get_credit(struct rvt_qp *qp, u32 aeth);
 
 
 unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
 
 
@@ -957,166 +375,66 @@ void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail);
 
 
 void qib_put_txreq(struct qib_verbs_txreq *tx);
 
 
-int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
-		   u32 hdrwords, struct qib_sge_state *ss, u32 len);
+int qib_verbs_send(struct rvt_qp *qp, struct qib_ib_header *hdr,
+		   u32 hdrwords, struct rvt_sge_state *ss, u32 len);
 
 
-void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length,
+void qib_copy_sge(struct rvt_sge_state *ss, void *data, u32 length,
 		  int release);
 
 
-void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release);
+void qib_skip_sge(struct rvt_sge_state *ss, u32 length, int release);
 
 
 void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
 
 void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
 
 int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
 
 
+int qib_check_send_wqe(struct rvt_qp *qp, struct rvt_swqe *wqe);
+
 struct ib_ah *qib_create_qp0_ah(struct qib_ibport *ibp, u16 dlid);
 
 
 void qib_rc_rnr_retry(unsigned long arg);
 
 
-void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr);
+void qib_rc_send_complete(struct rvt_qp *qp, struct qib_ib_header *hdr);
 
 
-void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err);
+void qib_rc_error(struct rvt_qp *qp, enum ib_wc_status err);
 
 
-int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr);
+int qib_post_ud_send(struct rvt_qp *qp, struct ib_send_wr *wr);
 
 
 void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
-
-int qib_alloc_lkey(struct qib_mregion *mr, int dma_region);
-
-void qib_free_lkey(struct qib_mregion *mr);
-
-int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
-		struct qib_sge *isge, struct ib_sge *sge, int acc);
-
-int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
-		u32 len, u64 vaddr, u32 rkey, int acc);
-
-int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
-			 struct ib_recv_wr **bad_wr);
-
-struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
-			      struct ib_srq_init_attr *srq_init_attr,
-			      struct ib_udata *udata);
-
-int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
-		   enum ib_srq_attr_mask attr_mask,
-		   struct ib_udata *udata);
-
-int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
-
-int qib_destroy_srq(struct ib_srq *ibsrq);
-
-int qib_cq_init(struct qib_devdata *dd);
-
-void qib_cq_exit(struct qib_devdata *dd);
-
-void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
-
-int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
-
-struct ib_cq *qib_create_cq(struct ib_device *ibdev,
-			    const struct ib_cq_init_attr *attr,
-			    struct ib_ucontext *context,
-			    struct ib_udata *udata);
-
-int qib_destroy_cq(struct ib_cq *ibcq);
-
-int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
-
-int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
-
-struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
-
-struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
-			      u64 virt_addr, int mr_access_flags,
-			      struct ib_udata *udata);
-
-int qib_dereg_mr(struct ib_mr *ibmr);
-
-struct ib_mr *qib_alloc_mr(struct ib_pd *pd,
-			   enum ib_mr_type mr_type,
-			   u32 max_entries);
-
-int qib_map_mr_sg(struct ib_mr *ibmr,
-		  struct scatterlist *sg,
-		  int sg_nents);
-
-int qib_reg_mr(struct qib_qp *qp, struct ib_reg_wr *wr);
-
-struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
-			     struct ib_fmr_attr *fmr_attr);
-
-int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
-		     int list_len, u64 iova);
-
-int qib_unmap_fmr(struct list_head *fmr_list);
-
-int qib_dealloc_fmr(struct ib_fmr *ibfmr);
-
-static inline void qib_get_mr(struct qib_mregion *mr)
-{
-	atomic_inc(&mr->refcount);
-}
+		int has_grh, void *data, u32 tlen, struct rvt_qp *qp);
 
 
 void mr_rcu_callback(struct rcu_head *list);
 
 
-static inline void qib_put_mr(struct qib_mregion *mr)
-{
-	if (unlikely(atomic_dec_and_test(&mr->refcount)))
-		call_rcu(&mr->list, mr_rcu_callback);
-}
-
-static inline void qib_put_ss(struct qib_sge_state *ss)
-{
-	while (ss->num_sge) {
-		qib_put_mr(ss->sge.mr);
-		if (--ss->num_sge)
-			ss->sge = *ss->sg_list++;
-	}
-}
-
-
-void qib_release_mmap_info(struct kref *ref);
+int qib_get_rwqe(struct rvt_qp *qp, int wr_id_only);
 
 
-struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,
-					   struct ib_ucontext *context,
-					   void *obj);
-
-void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
-			  u32 size, void *obj);
-
-int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
-
-int qib_get_rwqe(struct qib_qp *qp, int wr_id_only);
-
-void qib_migrate_qp(struct qib_qp *qp);
+void qib_migrate_qp(struct rvt_qp *qp);
 
 
 int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
-		      int has_grh, struct qib_qp *qp, u32 bth0);
+		      int has_grh, struct rvt_qp *qp, u32 bth0);
 
 
 u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
 		 struct ib_global_route *grh, u32 hwords, u32 nwords);
 
 
-void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+void qib_make_ruc_header(struct rvt_qp *qp, struct qib_other_headers *ohdr,
 			 u32 bth0, u32 bth2);
 
 
-void qib_do_send(struct work_struct *work);
+void _qib_do_send(struct work_struct *work);
+
+void qib_do_send(struct rvt_qp *qp);
 
 
-void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+void qib_send_complete(struct rvt_qp *qp, struct rvt_swqe *wqe,
 		       enum ib_wc_status status);
 
 
-void qib_send_rc_ack(struct qib_qp *qp);
+void qib_send_rc_ack(struct rvt_qp *qp);
 
 
-int qib_make_rc_req(struct qib_qp *qp);
+int qib_make_rc_req(struct rvt_qp *qp);
 
 
-int qib_make_uc_req(struct qib_qp *qp);
+int qib_make_uc_req(struct rvt_qp *qp);
 
 
-int qib_make_ud_req(struct qib_qp *qp);
+int qib_make_ud_req(struct rvt_qp *qp);
 
 
 int qib_register_ib_device(struct qib_devdata *);
 
 
@@ -1150,11 +468,11 @@ extern const enum ib_wc_opcode ib_qib_wc_opcode[];
 #define IB_PHYSPORTSTATE_CFG_ENH 0x10
 #define IB_PHYSPORTSTATE_CFG_WAIT_ENH 0x13
 
 
-extern const int ib_qib_state_ops[];
+extern const int ib_rvt_state_ops[];
 
 
 extern __be64 ib_qib_sys_image_guid;    /* in network order */
 
 
-extern unsigned int ib_qib_lkey_table_size;
+extern unsigned int ib_rvt_lkey_table_size;
 
 
 extern unsigned int ib_qib_max_cqes;
 
 
@@ -1178,6 +496,4 @@ extern unsigned int ib_qib_max_srq_wrs;
 
 
 extern const u32 ib_qib_rnr_table[];
 
 
-extern struct ib_dma_mapping_ops qib_dma_mapping_ops;
-
 #endif                          /* QIB_VERBS_H */

+ 0 - 363
drivers/infiniband/hw/qib/qib_verbs_mcast.c

@@ -1,363 +0,0 @@
-/*
- * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
- * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/rculist.h>
-
-#include "qib.h"
-
-/**
- * qib_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
- * @qp: the QP to link
- */
-static struct qib_mcast_qp *qib_mcast_qp_alloc(struct qib_qp *qp)
-{
-	struct qib_mcast_qp *mqp;
-
-	mqp = kmalloc(sizeof(*mqp), GFP_KERNEL);
-	if (!mqp)
-		goto bail;
-
-	mqp->qp = qp;
-	atomic_inc(&qp->refcount);
-
-bail:
-	return mqp;
-}
-
-static void qib_mcast_qp_free(struct qib_mcast_qp *mqp)
-{
-	struct qib_qp *qp = mqp->qp;
-
-	/* Notify qib_destroy_qp() if it is waiting. */
-	if (atomic_dec_and_test(&qp->refcount))
-		wake_up(&qp->wait);
-
-	kfree(mqp);
-}
-
-/**
- * qib_mcast_alloc - allocate the multicast GID structure
- * @mgid: the multicast GID
- *
- * A list of QPs will be attached to this structure.
- */
-static struct qib_mcast *qib_mcast_alloc(union ib_gid *mgid)
-{
-	struct qib_mcast *mcast;
-
-	mcast = kmalloc(sizeof(*mcast), GFP_KERNEL);
-	if (!mcast)
-		goto bail;
-
-	mcast->mgid = *mgid;
-	INIT_LIST_HEAD(&mcast->qp_list);
-	init_waitqueue_head(&mcast->wait);
-	atomic_set(&mcast->refcount, 0);
-	mcast->n_attached = 0;
-
-bail:
-	return mcast;
-}
-
-static void qib_mcast_free(struct qib_mcast *mcast)
-{
-	struct qib_mcast_qp *p, *tmp;
-
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
-		qib_mcast_qp_free(p);
-
-	kfree(mcast);
-}
-
-/**
- * qib_mcast_find - search the global table for the given multicast GID
- * @ibp: the IB port structure
- * @mgid: the multicast GID to search for
- *
- * Returns NULL if not found.
- *
- * The caller is responsible for decrementing the reference count if found.
- */
-struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid)
-{
-	struct rb_node *n;
-	unsigned long flags;
-	struct qib_mcast *mcast;
-
-	spin_lock_irqsave(&ibp->lock, flags);
-	n = ibp->mcast_tree.rb_node;
-	while (n) {
-		int ret;
-
-		mcast = rb_entry(n, struct qib_mcast, rb_node);
-
-		ret = memcmp(mgid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else {
-			atomic_inc(&mcast->refcount);
-			spin_unlock_irqrestore(&ibp->lock, flags);
-			goto bail;
-		}
-	}
-	spin_unlock_irqrestore(&ibp->lock, flags);
-
-	mcast = NULL;
-
-bail:
-	return mcast;
-}
-
-/**
- * qib_mcast_add - insert mcast GID into table and attach QP struct
- * @mcast: the mcast GID table
- * @mqp: the QP to attach
- *
- * Return zero if both were added.  Return EEXIST if the GID was already in
- * the table but the QP was added.  Return ESRCH if the QP was already
- * attached and neither structure was added.
- */
-static int qib_mcast_add(struct qib_ibdev *dev, struct qib_ibport *ibp,
-			 struct qib_mcast *mcast, struct qib_mcast_qp *mqp)
-{
-	struct rb_node **n = &ibp->mcast_tree.rb_node;
-	struct rb_node *pn = NULL;
-	int ret;
-
-	spin_lock_irq(&ibp->lock);
-
-	while (*n) {
-		struct qib_mcast *tmcast;
-		struct qib_mcast_qp *p;
-
-		pn = *n;
-		tmcast = rb_entry(pn, struct qib_mcast, rb_node);
-
-		ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0) {
-			n = &pn->rb_left;
-			continue;
-		}
-		if (ret > 0) {
-			n = &pn->rb_right;
-			continue;
-		}
-
-		/* Search the QP list to see if this is already there. */
-		list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
-			if (p->qp == mqp->qp) {
-				ret = ESRCH;
-				goto bail;
-			}
-		}
-		if (tmcast->n_attached == ib_qib_max_mcast_qp_attached) {
-			ret = ENOMEM;
-			goto bail;
-		}
-
-		tmcast->n_attached++;
-
-		list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
-		ret = EEXIST;
-		goto bail;
-	}
-
-	spin_lock(&dev->n_mcast_grps_lock);
-	if (dev->n_mcast_grps_allocated == ib_qib_max_mcast_grps) {
-		spin_unlock(&dev->n_mcast_grps_lock);
-		ret = ENOMEM;
-		goto bail;
-	}
-
-	dev->n_mcast_grps_allocated++;
-	spin_unlock(&dev->n_mcast_grps_lock);
-
-	mcast->n_attached++;
-
-	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
-
-	atomic_inc(&mcast->refcount);
-	rb_link_node(&mcast->rb_node, pn, n);
-	rb_insert_color(&mcast->rb_node, &ibp->mcast_tree);
-
-	ret = 0;
-
-bail:
-	spin_unlock_irq(&ibp->lock);
-
-	return ret;
-}
-
-int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct qib_qp *qp = to_iqp(ibqp);
-	struct qib_ibdev *dev = to_idev(ibqp->device);
-	struct qib_ibport *ibp;
-	struct qib_mcast *mcast;
-	struct qib_mcast_qp *mqp;
-	int ret;
-
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-		ret = -EINVAL;
-		goto bail;
-	}
-
-	/*
-	 * Allocate data structures since its better to do this outside of
-	 * spin locks and it will most likely be needed.
-	 */
-	mcast = qib_mcast_alloc(gid);
-	if (mcast == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	mqp = qib_mcast_qp_alloc(qp);
-	if (mqp == NULL) {
-		qib_mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-	}
-	ibp = to_iport(ibqp->device, qp->port_num);
-	switch (qib_mcast_add(dev, ibp, mcast, mqp)) {
-	case ESRCH:
-		/* Neither was used: OK to attach the same QP twice. */
-		qib_mcast_qp_free(mqp);
-		qib_mcast_free(mcast);
-		break;
-
-	case EEXIST:            /* The mcast wasn't used */
-		qib_mcast_free(mcast);
-		break;
-
-	case ENOMEM:
-		/* Exceeded the maximum number of mcast groups. */
-		qib_mcast_qp_free(mqp);
-		qib_mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-
-	default:
-		break;
-	}
-
-	ret = 0;
-
-bail:
-	return ret;
-}
-
-int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
-{
-	struct qib_qp *qp = to_iqp(ibqp);
-	struct qib_ibdev *dev = to_idev(ibqp->device);
-	struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
-	struct qib_mcast *mcast = NULL;
-	struct qib_mcast_qp *p, *tmp, *delp = NULL;
-	struct rb_node *n;
-	int last = 0;
-	int ret;
-
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
-		return -EINVAL;
-
-	spin_lock_irq(&ibp->lock);
-
-	/* Find the GID in the mcast table. */
-	n = ibp->mcast_tree.rb_node;
-	while (1) {
-		if (n == NULL) {
-			spin_unlock_irq(&ibp->lock);
-			return -EINVAL;
-		}
-
-		mcast = rb_entry(n, struct qib_mcast, rb_node);
-		ret = memcmp(gid->raw, mcast->mgid.raw,
-			     sizeof(union ib_gid));
-		if (ret < 0)
-			n = n->rb_left;
-		else if (ret > 0)
-			n = n->rb_right;
-		else
-			break;
-	}
-
-	/* Search the QP list. */
-	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
-		if (p->qp != qp)
-			continue;
-		/*
-		 * We found it, so remove it, but don't poison the forward
-		 * link until we are sure there are no list walkers.
-		 */
-		list_del_rcu(&p->list);
-		mcast->n_attached--;
-		delp = p;
-
-		/* If this was the last attached QP, remove the GID too. */
-		if (list_empty(&mcast->qp_list)) {
-			rb_erase(&mcast->rb_node, &ibp->mcast_tree);
-			last = 1;
-		}
-		break;
-	}
-
-	spin_unlock_irq(&ibp->lock);
-	/* QP not attached */
-	if (!delp)
-		return -EINVAL;
-	/*
-	 * Wait for any list walkers to finish before freeing the
-	 * list element.
-	 */
-	wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-	qib_mcast_qp_free(delp);
-
-	if (last) {
-		atomic_dec(&mcast->refcount);
-		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
-		qib_mcast_free(mcast);
-		spin_lock_irq(&dev->n_mcast_grps_lock);
-		dev->n_mcast_grps_allocated--;
-		spin_unlock_irq(&dev->n_mcast_grps_lock);
-	}
-	return 0;
-}
-
-int qib_mcast_tree_empty(struct qib_ibport *ibp)
-{
-	return ibp->mcast_tree.rb_node == NULL;
-}

+ 1 - 0
drivers/infiniband/sw/Makefile

@@ -0,0 +1 @@
+obj-$(CONFIG_INFINIBAND_RDMAVT)		+= rdmavt/

+ 6 - 0
drivers/infiniband/sw/rdmavt/Kconfig

@@ -0,0 +1,6 @@
+config INFINIBAND_RDMAVT
+	tristate "RDMA verbs transport library"
+	depends on 64BIT
+	default m
+	---help---
+	This is a common software verbs provider for RDMA networks.

+ 13 - 0
drivers/infiniband/sw/rdmavt/Makefile

@@ -0,0 +1,13 @@
+#
+# rdmavt driver
+#
+#
+#
+# Called from the kernel module build system.
+#
+obj-$(CONFIG_INFINIBAND_RDMAVT) += rdmavt.o
+
+rdmavt-y := vt.o ah.o cq.o dma.o mad.o mcast.o mmap.o mr.o pd.o qp.o srq.o \
+	trace.o
+
+CFLAGS_trace.o = -I$(src)

+ 196 - 0
drivers/infiniband/sw/rdmavt/ah.c

@@ -0,0 +1,196 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/slab.h>
+#include "ah.h"
+#include "vt.h" /* for prints */
+
+/**
+ * rvt_check_ah - validate the attributes of AH
+ * @ibdev: the ib device
+ * @ah_attr: the attributes of the AH
+ *
+ * If driver supports a more detailed check_ah function call back to it
+ * otherwise just check the basics.
+ *
+ * Return: 0 on success
+ */
+int rvt_check_ah(struct ib_device *ibdev,
+		 struct ib_ah_attr *ah_attr)
+{
+	int err;
+	struct ib_port_attr port_attr;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	enum rdma_link_layer link = rdma_port_get_link_layer(ibdev,
+							     ah_attr->port_num);
+
+	err = ib_query_port(ibdev, ah_attr->port_num, &port_attr);
+	if (err)
+		return -EINVAL;
+	if (ah_attr->port_num < 1 ||
+	    ah_attr->port_num > ibdev->phys_port_cnt)
+		return -EINVAL;
+	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
+	    ib_rate_to_mbps(ah_attr->static_rate) < 0)
+		return -EINVAL;
+	if ((ah_attr->ah_flags & IB_AH_GRH) &&
+	    ah_attr->grh.sgid_index >= port_attr.gid_tbl_len)
+		return -EINVAL;
+	if (link != IB_LINK_LAYER_ETHERNET) {
+		if (ah_attr->dlid == 0)
+			return -EINVAL;
+		if (ah_attr->dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) &&
+		    ah_attr->dlid != be16_to_cpu(IB_LID_PERMISSIVE) &&
+		    !(ah_attr->ah_flags & IB_AH_GRH))
+			return -EINVAL;
+	}
+	if (rdi->driver_f.check_ah)
+		return rdi->driver_f.check_ah(ibdev, ah_attr);
+	return 0;
+}
+EXPORT_SYMBOL(rvt_check_ah);
+
+/**
+ * rvt_create_ah - create an address handle
+ * @pd: the protection domain
+ * @ah_attr: the attributes of the AH
+ *
+ * This may be called from interrupt context.
+ *
+ * Return: newly allocated ah
+ */
+struct ib_ah *rvt_create_ah(struct ib_pd *pd,
+			    struct ib_ah_attr *ah_attr)
+{
+	struct rvt_ah *ah;
+	struct rvt_dev_info *dev = ib_to_rvt(pd->device);
+	unsigned long flags;
+
+	if (rvt_check_ah(pd->device, ah_attr))
+		return ERR_PTR(-EINVAL);
+
+	ah = kmalloc(sizeof(*ah), GFP_ATOMIC);
+	if (!ah)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_irqsave(&dev->n_ahs_lock, flags);
+	if (dev->n_ahs_allocated == dev->dparms.props.max_ah) {
+		spin_unlock(&dev->n_ahs_lock);
+		kfree(ah);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	dev->n_ahs_allocated++;
+	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+	ah->attr = *ah_attr;
+	atomic_set(&ah->refcount, 0);
+
+	if (dev->driver_f.notify_new_ah)
+		dev->driver_f.notify_new_ah(pd->device, ah_attr, ah);
+
+	return &ah->ibah;
+}
+
+/**
+ * rvt_destroy_ah - Destroy an address handle
+ * @ibah: address handle
+ *
+ * Return: 0 on success
+ */
+int rvt_destroy_ah(struct ib_ah *ibah)
+{
+	struct rvt_dev_info *dev = ib_to_rvt(ibah->device);
+	struct rvt_ah *ah = ibah_to_rvtah(ibah);
+	unsigned long flags;
+
+	if (atomic_read(&ah->refcount) != 0)
+		return -EBUSY;
+
+	spin_lock_irqsave(&dev->n_ahs_lock, flags);
+	dev->n_ahs_allocated--;
+	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+	kfree(ah);
+
+	return 0;
+}
+
+/**
+ * rvt_modify_ah - modify an ah with given attrs
+ * @ibah: address handle to modify
+ * @ah_attr: attrs to apply
+ *
+ * Return: 0 on success
+ */
+int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+	struct rvt_ah *ah = ibah_to_rvtah(ibah);
+
+	if (rvt_check_ah(ibah->device, ah_attr))
+		return -EINVAL;
+
+	ah->attr = *ah_attr;
+
+	return 0;
+}
+
+/**
+ * rvt_query_ah - return attrs for ah
+ * @ibah: address handle to query
+ * @ah_attr: return info in this
+ *
+ * Return: always 0
+ */
+int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+	struct rvt_ah *ah = ibah_to_rvtah(ibah);
+
+	*ah_attr = ah->attr;
+
+	return 0;
+}

+ 59 - 0
drivers/infiniband/sw/rdmavt/ah.h

@@ -0,0 +1,59 @@
+#ifndef DEF_RVTAH_H
+#define DEF_RVTAH_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+struct ib_ah *rvt_create_ah(struct ib_pd *pd,
+			    struct ib_ah_attr *ah_attr);
+int rvt_destroy_ah(struct ib_ah *ibah);
+int rvt_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+int rvt_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr);
+
+#endif          /* DEF_RVTAH_H */

+ 159 - 166
drivers/staging/rdma/hfi1/cq.c → drivers/infiniband/sw/rdmavt/cq.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -48,25 +45,23 @@
  *
  */
 
 
-#include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/kthread.h>
-
-#include "verbs.h"
-#include "hfi.h"
+#include "cq.h"
+#include "vt.h"
 
 
 /**
- * hfi1_cq_enter - add a new entry to the completion queue
+ * rvt_cq_enter - add a new entry to the completion queue
  * @cq: completion queue
  * @entry: work completion entry to add
- * @sig: true if @entry is a solicited entry
+ * @sig: true if @entry is solicited
  *
  * This may be called with qp->s_lock held.
  */
-void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
+void rvt_cq_enter(struct rvt_cq *cq, struct ib_wc *entry, bool solicited)
 {
-	struct hfi1_cq_wc *wc;
+	struct rvt_cq_wc *wc;
 	unsigned long flags;
 	u32 head;
 	u32 next;
@@ -79,11 +74,13 @@ void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
 	 */
 	wc = cq->queue;
 	head = wc->head;
-	if (head >= (unsigned) cq->ibcq.cqe) {
+	if (head >= (unsigned)cq->ibcq.cqe) {
 		head = cq->ibcq.cqe;
 		next = 0;
-	} else
+	} else {
 		next = head + 1;
+	}
+
 	if (unlikely(next == wc->tail)) {
 		spin_unlock_irqrestore(&cq->lock, flags);
 		if (cq->ibcq.event_handler) {
@@ -114,8 +111,9 @@ void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
 		wc->uqueue[head].port_num = entry->port_num;
 		/* Make sure entry is written before the head index. */
 		smp_wmb();
-	} else
+	} else {
 		wc->kqueue[head] = *entry;
+	}
 	wc->head = next;
 
 	if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -126,10 +124,10 @@ void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
 		 * This will cause send_complete() to be called in
 		 * another thread.
 		 */
-		smp_read_barrier_depends(); /* see hfi1_cq_exit */
-		worker = cq->dd->worker;
+		smp_read_barrier_depends(); /* see rvt_cq_exit */
+		worker = cq->rdi->worker;
 		if (likely(worker)) {
-			cq->notify = IB_CQ_NONE;
+			cq->notify = RVT_CQ_NONE;
 			cq->triggered++;
 			queue_kthread_work(worker, &cq->comptask);
 		}
@@ -137,59 +135,11 @@ void hfi1_cq_enter(struct hfi1_cq *cq, struct ib_wc *entry, int solicited)
 
 
 	spin_unlock_irqrestore(&cq->lock, flags);
 }
-
-/**
- * hfi1_poll_cq - poll for work completion entries
- * @ibcq: the completion queue to poll
- * @num_entries: the maximum number of entries to return
- * @entry: pointer to array where work completions are placed
- *
- * Returns the number of completion entries polled.
- *
- * This may be called from interrupt context.  Also called by ib_poll_cq()
- * in the generic verbs code.
- */
-int hfi1_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
-{
-	struct hfi1_cq *cq = to_icq(ibcq);
-	struct hfi1_cq_wc *wc;
-	unsigned long flags;
-	int npolled;
-	u32 tail;
-
-	/* The kernel can only poll a kernel completion queue */
-	if (cq->ip) {
-		npolled = -EINVAL;
-		goto bail;
-	}
-
-	spin_lock_irqsave(&cq->lock, flags);
-
-	wc = cq->queue;
-	tail = wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
-	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
-		if (tail == wc->head)
-			break;
-		/* The kernel doesn't need a RMB since it has the lock. */
-		*entry = wc->kqueue[tail];
-		if (tail >= cq->ibcq.cqe)
-			tail = 0;
-		else
-			tail++;
-	}
-	wc->tail = tail;
-
-	spin_unlock_irqrestore(&cq->lock, flags);
-
-bail:
-	return npolled;
-}
+EXPORT_SYMBOL(rvt_cq_enter);
 
 
 static void send_complete(struct kthread_work *work)
 {
-	struct hfi1_cq *cq = container_of(work, struct hfi1_cq, comptask);
+	struct rvt_cq *cq = container_of(work, struct rvt_cq, comptask);
 
 
 	/*
 	 * The completion handler will most likely rearm the notification
@@ -217,26 +167,25 @@ static void send_complete(struct kthread_work *work)
 }
 
 
 /**
- * hfi1_create_cq - create a completion queue
+ * rvt_create_cq - create a completion queue
  * @ibdev: the device this completion queue is attached to
  * @attr: creation attributes
- * @context: unused by the driver
+ * @context: unused by the QLogic_IB driver
  * @udata: user data for libibverbs.so
  *
- * Returns a pointer to the completion queue or negative errno values
- * for failure.
- *
  * Called by ib_create_cq() in the generic verbs code.
+ *
+ * Return: pointer to the completion queue or negative errno values
+ * for failure.
  */
-struct ib_cq *hfi1_create_cq(
-	struct ib_device *ibdev,
-	const struct ib_cq_init_attr *attr,
-	struct ib_ucontext *context,
-	struct ib_udata *udata)
+struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
+			    const struct ib_cq_init_attr *attr,
+			    struct ib_ucontext *context,
+			    struct ib_udata *udata)
 {
-	struct hfi1_ibdev *dev = to_idev(ibdev);
-	struct hfi1_cq *cq;
-	struct hfi1_cq_wc *wc;
+	struct rvt_dev_info *rdi = ib_to_rvt(ibdev);
+	struct rvt_cq *cq;
+	struct rvt_cq_wc *wc;
 	struct ib_cq *ret;
 	u32 sz;
 	unsigned int entries = attr->cqe;
@@ -244,11 +193,11 @@ struct ib_cq *hfi1_create_cq(
 	if (attr->flags)
 		return ERR_PTR(-EINVAL);
 
 
-	if (entries < 1 || entries > hfi1_max_cqes)
+	if (entries < 1 || entries > rdi->dparms.props.max_cqe)
 		return ERR_PTR(-EINVAL);
 
 
 	/* Allocate the completion queue structure. */
-	cq = kmalloc(sizeof(*cq), GFP_KERNEL);
+	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
 	if (!cq)
 		return ERR_PTR(-ENOMEM);
 
 
@@ -272,12 +221,12 @@ struct ib_cq *hfi1_create_cq(
 
 
 	/*
 	 * Return the address of the WC as the offset to mmap.
-	 * See hfi1_mmap() for details.
+	 * See rvt_mmap() for details.
 	 */
 	if (udata && udata->outlen >= sizeof(__u64)) {
 		int err;
 
 
-		cq->ip = hfi1_create_mmap_info(dev, sz, context, wc);
+		cq->ip = rvt_create_mmap_info(rdi, sz, context, wc);
 		if (!cq->ip) {
 			ret = ERR_PTR(-ENOMEM);
 			goto bail_wc;
@@ -289,23 +238,22 @@ struct ib_cq *hfi1_create_cq(
 			ret = ERR_PTR(err);
 			goto bail_ip;
 		}
-	} else
-		cq->ip = NULL;
+	}
 
 
-	spin_lock(&dev->n_cqs_lock);
-	if (dev->n_cqs_allocated == hfi1_max_cqs) {
-		spin_unlock(&dev->n_cqs_lock);
+	spin_lock(&rdi->n_cqs_lock);
+	if (rdi->n_cqs_allocated == rdi->dparms.props.max_cq) {
+		spin_unlock(&rdi->n_cqs_lock);
 		ret = ERR_PTR(-ENOMEM);
 		goto bail_ip;
 	}
 
 
-	dev->n_cqs_allocated++;
-	spin_unlock(&dev->n_cqs_lock);
+	rdi->n_cqs_allocated++;
+	spin_unlock(&rdi->n_cqs_lock);
 
 
 	if (cq->ip) {
-		spin_lock_irq(&dev->pending_lock);
-		list_add(&cq->ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
+		spin_lock_irq(&rdi->pending_lock);
+		list_add(&cq->ip->pending_mmaps, &rdi->pending_mmaps);
+		spin_unlock_irq(&rdi->pending_lock);
 	}
 
 
 	/*
@@ -313,14 +261,11 @@ struct ib_cq *hfi1_create_cq(
 	 * The number of entries should be >= the number requested or return
 	 * an error.
 	 */
-	cq->dd = dd_from_dev(dev);
+	cq->rdi = rdi;
 	cq->ibcq.cqe = entries;
-	cq->notify = IB_CQ_NONE;
-	cq->triggered = 0;
+	cq->notify = RVT_CQ_NONE;
 	spin_lock_init(&cq->lock);
 	init_kthread_work(&cq->comptask, send_complete);
-	wc->head = 0;
-	wc->tail = 0;
 	cq->queue = wc;
 
 
 	ret = &cq->ibcq;
@@ -338,24 +283,24 @@ done:
 }
 
 
 /**
- * hfi1_destroy_cq - destroy a completion queue
+ * rvt_destroy_cq - destroy a completion queue
  * @ibcq: the completion queue to destroy.
  *
- * Returns 0 for success.
- *
  * Called by ib_destroy_cq() in the generic verbs code.
+ *
+ * Return: always 0
  */
-int hfi1_destroy_cq(struct ib_cq *ibcq)
+int rvt_destroy_cq(struct ib_cq *ibcq)
 {
-	struct hfi1_ibdev *dev = to_idev(ibcq->device);
-	struct hfi1_cq *cq = to_icq(ibcq);
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+	struct rvt_dev_info *rdi = cq->rdi;
 
 
 	flush_kthread_work(&cq->comptask);
-	spin_lock(&dev->n_cqs_lock);
-	dev->n_cqs_allocated--;
-	spin_unlock(&dev->n_cqs_lock);
+	spin_lock(&rdi->n_cqs_lock);
+	rdi->n_cqs_allocated--;
+	spin_unlock(&rdi->n_cqs_lock);
 	if (cq->ip)
-		kref_put(&cq->ip->ref, hfi1_release_mmap_info);
+		kref_put(&cq->ip->ref, rvt_release_mmap_info);
 	else
 		vfree(cq->queue);
 	kfree(cq);
@@ -364,18 +309,18 @@ int hfi1_destroy_cq(struct ib_cq *ibcq)
 }
 
 
 /**
- * hfi1_req_notify_cq - change the notification type for a completion queue
+ * rvt_req_notify_cq - change the notification type for a completion queue
  * @ibcq: the completion queue
  * @notify_flags: the type of notification to request
  *
- * Returns 0 for success.
- *
  * This may be called from interrupt context.  Also called by
  * ib_req_notify_cq() in the generic verbs code.
+ *
+ * Return: 0 for success.
  */
-int hfi1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
+int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
 {
-	struct hfi1_cq *cq = to_icq(ibcq);
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
 	unsigned long flags;
 	int ret = 0;
 
 
@@ -397,24 +342,23 @@ int hfi1_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags)
 }
 
 
 /**
- * hfi1_resize_cq - change the size of the CQ
+ * rvt_resize_cq - change the size of the CQ
  * @ibcq: the completion queue
  *
- * Returns 0 for success.
+ * Return: 0 for success.
  */
-int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
+int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 {
-	struct hfi1_cq *cq = to_icq(ibcq);
-	struct hfi1_cq_wc *old_wc;
-	struct hfi1_cq_wc *wc;
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+	struct rvt_cq_wc *old_wc;
+	struct rvt_cq_wc *wc;
 	u32 head, tail, n;
 	int ret;
 	u32 sz;
+	struct rvt_dev_info *rdi = cq->rdi;
 
 
-	if (cqe < 1 || cqe > hfi1_max_cqes) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	if (cqe < 1 || cqe > rdi->dparms.props.max_cqe)
+		return -EINVAL;
 
 
 	/*
 	 * Need to use vmalloc() if we want to support large #s of entries.
@@ -425,10 +369,8 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 	else
 		sz += sizeof(struct ib_wc) * (cqe + 1);
 	wc = vmalloc_user(sz);
-	if (!wc) {
-		ret = -ENOMEM;
-		goto bail;
-	}
+	if (!wc)
+		return -ENOMEM;
 
 
 	/* Check that we can write the offset to mmap. */
 	if (udata && udata->outlen >= sizeof(__u64)) {
@@ -446,11 +388,11 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 	 */
 	old_wc = cq->queue;
 	head = old_wc->head;
-	if (head > (u32) cq->ibcq.cqe)
-		head = (u32) cq->ibcq.cqe;
+	if (head > (u32)cq->ibcq.cqe)
+		head = (u32)cq->ibcq.cqe;
 	tail = old_wc->tail;
-	if (tail > (u32) cq->ibcq.cqe)
-		tail = (u32) cq->ibcq.cqe;
+	if (tail > (u32)cq->ibcq.cqe)
+		tail = (u32)cq->ibcq.cqe;
 	if (head < tail)
 		n = cq->ibcq.cqe + 1 + head - tail;
 	else
@@ -464,7 +406,7 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 			wc->uqueue[n] = old_wc->uqueue[tail];
 		else
 			wc->kqueue[n] = old_wc->kqueue[tail];
-		if (tail == (u32) cq->ibcq.cqe)
+		if (tail == (u32)cq->ibcq.cqe)
 			tail = 0;
 		else
 			tail++;
@@ -478,80 +420,131 @@ int hfi1_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 	vfree(old_wc);
 
 
 	if (cq->ip) {
-		struct hfi1_ibdev *dev = to_idev(ibcq->device);
-		struct hfi1_mmap_info *ip = cq->ip;
+		struct rvt_mmap_info *ip = cq->ip;
 
 
-		hfi1_update_mmap_info(dev, ip, sz, wc);
+		rvt_update_mmap_info(rdi, ip, sz, wc);
 
 
 		/*
 		/*
 		 * Return the offset to mmap.
 		 * Return the offset to mmap.
-		 * See hfi1_mmap() for details.
+		 * See rvt_mmap() for details.
 		 */
 		if (udata && udata->outlen >= sizeof(__u64)) {
 			ret = ib_copy_to_udata(udata, &ip->offset,
 					       sizeof(ip->offset));
 			if (ret)
-				goto bail;
+				return ret;
 		}
 
-		spin_lock_irq(&dev->pending_lock);
+		spin_lock_irq(&rdi->pending_lock);
 		if (list_empty(&ip->pending_mmaps))
-			list_add(&ip->pending_mmaps, &dev->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
+			list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
+		spin_unlock_irq(&rdi->pending_lock);
 	}
 
-	ret = 0;
-	goto bail;
+	return 0;
 
 bail_unlock:
 	spin_unlock_irq(&cq->lock);
 bail_free:
 	vfree(wc);
-bail:
 	return ret;
 }
 
-int hfi1_cq_init(struct hfi1_devdata *dd)
+/**
+ * rvt_poll_cq - poll for work completion entries
+ * @ibcq: the completion queue to poll
+ * @num_entries: the maximum number of entries to return
+ * @entry: pointer to array where work completions are placed
+ *
+ * This may be called from interrupt context.  Also called by ib_poll_cq()
+ * in the generic verbs code.
+ *
+ * Return: the number of completion entries polled.
+ */
+int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)
+{
+	struct rvt_cq *cq = ibcq_to_rvtcq(ibcq);
+	struct rvt_cq_wc *wc;
+	unsigned long flags;
+	int npolled;
+	u32 tail;
+
+	/* The kernel can only poll a kernel completion queue */
+	if (cq->ip)
+		return -EINVAL;
+
+	spin_lock_irqsave(&cq->lock, flags);
+
+	wc = cq->queue;
+	tail = wc->tail;
+	if (tail > (u32)cq->ibcq.cqe)
+		tail = (u32)cq->ibcq.cqe;
+	for (npolled = 0; npolled < num_entries; ++npolled, ++entry) {
+		if (tail == wc->head)
+			break;
+		/* The kernel doesn't need a RMB since it has the lock. */
+		*entry = wc->kqueue[tail];
+		if (tail >= cq->ibcq.cqe)
+			tail = 0;
+		else
+			tail++;
+	}
+	wc->tail = tail;
+
+	spin_unlock_irqrestore(&cq->lock, flags);
+
+	return npolled;
+}
+
+/**
+ * rvt_driver_cq_init - Init cq resources on behalf of driver
+ * @rdi: rvt dev structure
+ *
+ * Return: 0 on success
+ */
+int rvt_driver_cq_init(struct rvt_dev_info *rdi)
 {
 	int ret = 0;
 	int cpu;
 	struct task_struct *task;
 
-	if (dd->worker)
+	if (rdi->worker)
 		return 0;
-	dd->worker = kzalloc(sizeof(*dd->worker), GFP_KERNEL);
-	if (!dd->worker)
+	rdi->worker = kzalloc(sizeof(*rdi->worker), GFP_KERNEL);
+	if (!rdi->worker)
 		return -ENOMEM;
-	init_kthread_worker(dd->worker);
+	init_kthread_worker(rdi->worker);
 	task = kthread_create_on_node(
 		kthread_worker_fn,
-		dd->worker,
-		dd->assigned_node_id,
-		"hfi1_cq%d", dd->unit);
-	if (IS_ERR(task))
-		goto task_fail;
-	cpu = cpumask_first(cpumask_of_node(dd->assigned_node_id));
+		rdi->worker,
+		rdi->dparms.node,
+		"%s", rdi->dparms.cq_name);
+	if (IS_ERR(task)) {
+		kfree(rdi->worker);
+		rdi->worker = NULL;
+		return PTR_ERR(task);
+	}
+
+	cpu = cpumask_first(cpumask_of_node(rdi->dparms.node));
 	kthread_bind(task, cpu);
 	wake_up_process(task);
-out:
 	return ret;
-task_fail:
-	ret = PTR_ERR(task);
-	kfree(dd->worker);
-	dd->worker = NULL;
-	goto out;
 }
 
-void hfi1_cq_exit(struct hfi1_devdata *dd)
+/**
+ * rvt_cq_exit - tear down cq reources
+ * @rdi: rvt dev structure
+ */
+void rvt_cq_exit(struct rvt_dev_info *rdi)
 {
 	struct kthread_worker *worker;
 
-	worker = dd->worker;
+	worker = rdi->worker;
 	if (!worker)
 		return;
 	/* blocks future queuing from send_complete() */
-	dd->worker = NULL;
-	smp_wmb(); /* See hfi1_cq_enter */
+	rdi->worker = NULL;
+	smp_wmb(); /* See rdi_cq_enter */
 	flush_kthread_worker(worker);
 	kthread_stop(worker->task);
 	kfree(worker);

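For orientation, rvt_poll_cq() above only ever services kernel-owned CQs (user CQs are mmap()ed and polled from userspace, hence the cq->ip check), and kernel consumers reach it through the normal verbs entry points. A minimal consumer-side sketch, not part of this series; example_drain_cq() is a hypothetical helper and assumes only the standard <rdma/ib_verbs.h> API:

#include <rdma/ib_verbs.h>

/* Hypothetical: drain a kernel CQ, then re-arm it.  On an rdmavt-backed
 * device, ib_poll_cq() ends up in rvt_poll_cq() and ib_req_notify_cq()
 * in rvt_req_notify_cq(). */
static void example_drain_cq(struct ib_cq *cq)
{
	struct ib_wc wc[8];
	int n, i;

	do {
		while ((n = ib_poll_cq(cq, ARRAY_SIZE(wc), wc)) > 0)
			for (i = 0; i < n; i++)
				pr_debug("wr_id %llu status %d\n",
					 (unsigned long long)wc[i].wr_id,
					 wc[i].status);
		/* A positive return means completions raced in; poll again. */
	} while (ib_req_notify_cq(cq, IB_CQ_NEXT_COMP |
				      IB_CQ_REPORT_MISSED_EVENTS) > 0);
}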
+ 64 - 0
drivers/infiniband/sw/rdmavt/cq.h

@@ -0,0 +1,64 @@
+#ifndef DEF_RVTCQ_H
+#define DEF_RVTCQ_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_cq.h>
+
+struct ib_cq *rvt_create_cq(struct ib_device *ibdev,
+			    const struct ib_cq_init_attr *attr,
+			    struct ib_ucontext *context,
+			    struct ib_udata *udata);
+int rvt_destroy_cq(struct ib_cq *ibcq);
+int rvt_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
+int rvt_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
+int rvt_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+int rvt_driver_cq_init(struct rvt_dev_info *rdi);
+void rvt_cq_exit(struct rvt_dev_info *rdi);
+#endif          /* DEF_RVTCQ_H */

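rvt_driver_cq_init() spawns one completion kthread worker per device, bound to the first CPU of rdi->dparms.node and named from rdi->dparms.cq_name, while rvt_resize_cq() enforces rdi->dparms.props.max_cqe; those fields therefore have to be populated before the calls declared above are made. A hedged driver-side sketch of that ordering (the example_* names and the max_cqe value are illustrative, not taken from this patch):

#include <rdma/rdma_vt.h>
#include "cq.h"

/* Hypothetical driver setup/teardown around the rdmavt CQ support. */
static int example_setup_cq_resources(struct rvt_dev_info *rdi, int numa_node)
{
	rdi->dparms.node = numa_node;		/* worker kthread is bound here */
	rdi->dparms.props.max_cqe = 0x2FFFF;	/* limit checked by rvt_resize_cq() */
	/* rdi->dparms.cq_name is also expected to be filled in by the driver;
	 * it becomes the kthread name. */
	return rvt_driver_cq_init(rdi);
}

static void example_teardown_cq_resources(struct rvt_dev_info *rdi)
{
	rvt_cq_exit(rdi);	/* flushes, stops and frees the kthread worker */
}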
+ 184 - 0
drivers/infiniband/sw/rdmavt/dma.c

@@ -0,0 +1,184 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+#include <linux/types.h>
+#include <linux/scatterlist.h>
+#include <rdma/ib_verbs.h>
+
+#include "dma.h"
+
+#define BAD_DMA_ADDRESS ((u64)0)
+
+/*
+ * The following functions implement driver specific replacements
+ * for the ib_dma_*() functions.
+ *
+ * These functions return kernel virtual addresses instead of
+ * device bus addresses since the driver uses the CPU to copy
+ * data instead of using hardware DMA.
+ */
+
+static int rvt_mapping_error(struct ib_device *dev, u64 dma_addr)
+{
+	return dma_addr == BAD_DMA_ADDRESS;
+}
+
+static u64 rvt_dma_map_single(struct ib_device *dev, void *cpu_addr,
+			      size_t size, enum dma_data_direction direction)
+{
+	if (WARN_ON(!valid_dma_direction(direction)))
+		return BAD_DMA_ADDRESS;
+
+	return (u64)cpu_addr;
+}
+
+static void rvt_dma_unmap_single(struct ib_device *dev, u64 addr, size_t size,
+				 enum dma_data_direction direction)
+{
+	/* This is a stub, nothing to be done here */
+}
+
+static u64 rvt_dma_map_page(struct ib_device *dev, struct page *page,
+			    unsigned long offset, size_t size,
+			    enum dma_data_direction direction)
+{
+	u64 addr;
+
+	if (WARN_ON(!valid_dma_direction(direction)))
+		return BAD_DMA_ADDRESS;
+
+	if (offset + size > PAGE_SIZE)
+		return BAD_DMA_ADDRESS;
+
+	addr = (u64)page_address(page);
+	if (addr)
+		addr += offset;
+
+	return addr;
+}
+
+static void rvt_dma_unmap_page(struct ib_device *dev, u64 addr, size_t size,
+			       enum dma_data_direction direction)
+{
+	/* This is a stub, nothing to be done here */
+}
+
+static int rvt_map_sg(struct ib_device *dev, struct scatterlist *sgl,
+		      int nents, enum dma_data_direction direction)
+{
+	struct scatterlist *sg;
+	u64 addr;
+	int i;
+	int ret = nents;
+
+	if (WARN_ON(!valid_dma_direction(direction)))
+		return 0;
+
+	for_each_sg(sgl, sg, nents, i) {
+		addr = (u64)page_address(sg_page(sg));
+		if (!addr) {
+			ret = 0;
+			break;
+		}
+		sg->dma_address = addr + sg->offset;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+		sg->dma_length = sg->length;
+#endif
+	}
+	return ret;
+}
+
+static void rvt_unmap_sg(struct ib_device *dev,
+			 struct scatterlist *sg, int nents,
+			 enum dma_data_direction direction)
+{
+	/* This is a stub, nothing to be done here */
+}
+
+static void rvt_sync_single_for_cpu(struct ib_device *dev, u64 addr,
+				    size_t size, enum dma_data_direction dir)
+{
+}
+
+static void rvt_sync_single_for_device(struct ib_device *dev, u64 addr,
+				       size_t size,
+				       enum dma_data_direction dir)
+{
+}
+
+static void *rvt_dma_alloc_coherent(struct ib_device *dev, size_t size,
+				    u64 *dma_handle, gfp_t flag)
+{
+	struct page *p;
+	void *addr = NULL;
+
+	p = alloc_pages(flag, get_order(size));
+	if (p)
+		addr = page_address(p);
+	if (dma_handle)
+		*dma_handle = (u64)addr;
+	return addr;
+}
+
+static void rvt_dma_free_coherent(struct ib_device *dev, size_t size,
+				  void *cpu_addr, u64 dma_handle)
+{
+	free_pages((unsigned long)cpu_addr, get_order(size));
+}
+
+struct ib_dma_mapping_ops rvt_default_dma_mapping_ops = {
+	.mapping_error = rvt_mapping_error,
+	.map_single = rvt_dma_map_single,
+	.unmap_single = rvt_dma_unmap_single,
+	.map_page = rvt_dma_map_page,
+	.unmap_page = rvt_dma_unmap_page,
+	.map_sg = rvt_map_sg,
+	.unmap_sg = rvt_unmap_sg,
+	.sync_single_for_cpu = rvt_sync_single_for_cpu,
+	.sync_single_for_device = rvt_sync_single_for_device,
+	.alloc_coherent = rvt_dma_alloc_coherent,
+	.free_coherent = rvt_dma_free_coherent
+};

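Because rdmavt devices never hand buffers to real DMA hardware, the table above simply treats kernel virtual addresses as bus addresses. A hedged wiring sketch, assuming the 4.6-era ib_dma_mapping_ops hook on struct ib_device (example_register_dma_ops() is illustrative, not part of this patch):

#include <rdma/ib_verbs.h>
#include "dma.h"

/* Hypothetical: point the device at the software mapping table, after which
 * ib_dma_map_single(ibdev, buf, len, dir) simply returns (u64)buf and
 * ib_dma_mapping_error() only fires for a NULL address. */
static void example_register_dma_ops(struct ib_device *ibdev)
{
	ibdev->dma_ops = &rvt_default_dma_mapping_ops;
}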
+ 53 - 0
drivers/infiniband/sw/rdmavt/dma.h

@@ -0,0 +1,53 @@
+#ifndef DEF_RDMAVTDMA_H
+#define DEF_RDMAVTDMA_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+extern struct ib_dma_mapping_ops rvt_default_dma_mapping_ops;
+
+#endif          /* DEF_RDMAVTDMA_H */

+ 171 - 0
drivers/infiniband/sw/rdmavt/mad.c

@@ -0,0 +1,171 @@
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/ib_mad.h>
+#include "mad.h"
+#include "vt.h"
+
+/**
+ * rvt_process_mad - process an incoming MAD packet
+ * @ibdev: the infiniband device this packet came in on
+ * @mad_flags: MAD flags
+ * @port_num: the port number this packet came in on, 1 based from ib core
+ * @in_wc: the work completion entry for this packet
+ * @in_grh: the global route header for this packet
+ * @in_mad: the incoming MAD
+ * @out_mad: any outgoing MAD reply
+ *
+ * Note that the verbs framework has already done the MAD sanity checks,
+ * and hop count/pointer updating for IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
+ * MADs.
+ *
+ * This is called by the ib_mad module.
+ *
+ * Return: IB_MAD_RESULT_SUCCESS or error
+ */
+int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		    const struct ib_mad_hdr *in, size_t in_mad_size,
+		    struct ib_mad_hdr *out, size_t *out_mad_size,
+		    u16 *out_mad_pkey_index)
+{
+	/*
+	 * MAD processing is quite different between hfi1 and qib. Therfore this
+	 * is expected to be provided by the driver. Other drivers in the future
+	 * may chose to implement this but it should not be made into a
+	 * requirement.
+	 */
+	if (ibport_num_to_idx(ibdev, port_num) < 0)
+		return -EINVAL;
+
+	return IB_MAD_RESULT_FAILURE;
+}
+
+static void rvt_send_mad_handler(struct ib_mad_agent *agent,
+				 struct ib_mad_send_wc *mad_send_wc)
+{
+	ib_free_send_mad(mad_send_wc->send_buf);
+}
+
+/**
+ * rvt_create_mad_agents - create mad agents
+ * @rdi: rvt dev struct
+ *
+ * If driver needs to be notified of mad agent creation then call back
+ *
+ * Return 0 on success
+ */
+int rvt_create_mad_agents(struct rvt_dev_info *rdi)
+{
+	struct ib_mad_agent *agent;
+	struct rvt_ibport *rvp;
+	int p;
+	int ret;
+
+	for (p = 0; p < rdi->dparms.nports; p++) {
+		rvp = rdi->ports[p];
+		agent = ib_register_mad_agent(&rdi->ibdev, p + 1,
+					      IB_QPT_SMI,
+					      NULL, 0, rvt_send_mad_handler,
+					      NULL, NULL, 0);
+		if (IS_ERR(agent)) {
+			ret = PTR_ERR(agent);
+			goto err;
+		}
+
+		rvp->send_agent = agent;
+
+		if (rdi->driver_f.notify_create_mad_agent)
+			rdi->driver_f.notify_create_mad_agent(rdi, p);
+	}
+
+	return 0;
+
+err:
+	for (p = 0; p < rdi->dparms.nports; p++) {
+		rvp = rdi->ports[p];
+		if (rvp->send_agent) {
+			agent = rvp->send_agent;
+			rvp->send_agent = NULL;
+			ib_unregister_mad_agent(agent);
+			if (rdi->driver_f.notify_free_mad_agent)
+				rdi->driver_f.notify_free_mad_agent(rdi, p);
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * rvt_free_mad_agents - free up mad agents
+ * @rdi: rvt dev struct
+ *
+ * If driver needs notification of mad agent removal make the call back
+ */
+void rvt_free_mad_agents(struct rvt_dev_info *rdi)
+{
+	struct ib_mad_agent *agent;
+	struct rvt_ibport *rvp;
+	int p;
+
+	for (p = 0; p < rdi->dparms.nports; p++) {
+		rvp = rdi->ports[p];
+		if (rvp->send_agent) {
+			agent = rvp->send_agent;
+			rvp->send_agent = NULL;
+			ib_unregister_mad_agent(agent);
+		}
+		if (rvp->sm_ah) {
+			ib_destroy_ah(&rvp->sm_ah->ibah);
+			rvp->sm_ah = NULL;
+		}
+
+		if (rdi->driver_f.notify_free_mad_agent)
+			rdi->driver_f.notify_free_mad_agent(rdi, p);
+	}
+}
+

+ 60 - 0
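rvt_create_mad_agents()/rvt_free_mad_agents() above only manage the per-port IB_QPT_SMI send agents; anything driver specific happens through the optional notify_create_mad_agent/notify_free_mad_agent callbacks in rdi->driver_f. A hedged sketch of what a driver might supply there; both example_* functions are hypothetical and their prototype is inferred from the call sites above:

/* Hypothetical driver callbacks, invoked once per port by the code above. */
static void example_notify_create_mad_agent(struct rvt_dev_info *rdi, int port_idx)
{
	/* rdi->ports[port_idx]->send_agent is valid here, so per-port work
	 * that wants to send MADs can be started. */
}

static void example_notify_free_mad_agent(struct rvt_dev_info *rdi, int port_idx)
{
	/* The send agent has already been (or is being) unregistered;
	 * quiesce anything that might still queue MADs on this port. */
}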
drivers/infiniband/sw/rdmavt/mad.h

@@ -0,0 +1,60 @@
+#ifndef DEF_RVTMAD_H
+#define DEF_RVTMAD_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+int rvt_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		    const struct ib_wc *in_wc, const struct ib_grh *in_grh,
+		    const struct ib_mad_hdr *in, size_t in_mad_size,
+		    struct ib_mad_hdr *out, size_t *out_mad_size,
+		    u16 *out_mad_pkey_index);
+int rvt_create_mad_agents(struct rvt_dev_info *rdi);
+void rvt_free_mad_agents(struct rvt_dev_info *rdi);
+#endif          /* DEF_RVTMAD_H */

+ 147 - 115
drivers/staging/rdma/hfi1/verbs_mcast.c → drivers/infiniband/sw/rdmavt/mcast.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -48,17 +45,36 @@
  *
  */
 
+#include <linux/slab.h>
+#include <linux/sched.h>
 #include <linux/rculist.h>
+#include <rdma/rdma_vt.h>
+#include <rdma/rdmavt_qp.h>
 
-#include "hfi.h"
+#include "mcast.h"
+
+/**
+ * rvt_driver_mcast - init resources for multicast
+ * @rdi: rvt dev struct
+ *
+ * This is per device that registers with rdmavt
+ */
+void rvt_driver_mcast_init(struct rvt_dev_info *rdi)
+{
+	/*
+	 * Anything that needs setup for multicast on a per driver or per rdi
+	 * basis should be done in here.
+	 */
+	spin_lock_init(&rdi->n_mcast_grps_lock);
+}
 
 /**
  * mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
  * @qp: the QP to link
  */
-static struct hfi1_mcast_qp *mcast_qp_alloc(struct hfi1_qp *qp)
+static struct rvt_mcast_qp *rvt_mcast_qp_alloc(struct rvt_qp *qp)
 {
-	struct hfi1_mcast_qp *mqp;
+	struct rvt_mcast_qp *mqp;
 
 	mqp = kmalloc(sizeof(*mqp), GFP_KERNEL);
 	if (!mqp)
@@ -71,9 +87,9 @@ bail:
 	return mqp;
 }
 
-static void mcast_qp_free(struct hfi1_mcast_qp *mqp)
+static void rvt_mcast_qp_free(struct rvt_mcast_qp *mqp)
 {
-	struct hfi1_qp *qp = mqp->qp;
+	struct rvt_qp *qp = mqp->qp;
 
 	/* Notify hfi1_destroy_qp() if it is waiting. */
 	if (atomic_dec_and_test(&qp->refcount))
@@ -88,11 +104,11 @@ static void mcast_qp_free(struct hfi1_mcast_qp *mqp)
  *
  * A list of QPs will be attached to this structure.
  */
-static struct hfi1_mcast *mcast_alloc(union ib_gid *mgid)
+static struct rvt_mcast *rvt_mcast_alloc(union ib_gid *mgid)
 {
-	struct hfi1_mcast *mcast;
+	struct rvt_mcast *mcast;
 
-	mcast = kmalloc(sizeof(*mcast), GFP_KERNEL);
+	mcast = kzalloc(sizeof(*mcast), GFP_KERNEL);
 	if (!mcast)
 		goto bail;
 
@@ -100,75 +116,72 @@ static struct hfi1_mcast *mcast_alloc(union ib_gid *mgid)
 	INIT_LIST_HEAD(&mcast->qp_list);
 	init_waitqueue_head(&mcast->wait);
 	atomic_set(&mcast->refcount, 0);
-	mcast->n_attached = 0;
 
 bail:
 	return mcast;
 }
 
-static void mcast_free(struct hfi1_mcast *mcast)
+static void rvt_mcast_free(struct rvt_mcast *mcast)
 {
-	struct hfi1_mcast_qp *p, *tmp;
+	struct rvt_mcast_qp *p, *tmp;
 
 	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
-		mcast_qp_free(p);
+		rvt_mcast_qp_free(p);
 
 	kfree(mcast);
 }
 
 /**
- * hfi1_mcast_find - search the global table for the given multicast GID
+ * rvt_mcast_find - search the global table for the given multicast GID
  * @ibp: the IB port structure
  * @mgid: the multicast GID to search for
  *
- * Returns NULL if not found.
- *
  * The caller is responsible for decrementing the reference count if found.
+ *
+ * Return: NULL if not found.
  */
-struct hfi1_mcast *hfi1_mcast_find(struct hfi1_ibport *ibp, union ib_gid *mgid)
+struct rvt_mcast *rvt_mcast_find(struct rvt_ibport *ibp, union ib_gid *mgid)
 {
 	struct rb_node *n;
 	unsigned long flags;
-	struct hfi1_mcast *mcast;
+	struct rvt_mcast *found = NULL;
 
 	spin_lock_irqsave(&ibp->lock, flags);
 	n = ibp->mcast_tree.rb_node;
 	while (n) {
 		int ret;
+		struct rvt_mcast *mcast;
 
-		mcast = rb_entry(n, struct hfi1_mcast, rb_node);
+		mcast = rb_entry(n, struct rvt_mcast, rb_node);
 
 		ret = memcmp(mgid->raw, mcast->mgid.raw,
 			     sizeof(union ib_gid));
-		if (ret < 0)
+		if (ret < 0) {
 			n = n->rb_left;
-		else if (ret > 0)
+		} else if (ret > 0) {
 			n = n->rb_right;
-		else {
+		} else {
 			atomic_inc(&mcast->refcount);
-			spin_unlock_irqrestore(&ibp->lock, flags);
-			goto bail;
+			found = mcast;
+			break;
 		}
 	}
 	spin_unlock_irqrestore(&ibp->lock, flags);
-
-	mcast = NULL;
-
-bail:
-	return mcast;
+	return found;
 }
+EXPORT_SYMBOL(rvt_mcast_find);
 
 /**
  * mcast_add - insert mcast GID into table and attach QP struct
  * @mcast: the mcast GID table
  * @mqp: the QP to attach
  *
- * Return zero if both were added.  Return EEXIST if the GID was already in
+ * Return: zero if both were added.  Return EEXIST if the GID was already in
  * the table but the QP was added.  Return ESRCH if the QP was already
  * attached and neither structure was added.
  */
-static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
-		     struct hfi1_mcast *mcast, struct hfi1_mcast_qp *mqp)
+static int rvt_mcast_add(struct rvt_dev_info *rdi, struct rvt_ibport *ibp,
+			 struct rvt_mcast *mcast, struct rvt_mcast_qp *mqp)
 {
 	struct rb_node **n = &ibp->mcast_tree.rb_node;
 	struct rb_node *pn = NULL;
@@ -177,11 +190,11 @@ static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
 	spin_lock_irq(&ibp->lock);
 
 	while (*n) {
-		struct hfi1_mcast *tmcast;
-		struct hfi1_mcast_qp *p;
+		struct rvt_mcast *tmcast;
+		struct rvt_mcast_qp *p;
 
 		pn = *n;
-		tmcast = rb_entry(pn, struct hfi1_mcast, rb_node);
+		tmcast = rb_entry(pn, struct rvt_mcast, rb_node);
 
 		ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
 			     sizeof(union ib_gid));
@@ -201,7 +214,8 @@ static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
 				goto bail;
 			}
 		}
-		if (tmcast->n_attached == hfi1_max_mcast_qp_attached) {
+		if (tmcast->n_attached ==
+		    rdi->dparms.props.max_mcast_qp_attach) {
 			ret = ENOMEM;
 			goto bail;
 		}
@@ -213,15 +227,15 @@ static int mcast_add(struct hfi1_ibdev *dev, struct hfi1_ibport *ibp,
 		goto bail;
 	}
 
-	spin_lock(&dev->n_mcast_grps_lock);
-	if (dev->n_mcast_grps_allocated == hfi1_max_mcast_grps) {
-		spin_unlock(&dev->n_mcast_grps_lock);
+	spin_lock(&rdi->n_mcast_grps_lock);
+	if (rdi->n_mcast_grps_allocated == rdi->dparms.props.max_mcast_grp) {
+		spin_unlock(&rdi->n_mcast_grps_lock);
 		ret = ENOMEM;
 		goto bail;
 	}
 
-	dev->n_mcast_grps_allocated++;
-	spin_unlock(&dev->n_mcast_grps_lock);
+	rdi->n_mcast_grps_allocated++;
+	spin_unlock(&rdi->n_mcast_grps_lock);
 
 	mcast->n_attached++;
 
@@ -239,92 +253,98 @@ bail:
 	return ret;
 }
 
-int hfi1_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+/**
+ * rvt_attach_mcast - attach a qp to a multicast group
+ * @ibqp: Infiniband qp
+ * @igd: multicast guid
+ * @lid: multicast lid
+ *
+ * Return: 0 on success
+ */
+int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-	struct hfi1_ibport *ibp;
-	struct hfi1_mcast *mcast;
-	struct hfi1_mcast_qp *mqp;
-	int ret;
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+	struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1];
+	struct rvt_mcast *mcast;
+	struct rvt_mcast_qp *mqp;
+	int ret = -ENOMEM;
 
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+		return -EINVAL;
 
 	/*
 	 * Allocate data structures since its better to do this outside of
 	 * spin locks and it will most likely be needed.
 	 */
-	mcast = mcast_alloc(gid);
-	if (mcast == NULL) {
-		ret = -ENOMEM;
-		goto bail;
-	}
-	mqp = mcast_qp_alloc(qp);
-	if (mqp == NULL) {
-		mcast_free(mcast);
-		ret = -ENOMEM;
-		goto bail;
-	}
-	ibp = to_iport(ibqp->device, qp->port_num);
-	switch (mcast_add(dev, ibp, mcast, mqp)) {
-	case ESRCH:
-		/* Neither was used: OK to attach the same QP twice. */
-		mcast_qp_free(mqp);
-		mcast_free(mcast);
-		break;
+	mcast = rvt_mcast_alloc(gid);
+	if (!mcast)
+		return -ENOMEM;
 
-	case EEXIST:            /* The mcast wasn't used */
-		mcast_free(mcast);
-		break;
+	mqp = rvt_mcast_qp_alloc(qp);
+	if (!mqp)
+		goto bail_mcast;
 
+	switch (rvt_mcast_add(rdi, ibp, mcast, mqp)) {
+	case ESRCH:
+		/* Neither was used: OK to attach the same QP twice. */
+		ret = 0;
+		goto bail_mqp;
+	case EEXIST: /* The mcast wasn't used */
+		ret = 0;
+		goto bail_mcast;
 	case ENOMEM:
 	case ENOMEM:
 		/* Exceeded the maximum number of mcast groups. */
 		/* Exceeded the maximum number of mcast groups. */
-		mcast_qp_free(mqp);
-		mcast_free(mcast);
 		ret = -ENOMEM;
 		ret = -ENOMEM;
-		goto bail;
-
+		goto bail_mqp;
 	default:
 	default:
 		break;
 		break;
 	}
 	}
 
 
+	return 0;
+
+bail_mqp:
+	rvt_mcast_qp_free(mqp);
+
+bail_mcast:
+	rvt_mcast_free(mcast);
 
 
 	return ret;
 	return ret;
 }
 }
 
 
+/**
+ * rvt_detach_mcast - remove a qp from a multicast group
+ * @ibqp: Infiniband qp
+ * @igd: multicast guid
+ * @lid: multicast lid
+ *
+ * Return: 0 on success
+ */
+int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
-	struct hfi1_qp *qp = to_iqp(ibqp);
-	struct hfi1_ibdev *dev = to_idev(ibqp->device);
-	struct hfi1_ibport *ibp = to_iport(ibqp->device, qp->port_num);
-	struct hfi1_mcast *mcast = NULL;
-	struct hfi1_mcast_qp *p, *tmp;
+	struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+	struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+	struct rvt_ibport *ibp = rdi->ports[qp->port_num - 1];
+	struct rvt_mcast *mcast = NULL;
+	struct rvt_mcast_qp *p, *tmp, *delp = NULL;
 	struct rb_node *n;
 	int last = 0;
-	int ret;
+	int ret = 0;
 
-	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
-		ret = -EINVAL;
-		goto bail;
-	}
+	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET)
+		return -EINVAL;
 
 	spin_lock_irq(&ibp->lock);
 
 	/* Find the GID in the mcast table. */
 	n = ibp->mcast_tree.rb_node;
 	while (1) {
-		if (n == NULL) {
+		if (!n) {
 			spin_unlock_irq(&ibp->lock);
-			ret = -EINVAL;
-			goto bail;
+			return -EINVAL;
 		}
 
-		mcast = rb_entry(n, struct hfi1_mcast, rb_node);
+		mcast = rb_entry(n, struct rvt_mcast, rb_node);
 		ret = memcmp(gid->raw, mcast->mgid.raw,
 			     sizeof(union ib_gid));
 		if (ret < 0)
@@ -345,6 +365,7 @@ int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 		 */
 		list_del_rcu(&p->list);
 		mcast->n_attached--;
+		delp = p;
 
 		/* If this was the last attached QP, remove the GID too. */
 		if (list_empty(&mcast->qp_list)) {
@@ -355,31 +376,42 @@ int hfi1_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	}
 
 	spin_unlock_irq(&ibp->lock);
+	/* QP not attached */
+	if (!delp)
+		return -EINVAL;
+
+	/*
+	 * Wait for any list walkers to finish before freeing the
+	 * list element.
+	 */
+	wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+	rvt_mcast_qp_free(delp);
 
-	if (p) {
-		/*
-		 * Wait for any list walkers to finish before freeing the
-		 * list element.
-		 */
-		wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
-		mcast_qp_free(p);
-	}
 	if (last) {
 		atomic_dec(&mcast->refcount);
 		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
-		mcast_free(mcast);
-		spin_lock_irq(&dev->n_mcast_grps_lock);
-		dev->n_mcast_grps_allocated--;
-		spin_unlock_irq(&dev->n_mcast_grps_lock);
+		rvt_mcast_free(mcast);
+		spin_lock_irq(&rdi->n_mcast_grps_lock);
+		rdi->n_mcast_grps_allocated--;
+		spin_unlock_irq(&rdi->n_mcast_grps_lock);
 	}
 
-	ret = 0;
-
-bail:
-	return ret;
+	return 0;
 }
 
-int hfi1_mcast_tree_empty(struct hfi1_ibport *ibp)
+/**
+ *rvt_mast_tree_empty - determine if any qps are attached to any mcast group
+ *@rdi: rvt dev struct
+ *
+ * Return: in use count
+ */
+int rvt_mcast_tree_empty(struct rvt_dev_info *rdi)
 {
-	return ibp->mcast_tree.rb_node == NULL;
+	int i;
+	int in_use = 0;
+
+	for (i = 0; i < rdi->dparms.nports; i++)
+		if (rdi->ports[i]->mcast_tree.rb_node)
+			in_use++;
+	return in_use;
 }

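rvt_attach_mcast()/rvt_detach_mcast() are reached through the core verbs multicast calls on a UD QP. A minimal consumer-side sketch, not part of this series (example_join_mcast_group() is a hypothetical helper):

#include <rdma/ib_verbs.h>

/* Hypothetical: join and later leave a multicast group; on an rdmavt-backed
 * device these calls land in rvt_attach_mcast()/rvt_detach_mcast(). */
static int example_join_mcast_group(struct ib_qp *qp, union ib_gid *mgid, u16 mlid)
{
	int ret;

	/* Fails with -EINVAL for QP0/QP1 or a QP still in the RESET state. */
	ret = ib_attach_mcast(qp, mgid, mlid);
	if (ret)
		return ret;

	/* ... post receives and handle multicast traffic ... */

	return ib_detach_mcast(qp, mgid, mlid);
}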
+ 58 - 0
drivers/infiniband/sw/rdmavt/mcast.h

@@ -0,0 +1,58 @@
+#ifndef DEF_RVTMCAST_H
+#define DEF_RVTMCAST_H
+
+/*
+ * Copyright(c) 2016 Intel Corporation.
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  - Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  - Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  - Neither the name of Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <rdma/rdma_vt.h>
+
+void rvt_driver_mcast_init(struct rvt_dev_info *rdi);
+int rvt_attach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+int rvt_detach_mcast(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+int rvt_mcast_tree_empty(struct rvt_dev_info *rdi);
+
+#endif          /* DEF_RVTMCAST_H */

+ 79 - 63
drivers/staging/rdma/hfi1/mmap.c → drivers/infiniband/sw/rdmavt/mmap.c

@@ -1,12 +1,11 @@
 /*
+ * Copyright(c) 2016 Intel Corporation.
  *
  * This file is provided under a dual BSD/GPLv2 license.  When using or
  * redistributing this file, you may do so under either license.
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
  * published by the Free Software Foundation.
@@ -18,8 +17,6 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2015 Intel Corporation.
- *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -48,68 +45,74 @@
  *
  */
 
-#include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/mm.h>
-#include <linux/errno.h>
 #include <asm/pgtable.h>
+#include "mmap.h"
 
-#include "verbs.h"
+/**
+ * rvt_mmap_init - init link list and lock for mem map
+ * @rdi: rvt dev struct
+ */
+void rvt_mmap_init(struct rvt_dev_info *rdi)
+{
+	INIT_LIST_HEAD(&rdi->pending_mmaps);
+	spin_lock_init(&rdi->pending_lock);
+	rdi->mmap_offset = PAGE_SIZE;
+	spin_lock_init(&rdi->mmap_offset_lock);
+}
 
 /**
- * hfi1_release_mmap_info - free mmap info structure
- * @ref: a pointer to the kref within struct hfi1_mmap_info
+ * rvt_release_mmap_info - free mmap info structure
+ * @ref: a pointer to the kref within struct rvt_mmap_info
  */
-void hfi1_release_mmap_info(struct kref *ref)
+void rvt_release_mmap_info(struct kref *ref)
 {
-	struct hfi1_mmap_info *ip =
-		container_of(ref, struct hfi1_mmap_info, ref);
-	struct hfi1_ibdev *dev = to_idev(ip->context->device);
+	struct rvt_mmap_info *ip =
+		container_of(ref, struct rvt_mmap_info, ref);
+	struct rvt_dev_info *rdi = ib_to_rvt(ip->context->device);
 
-	spin_lock_irq(&dev->pending_lock);
+	spin_lock_irq(&rdi->pending_lock);
 	list_del(&ip->pending_mmaps);
-	spin_unlock_irq(&dev->pending_lock);
+	spin_unlock_irq(&rdi->pending_lock);
 
 	vfree(ip->obj);
 	kfree(ip);
 }
 
-/*
- * open and close keep track of how many times the CQ is mapped,
- * to avoid releasing it.
- */
-static void hfi1_vma_open(struct vm_area_struct *vma)
+static void rvt_vma_open(struct vm_area_struct *vma)
 {
-	struct hfi1_mmap_info *ip = vma->vm_private_data;
+	struct rvt_mmap_info *ip = vma->vm_private_data;
 
 	kref_get(&ip->ref);
 }
 
-static void hfi1_vma_close(struct vm_area_struct *vma)
+static void rvt_vma_close(struct vm_area_struct *vma)
 {
-	struct hfi1_mmap_info *ip = vma->vm_private_data;
+	struct rvt_mmap_info *ip = vma->vm_private_data;
 
-	kref_put(&ip->ref, hfi1_release_mmap_info);
+	kref_put(&ip->ref, rvt_release_mmap_info);
 }
 
-static struct vm_operations_struct hfi1_vm_ops = {
-	.open =     hfi1_vma_open,
-	.close =    hfi1_vma_close,
+static const struct vm_operations_struct rvt_vm_ops = {
+	.open = rvt_vma_open,
+	.close = rvt_vma_close,
 };
 
 /**
- * hfi1_mmap - create a new mmap region
+ * rvt_mmap - create a new mmap region
  * @context: the IB user context of the process making the mmap() call
  * @vma: the VMA to be initialized
- * Return zero if the mmap is OK. Otherwise, return an errno.
+ *
+ * Return: zero if the mmap is OK. Otherwise, return an errno.
  */
-int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
+int rvt_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 {
-	struct hfi1_ibdev *dev = to_idev(context->device);
+	struct rvt_dev_info *rdi = ib_to_rvt(context->device);
 	unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
 	unsigned long size = vma->vm_end - vma->vm_start;
-	struct hfi1_mmap_info *ip, *pp;
+	struct rvt_mmap_info *ip, *pp;
 	int ret = -EINVAL;
 
 	/*
@@ -117,53 +120,60 @@ int hfi1_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	 * Normally, this list is very short since a call to create a
 	 * CQ, QP, or SRQ is soon followed by a call to mmap().
 	 */
-	spin_lock_irq(&dev->pending_lock);
-	list_for_each_entry_safe(ip, pp, &dev->pending_mmaps,
+	spin_lock_irq(&rdi->pending_lock);
+	list_for_each_entry_safe(ip, pp, &rdi->pending_mmaps,
 				 pending_mmaps) {
 		/* Only the creator is allowed to mmap the object */
-		if (context != ip->context || (__u64) offset != ip->offset)
+		if (context != ip->context || (__u64)offset != ip->offset)
 			continue;
 		/* Don't allow a mmap larger than the object. */
 		if (size > ip->size)
 			break;
 
 		list_del_init(&ip->pending_mmaps);
-		spin_unlock_irq(&dev->pending_lock);
+		spin_unlock_irq(&rdi->pending_lock);
 
 		ret = remap_vmalloc_range(vma, ip->obj, 0);
 		if (ret)
 			goto done;
-		vma->vm_ops = &hfi1_vm_ops;
+		vma->vm_ops = &rvt_vm_ops;
 		vma->vm_private_data = ip;
-		hfi1_vma_open(vma);
+		rvt_vma_open(vma);
 		goto done;
 	}
-	spin_unlock_irq(&dev->pending_lock);
+	spin_unlock_irq(&rdi->pending_lock);
 done:
 	return ret;
 }
 
-/*
- * Allocate information for hfi1_mmap
+/**
+ * rvt_create_mmap_info - allocate information for hfi1_mmap
+ * @rdi: rvt dev struct
+ * @size: size in bytes to map
+ * @context: user context
+ * @obj: opaque pointer to a cq, wq etc
+ *
+ * Return: rvt_mmap struct on success
  */
-struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev,
-					     u32 size,
-					     struct ib_ucontext *context,
-					     void *obj) {
-	struct hfi1_mmap_info *ip;
+struct rvt_mmap_info *rvt_create_mmap_info(struct rvt_dev_info *rdi,
+					   u32 size,
+					   struct ib_ucontext *context,
+					   void *obj)
+{
+	struct rvt_mmap_info *ip;
 
-	ip = kmalloc(sizeof(*ip), GFP_KERNEL);
+	ip = kmalloc_node(sizeof(*ip), GFP_KERNEL, rdi->dparms.node);
 	if (!ip)
-		goto bail;
+		return ip;
 
 	size = PAGE_ALIGN(size);
 
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
+	spin_lock_irq(&rdi->mmap_offset_lock);
+	if (rdi->mmap_offset == 0)
+		rdi->mmap_offset = PAGE_SIZE;
+	ip->offset = rdi->mmap_offset;
+	rdi->mmap_offset += size;
+	spin_unlock_irq(&rdi->mmap_offset_lock);
 
 	INIT_LIST_HEAD(&ip->pending_mmaps);
 	ip->size = size;
@@ -171,21 +181,27 @@ struct hfi1_mmap_info *hfi1_create_mmap_info(struct hfi1_ibdev *dev,
 	ip->obj = obj;
 	kref_init(&ip->ref);
 
-bail:
 	return ip;
 }
 
-void hfi1_update_mmap_info(struct hfi1_ibdev *dev, struct hfi1_mmap_info *ip,
-			   u32 size, void *obj)
+/**
+ * rvt_update_mmap_info - update a mem map
+ * @rdi: rvt dev struct
+ * @ip: mmap info pointer
+ * @size: size to grow by
+ * @obj: opaque pointer to cq, wq, etc.
+ */
+void rvt_update_mmap_info(struct rvt_dev_info *rdi, struct rvt_mmap_info *ip,
+			  u32 size, void *obj)
 {
 	size = PAGE_ALIGN(size);
 
-	spin_lock_irq(&dev->mmap_offset_lock);
-	if (dev->mmap_offset == 0)
-		dev->mmap_offset = PAGE_SIZE;
-	ip->offset = dev->mmap_offset;
-	dev->mmap_offset += size;
-	spin_unlock_irq(&dev->mmap_offset_lock);
+	spin_lock_irq(&rdi->mmap_offset_lock);
+	if (rdi->mmap_offset == 0)
+		rdi->mmap_offset = PAGE_SIZE;
+	ip->offset = rdi->mmap_offset;
+	rdi->mmap_offset += size;
+	spin_unlock_irq(&rdi->mmap_offset_lock);
 
 	ip->size = size;
 	ip->obj = obj;

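The mmap support above is what lets queue objects built with vmalloc_user() be shared with userspace: the creator allocates an rvt_mmap_info, hands its offset back through udata, and rvt_mmap() later matches that offset on the pending list. A hedged kernel-side sketch modelled on the rvt_resize_cq() hunk earlier in this diff (example_export_queue() is hypothetical, not part of the patch):

#include <rdma/rdma_vt.h>
#include "mmap.h"

/* Hypothetical: publish an already vmalloc_user()ed object of sz bytes. */
static int example_export_queue(struct rvt_dev_info *rdi,
				struct ib_ucontext *context,
				struct ib_udata *udata,
				void *obj, u32 sz,
				struct rvt_mmap_info **ipp)
{
	struct rvt_mmap_info *ip;

	ip = rvt_create_mmap_info(rdi, sz, context, obj);
	if (!ip)
		return -ENOMEM;

	/* Tell userspace which offset to pass to mmap(); see rvt_mmap(). */
	if (udata && udata->outlen >= sizeof(__u64)) {
		int ret = ib_copy_to_udata(udata, &ip->offset,
					   sizeof(ip->offset));

		if (ret) {
			/* Drops the only reference; as rvt_release_mmap_info()
			 * always does, this also vfree()s obj. */
			kref_put(&ip->ref, rvt_release_mmap_info);
			return ret;
		}
	}

	spin_lock_irq(&rdi->pending_lock);
	list_add(&ip->pending_mmaps, &rdi->pending_mmaps);
	spin_unlock_irq(&rdi->pending_lock);

	*ipp = ip;
	return 0;
}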
Some files were not shown because too many files changed in this diff