
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (76 commits)
  IB: Update MAINTAINERS with Hal's new email address
  IB/mlx4: Implement query SRQ
  IB/mlx4: Implement query QP
  IB/cm: Send no match if a SIDR REQ does not match a listen
  IB/cm: Fix handling of duplicate SIDR REQs
  IB/cm: cm_msgs.h should include ib_cm.h
  IB/cm: Include HCA ACK delay in local ACK timeout
  IB/cm: Use spin_lock_irq() instead of spin_lock_irqsave() when possible
  IB/sa: Make sure SA queries use default P_Key
  IPoIB: Recycle loopback skbs instead of freeing and reallocating
  IB/mthca: Replace memset(<addr>, 0, PAGE_SIZE) with clear_page(<addr>)
  IPoIB/cm: Fix warning if IPV6 is not enabled
  IB/core: Take sizeof the correct pointer when calling kmalloc()
  IB/ehca: Improve latency by unlocking after triggering the hardware
  IB/ehca: Notify consumers of LID/PKEY/SM changes after nondisruptive events
  IB/ehca: Return QP pointer in poll_cq()
  IB/ehca: Change idr spinlocks into rwlocks
  IB/ehca: Refactor sync between completions and destroy_cq using atomic_t
  IB/ehca: Lock renaming, static initializers
  IB/ehca: Report RDMA atomic attributes in query_qp()
  ...
Linus Torvalds committed 18 years ago
commit 0cdf6990e9
100 changed files with 2812 additions and 1061 deletions
  1. MAINTAINERS (+8 -7)
  2. drivers/infiniband/Kconfig (+7 -8)
  3. drivers/infiniband/core/agent.c (+16 -3)
  4. drivers/infiniband/core/cm.c (+134 -113)
  5. drivers/infiniband/core/cm_msgs.h (+1 -0)
  6. drivers/infiniband/core/cma.c (+0 -1)
  7. drivers/infiniband/core/mad.c (+41 -9)
  8. drivers/infiniband/core/multicast.c (+1 -1)
  9. drivers/infiniband/core/sa.h (+1 -1)
  10. drivers/infiniband/core/sa_query.c (+51 -36)
  11. drivers/infiniband/core/smi.c (+13 -3)
  12. drivers/infiniband/core/smi.h (+2 -0)
  13. drivers/infiniband/core/sysfs.c (+1 -1)
  14. drivers/infiniband/core/ucm.c (+0 -1)
  15. drivers/infiniband/core/umem.c (+1 -0)
  16. drivers/infiniband/hw/amso1100/Kconfig (+1 -1)
  17. drivers/infiniband/hw/cxgb3/Kconfig (+1 -1)
  18. drivers/infiniband/hw/cxgb3/cxio_hal.c (+3 -3)
  19. drivers/infiniband/hw/cxgb3/cxio_wr.h (+2 -1)
  20. drivers/infiniband/hw/cxgb3/iwch_cm.c (+48 -60)
  21. drivers/infiniband/hw/cxgb3/iwch_cm.h (+1 -0)
  22. drivers/infiniband/hw/cxgb3/iwch_provider.c (+4 -3)
  23. drivers/infiniband/hw/cxgb3/iwch_qp.c (+4 -3)
  24. drivers/infiniband/hw/ehca/Kconfig (+1 -1)
  25. drivers/infiniband/hw/ehca/ehca_av.c (+4 -2)
  26. drivers/infiniband/hw/ehca/ehca_classes.h (+62 -13)
  27. drivers/infiniband/hw/ehca/ehca_classes_pSeries.h (+2 -2)
  28. drivers/infiniband/hw/ehca/ehca_cq.c (+21 -29)
  29. drivers/infiniband/hw/ehca/ehca_hca.c (+58 -3)
  30. drivers/infiniband/hw/ehca/ehca_irq.c (+81 -59)
  31. drivers/infiniband/hw/ehca/ehca_irq.h (+0 -1)
  32. drivers/infiniband/hw/ehca/ehca_iverbs.h (+18 -0)
  33. drivers/infiniband/hw/ehca/ehca_main.c (+76 -22)
  34. drivers/infiniband/hw/ehca/ehca_qp.c (+533 -218)
  35. drivers/infiniband/hw/ehca/ehca_reqs.c (+56 -29)
  36. drivers/infiniband/hw/ehca/ehca_tools.h (+1 -0)
  37. drivers/infiniband/hw/ehca/ehca_uverbs.c (+7 -6)
  38. drivers/infiniband/hw/ehca/hcp_if.c (+31 -27)
  39. drivers/infiniband/hw/ehca/hcp_if.h (+0 -1)
  40. drivers/infiniband/hw/ehca/hipz_hw.h (+19 -0)
  41. drivers/infiniband/hw/ehca/ipz_pt_fn.h (+10 -18)
  42. drivers/infiniband/hw/ipath/Kconfig (+1 -1)
  43. drivers/infiniband/hw/ipath/ipath_common.h (+29 -4)
  44. drivers/infiniband/hw/ipath/ipath_cq.c (+5 -2)
  45. drivers/infiniband/hw/ipath/ipath_debug.h (+1 -1)
  46. drivers/infiniband/hw/ipath/ipath_diag.c (+35 -6)
  47. drivers/infiniband/hw/ipath/ipath_driver.c (+155 -32)
  48. drivers/infiniband/hw/ipath/ipath_eeprom.c (+268 -35)
  49. drivers/infiniband/hw/ipath/ipath_file_ops.c (+148 -57)
  50. drivers/infiniband/hw/ipath/ipath_fs.c (+7 -2)
  51. drivers/infiniband/hw/ipath/ipath_iba6110.c (+57 -44)
  52. drivers/infiniband/hw/ipath/ipath_iba6120.c (+55 -37)
  53. drivers/infiniband/hw/ipath/ipath_init_chip.c (+21 -5)
  54. drivers/infiniband/hw/ipath/ipath_intr.c (+124 -17)
  55. drivers/infiniband/hw/ipath/ipath_kernel.h (+78 -7)
  56. drivers/infiniband/hw/ipath/ipath_keys.c (+1 -1)
  57. drivers/infiniband/hw/ipath/ipath_layer.c (+1 -1)
  58. drivers/infiniband/hw/ipath/ipath_layer.h (+1 -1)
  59. drivers/infiniband/hw/ipath/ipath_mad.c (+8 -3)
  60. drivers/infiniband/hw/ipath/ipath_mmap.c (+1 -1)
  61. drivers/infiniband/hw/ipath/ipath_mr.c (+1 -1)
  62. drivers/infiniband/hw/ipath/ipath_qp.c (+8 -11)
  63. drivers/infiniband/hw/ipath/ipath_rc.c (+82 -34)
  64. drivers/infiniband/hw/ipath/ipath_registers.h (+1 -1)
  65. drivers/infiniband/hw/ipath/ipath_ruc.c (+26 -10)
  66. drivers/infiniband/hw/ipath/ipath_srq.c (+3 -1)
  67. drivers/infiniband/hw/ipath/ipath_stats.c (+22 -3)
  68. drivers/infiniband/hw/ipath/ipath_sysfs.c (+42 -1)
  69. drivers/infiniband/hw/ipath/ipath_uc.c (+5 -4)
  70. drivers/infiniband/hw/ipath/ipath_ud.c (+5 -1)
  71. drivers/infiniband/hw/ipath/ipath_user_pages.c (+1 -1)
  72. drivers/infiniband/hw/ipath/ipath_verbs.c (+18 -11)
  73. drivers/infiniband/hw/ipath/ipath_verbs.h (+2 -1)
  74. drivers/infiniband/hw/ipath/ipath_verbs_mcast.c (+1 -1)
  75. drivers/infiniband/hw/ipath/ipath_wc_ppc64.c (+1 -1)
  76. drivers/infiniband/hw/ipath/ipath_wc_x86_64.c (+23 -6)
  77. drivers/infiniband/hw/mlx4/Kconfig (+0 -1)
  78. drivers/infiniband/hw/mlx4/main.c (+5 -1)
  79. drivers/infiniband/hw/mlx4/mlx4_ib.h (+4 -0)
  80. drivers/infiniband/hw/mlx4/qp.c (+137 -0)
  81. drivers/infiniband/hw/mlx4/srq.c (+18 -0)
  82. drivers/infiniband/hw/mthca/Kconfig (+1 -1)
  83. drivers/infiniband/hw/mthca/mthca_allocator.c (+1 -1)
  84. drivers/infiniband/hw/mthca/mthca_eq.c (+1 -1)
  85. drivers/infiniband/ulp/ipoib/Kconfig (+1 -1)
  86. drivers/infiniband/ulp/ipoib/ipoib_cm.c (+1 -3)
  87. drivers/infiniband/ulp/ipoib/ipoib_ib.c (+17 -16)
  88. drivers/infiniband/ulp/iser/Kconfig (+1 -1)
  89. drivers/infiniband/ulp/srp/Kconfig (+1 -1)
  90. drivers/net/cxgb3/version.h (+1 -1)
  91. drivers/net/mlx4/fw.c (+3 -0)
  92. drivers/net/mlx4/fw.h (+1 -0)
  93. drivers/net/mlx4/main.c (+1 -0)
  94. drivers/net/mlx4/mlx4.h (+1 -0)
  95. drivers/net/mlx4/qp.c (+21 -0)
  96. drivers/net/mlx4/srq.c (+30 -0)
  97. include/linux/mlx4/device.h (+2 -0)
  98. include/linux/mlx4/qp.h (+3 -0)
  99. include/rdma/ib_cm.h (+0 -1)
  100. include/rdma/ib_mad.h (+3 -0)

+ 8 - 7
MAINTAINERS

@@ -370,7 +370,7 @@ P:	Tom Tucker
 M:	tom@opengridcomputing.com
 P:	Steve Wise
 M:	swise@opengridcomputing.com
-L:	openib-general@openib.org
+L:	general@lists.openfabrics.org
 S:	Maintained
 
 AOA (Apple Onboard Audio) ALSA DRIVER
@@ -1395,7 +1395,7 @@ P:	Hoang-Nam Nguyen
 M:	hnguyen@de.ibm.com
 P:	Christoph Raisch
 M:	raisch@de.ibm.com
-L:	openib-general@openib.org
+L:	general@lists.openfabrics.org
 S:	Supported
 
 EMU10K1 SOUND DRIVER
@@ -1849,8 +1849,8 @@ M:	rolandd@cisco.com
 P:	Sean Hefty
 M:	mshefty@ichips.intel.com
 P:	Hal Rosenstock
-M:	halr@voltaire.com
-L:	openib-general@openib.org
+M:	hal.rosenstock@gmail.com
+L:	general@lists.openfabrics.org
 W:	http://www.openib.org/
 T:	git kernel.org:/pub/scm/linux/kernel/git/roland/infiniband.git
 S:	Supported
@@ -1988,9 +1988,10 @@ M:	jjciarla@raiz.uncu.edu.ar
 S:	Maintained
 
 IPATH DRIVER:
-P:	Bryan O'Sullivan
-M:	support@pathscale.com
-L:	openib-general@openib.org
+P:	Arthur Jones
+M:	infinipath@qlogic.com
+L:	general@lists.openfabrics.org
+T:	git git://git.qlogic.com/ipath-linux-2.6
 S:	Supported
 
 IPMI SUBSYSTEM

+ 7 - 8
drivers/infiniband/Kconfig

@@ -1,14 +1,14 @@
-menu "InfiniBand support"
-	depends on HAS_IOMEM
-
-config INFINIBAND
-	depends on PCI || BROKEN
+menuconfig INFINIBAND
 	tristate "InfiniBand support"
 	tristate "InfiniBand support"
+	depends on PCI || BROKEN
+	depends on HAS_IOMEM
 	---help---
 	---help---
 	  Core support for InfiniBand (IB).  Make sure to also select
 	  Core support for InfiniBand (IB).  Make sure to also select
 	  any protocols you wish to use as well as drivers for your
 	  any protocols you wish to use as well as drivers for your
 	  InfiniBand hardware.
 	  InfiniBand hardware.
 
 
+if INFINIBAND
+
 config INFINIBAND_USER_MAD
 config INFINIBAND_USER_MAD
 	tristate "InfiniBand userspace MAD support"
 	tristate "InfiniBand userspace MAD support"
 	depends on INFINIBAND
 	depends on INFINIBAND
@@ -20,7 +20,6 @@ config INFINIBAND_USER_MAD
 
 
 config INFINIBAND_USER_ACCESS
 config INFINIBAND_USER_ACCESS
 	tristate "InfiniBand userspace access (verbs and CM)"
 	tristate "InfiniBand userspace access (verbs and CM)"
-	depends on INFINIBAND
 	---help---
 	---help---
 	  Userspace InfiniBand access support.  This enables the
 	  Userspace InfiniBand access support.  This enables the
 	  kernel side of userspace verbs and the userspace
 	  kernel side of userspace verbs and the userspace
@@ -37,7 +36,7 @@ config INFINIBAND_USER_MEM
 
 
 config INFINIBAND_ADDR_TRANS
 config INFINIBAND_ADDR_TRANS
 	bool
 	bool
-	depends on INFINIBAND && INET
+	depends on INET
 	default y
 	default y
 
 
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/mthca/Kconfig"
@@ -54,4 +53,4 @@ source "drivers/infiniband/ulp/srp/Kconfig"
 
 
 source "drivers/infiniband/ulp/iser/Kconfig"
 source "drivers/infiniband/ulp/iser/Kconfig"
 
 
-endmenu
+endif # INFINIBAND

+ 16 - 3
drivers/infiniband/core/agent.c

@@ -3,7 +3,7 @@
  * Copyright (c) 2004, 2005 Infinicon Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Intel Corporation.  All rights reserved.
  * Copyright (c) 2004, 2005 Topspin Corporation.  All rights reserved.
- * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
+ * Copyright (c) 2004-2007 Voltaire Corporation.  All rights reserved.
  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -34,7 +34,6 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
- * $Id: agent.c 1389 2004-12-27 22:56:47Z roland $
  */
 
 #include <linux/slab.h>
@@ -42,6 +41,7 @@
 
 #include "agent.h"
 #include "smi.h"
+#include "mad_priv.h"
 
 #define SPFX "ib_agent: "
 
@@ -87,8 +87,13 @@ int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
 	struct ib_mad_send_buf *send_buf;
 	struct ib_ah *ah;
 	int ret;
+	struct ib_mad_send_wr_private *mad_send_wr;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH)
+		port_priv = ib_get_agent_port(device, 0);
+	else
+		port_priv = ib_get_agent_port(device, port_num);
 
-	port_priv = ib_get_agent_port(device, port_num);
 	if (!port_priv) {
 		printk(KERN_ERR SPFX "Unable to find port agent\n");
 		return -ENODEV;
@@ -113,6 +118,14 @@ int agent_send_response(struct ib_mad *mad, struct ib_grh *grh,
 
 	memcpy(send_buf->mad, mad, sizeof *mad);
 	send_buf->ah = ah;
+
+	if (device->node_type == RDMA_NODE_IB_SWITCH) {
+		mad_send_wr = container_of(send_buf,
+					   struct ib_mad_send_wr_private,
+					   send_buf);
+		mad_send_wr->send_wr.wr.ud.port_num = port_num;
+	}
+
 	if ((ret = ib_post_send_mad(send_buf, NULL))) {
 		printk(KERN_ERR SPFX "ib_post_send_mad error:%d\n", ret);
 		goto err2;
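
The switch hunk above recovers the private MAD work request from its embedded send_buf member using the kernel's container_of macro, then stamps the real egress port into it. A minimal userspace sketch of that pattern follows; the macro definition matches the kernel's, but struct send_buf and struct send_wr_private are invented stand-ins for illustration only:

#include <stdio.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct send_buf { int length; };

struct send_wr_private {          /* stands in for ib_mad_send_wr_private */
	int port_num;
	struct send_buf send_buf; /* embedded member, as in the patch */
};

int main(void)
{
	struct send_wr_private wr = { .port_num = 0 };
	struct send_buf *buf = &wr.send_buf; /* only the member escapes */

	/* Walk back from the member pointer to the enclosing struct. */
	struct send_wr_private *owner =
		container_of(buf, struct send_wr_private, send_buf);
	owner->port_num = 1; /* same idea as setting wr.ud.port_num */
	printf("port_num = %d\n", wr.port_num); /* prints 1 */
	return 0;
}
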

+ 134 - 113
drivers/infiniband/core/cm.c

@@ -87,6 +87,7 @@ struct cm_port {
 struct cm_device {
 	struct list_head list;
 	struct ib_device *device;
+	u8 ack_delay;
 	struct cm_port port[0];
 };
 
@@ -95,7 +96,7 @@ struct cm_av {
 	union ib_gid dgid;
 	struct ib_ah_attr ah_attr;
 	u16 pkey_index;
-	u8 packet_life_time;
+	u8 timeout;
 };
 
 struct cm_work {
@@ -154,6 +155,7 @@ struct cm_id_private {
 	u8 retry_count;
 	u8 rnr_retry_count;
 	u8 service_timeout;
+	u8 target_ack_delay;
 
 	struct list_head work_list;
 	atomic_t work_count;
@@ -293,7 +295,7 @@ static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av)
 	av->port = port;
 	ib_init_ah_from_path(cm_dev->device, port->port_num, path,
 			     &av->ah_attr);
-	av->packet_life_time = path->packet_life_time;
+	av->timeout = path->packet_life_time + 1;
 	return 0;
 }
 
@@ -318,12 +320,10 @@ static int cm_alloc_id(struct cm_id_private *cm_id_priv)
 
 static void cm_free_id(__be32 local_id)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&cm.lock, flags);
+	spin_lock_irq(&cm.lock);
 	idr_remove(&cm.local_id_table,
 		   (__force int) (local_id ^ cm.random_id_operand));
-	spin_unlock_irqrestore(&cm.lock, flags);
+	spin_unlock_irq(&cm.lock);
 }
 
 static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
@@ -345,11 +345,10 @@ static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
 static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
 {
 	struct cm_id_private *cm_id_priv;
-	unsigned long flags;
 
-	spin_lock_irqsave(&cm.lock, flags);
+	spin_lock_irq(&cm.lock);
 	cm_id_priv = cm_get_id(local_id, remote_id);
-	spin_unlock_irqrestore(&cm.lock, flags);
+	spin_unlock_irq(&cm.lock);
 
 	return cm_id_priv;
 }
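
The substitution that repeats throughout this file trades spin_lock_irqsave() for spin_lock_irq(): the irqsave variant saves the caller's interrupt state into flags and restores it exactly on unlock, while the irq variant unconditionally disables local IRQs on lock and re-enables them on unlock, so it is only correct where the code provably runs with interrupts enabled (process context and workqueue callbacks, as in these CM handlers). A schematic kernel-style fragment contrasting the two; it is illustrative only, with hypothetical function names, and is not from this patch:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(lock);

void may_be_called_with_irqs_off(void)
{
	unsigned long flags;

	spin_lock_irqsave(&lock, flags);      /* saves caller's IRQ state */
	/* ... critical section ... */
	spin_unlock_irqrestore(&lock, flags); /* restores it exactly */
}

void known_process_context(void)
{
	spin_lock_irq(&lock);   /* IRQs were on; just disable them */
	/* ... critical section ... */
	spin_unlock_irq(&lock); /* unconditionally re-enable IRQs */
}
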
@@ -646,6 +645,25 @@ static inline int cm_convert_to_ms(int iba_time)
 	return 1 << max(iba_time - 8, 0);
 }
 
+/*
+ * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
+ * Because of how ack_timeout is stored, adding one doubles the timeout.
+ * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
+ * increment it (round up) only if the other is within 50%.
+ */
+static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
+{
+	int ack_timeout = packet_life_time + 1;
+
+	if (ack_timeout >= ca_ack_delay)
+		ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
+	else
+		ack_timeout = ca_ack_delay +
+			      (ack_timeout >= (ca_ack_delay - 1));
+
+	return min(31, ack_timeout);
+}
+
 static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
 {
 	if (timewait_info->inserted_remote_id) {
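
Both helpers above operate on IBA's 5-bit exponential time encoding, where a stored value t represents 4.096 us x 2^t, so adding one to the exponent doubles the real timeout. A standalone C sketch that re-implements cm_ack_timeout() verbatim and prints a few sample roundings (the input values are chosen arbitrarily for illustration):

#include <stdio.h>

typedef unsigned char u8;
#define min(a, b) ((a) < (b) ? (a) : (b))

/* Same arithmetic as the patch: pick max(ack_delay, life_time + 1) and
 * round up by one step only when the other term is within 50%, i.e.
 * within one exponent step. */
static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
{
	int ack_timeout = packet_life_time + 1;

	if (ack_timeout >= ca_ack_delay)
		ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
	else
		ack_timeout = ca_ack_delay +
			      (ack_timeout >= (ca_ack_delay - 1));

	return min(31, ack_timeout);
}

int main(void)
{
	/* 2x life_time gives exponent 11; ack_delay 5 is far below,
	 * so no round-up: result 11. */
	printf("%d\n", cm_ack_timeout(5, 10));  /* 11 */
	/* ack_delay 11 is within one step of 11, so round up: 12. */
	printf("%d\n", cm_ack_timeout(11, 10)); /* 12 */
	/* ack_delay 20 dominates and the life term is not within one
	 * step (11 >= 19 is false), so no round-up: 20. */
	printf("%d\n", cm_ack_timeout(20, 10)); /* 20 */
	return 0;
}
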
@@ -689,7 +707,7 @@ static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
 	 * timewait before notifying the user that we've exited timewait.
 	 */
 	cm_id_priv->id.state = IB_CM_TIMEWAIT;
-	wait_time = cm_convert_to_ms(cm_id_priv->av.packet_life_time + 1);
+	wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);
 	queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
 			   msecs_to_jiffies(wait_time));
 	cm_id_priv->timewait_info = NULL;
@@ -713,31 +731,30 @@ static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
 {
 	struct cm_id_private *cm_id_priv;
 	struct cm_work *work;
-	unsigned long flags;
 
 	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
 retest:
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	switch (cm_id->state) {
 	case IB_CM_LISTEN:
 		cm_id->state = IB_CM_IDLE;
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
-		spin_lock_irqsave(&cm.lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
+		spin_lock_irq(&cm.lock);
 		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
-		spin_unlock_irqrestore(&cm.lock, flags);
+		spin_unlock_irq(&cm.lock);
 		break;
 	case IB_CM_SIDR_REQ_SENT:
 		cm_id->state = IB_CM_IDLE;
 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		break;
 	case IB_CM_SIDR_REQ_RCVD:
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
 		break;
 	case IB_CM_REQ_SENT:
 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
 			       &cm_id_priv->id.device->node_guid,
 			       sizeof cm_id_priv->id.device->node_guid,
@@ -747,9 +764,9 @@ retest:
 		if (err == -ENOMEM) {
 			/* Do not reject to allow future retries. */
 			cm_reset_to_idle(cm_id_priv);
-			spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+			spin_unlock_irq(&cm_id_priv->lock);
 		} else {
-			spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+			spin_unlock_irq(&cm_id_priv->lock);
 			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
 				       NULL, 0, NULL, 0);
 		}
@@ -762,25 +779,25 @@ retest:
 	case IB_CM_MRA_REQ_SENT:
 	case IB_CM_REP_RCVD:
 	case IB_CM_MRA_REP_SENT:
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
 			       NULL, 0, NULL, 0);
 		break;
 	case IB_CM_ESTABLISHED:
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		ib_send_cm_dreq(cm_id, NULL, 0);
 		goto retest;
 	case IB_CM_DREQ_SENT:
 		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
 		cm_enter_timewait(cm_id_priv);
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		break;
 	case IB_CM_DREQ_RCVD:
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		ib_send_cm_drep(cm_id, NULL, 0);
 		break;
 	default:
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		break;
 	}
 
@@ -912,7 +929,8 @@ static void cm_format_req(struct cm_req_msg *req_msg,
 	cm_req_set_primary_sl(req_msg, param->primary_path->sl);
 	cm_req_set_primary_subnet_local(req_msg, 1); /* local only... */
 	cm_req_set_primary_local_ack_timeout(req_msg,
-		min(31, param->primary_path->packet_life_time + 1));
+		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
+			       param->primary_path->packet_life_time));
 
 	if (param->alternate_path) {
 		req_msg->alt_local_lid = param->alternate_path->slid;
@@ -927,7 +945,8 @@
 		cm_req_set_alt_sl(req_msg, param->alternate_path->sl);
 		cm_req_set_alt_subnet_local(req_msg, 1); /* local only... */
 		cm_req_set_alt_local_ack_timeout(req_msg,
-			min(31, param->alternate_path->packet_life_time + 1));
+			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
+				       param->alternate_path->packet_life_time));
 	}
 
 	if (param->private_data && param->private_data_len)
@@ -1169,7 +1188,6 @@ static void cm_format_req_event(struct cm_work *work,
 static void cm_process_work(struct cm_id_private *cm_id_priv,
 			    struct cm_work *work)
 {
-	unsigned long flags;
 	int ret;
 
 	/* We will typically only have the current event to report. */
@@ -1177,9 +1195,9 @@
 	cm_free_work(work);
 
 	while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
-		spin_lock_irqsave(&cm_id_priv->lock, flags);
+		spin_lock_irq(&cm_id_priv->lock);
 		work = cm_dequeue_work(cm_id_priv);
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		BUG_ON(!work);
 		ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
 						&work->cm_event);
@@ -1250,7 +1268,6 @@ static void cm_dup_req_handler(struct cm_work *work,
 			       struct cm_id_private *cm_id_priv)
 {
 	struct ib_mad_send_buf *msg = NULL;
-	unsigned long flags;
 	int ret;
 
 	/* Quick state check to discard duplicate REQs. */
@@ -1261,7 +1278,7 @@
 	if (ret)
 		return;
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	switch (cm_id_priv->id.state) {
 	case IB_CM_MRA_REQ_SENT:
 		cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
@@ -1276,14 +1293,14 @@
 	default:
 		goto unlock;
 	}
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		goto free;
 	return;
 
-unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+unlock:	spin_unlock_irq(&cm_id_priv->lock);
 free:	cm_free_msg(msg);
 }
 
@@ -1293,17 +1310,16 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
 	struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
 	struct cm_timewait_info *timewait_info;
 	struct cm_req_msg *req_msg;
-	unsigned long flags;
 
 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
 
 	/* Check for possible duplicate REQ. */
-	spin_lock_irqsave(&cm.lock, flags);
+	spin_lock_irq(&cm.lock);
 	timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
 	if (timewait_info) {
 		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
 					   timewait_info->work.remote_id);
-		spin_unlock_irqrestore(&cm.lock, flags);
+		spin_unlock_irq(&cm.lock);
 		if (cur_cm_id_priv) {
 			cm_dup_req_handler(work, cur_cm_id_priv);
 			cm_deref_id(cur_cm_id_priv);
@@ -1315,7 +1331,7 @@
 	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
 	if (timewait_info) {
 		cm_cleanup_timewait(cm_id_priv->timewait_info);
-		spin_unlock_irqrestore(&cm.lock, flags);
+		spin_unlock_irq(&cm.lock);
 		cm_issue_rej(work->port, work->mad_recv_wc,
 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
 			     NULL, 0);
@@ -1328,7 +1344,7 @@
 					   req_msg->private_data);
 	if (!listen_cm_id_priv) {
 		cm_cleanup_timewait(cm_id_priv->timewait_info);
-		spin_unlock_irqrestore(&cm.lock, flags);
+		spin_unlock_irq(&cm.lock);
 		cm_issue_rej(work->port, work->mad_recv_wc,
 			     IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
 			     NULL, 0);
@@ -1338,7 +1354,7 @@
 	atomic_inc(&cm_id_priv->refcount);
 	cm_id_priv->id.state = IB_CM_REQ_RCVD;
 	atomic_inc(&cm_id_priv->work_count);
-	spin_unlock_irqrestore(&cm.lock, flags);
+	spin_unlock_irq(&cm.lock);
 out:
 	return listen_cm_id_priv;
 }
@@ -1440,7 +1456,8 @@ static void cm_format_rep(struct cm_rep_msg *rep_msg,
 	cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
 	rep_msg->resp_resources = param->responder_resources;
 	rep_msg->initiator_depth = param->initiator_depth;
-	cm_rep_set_target_ack_delay(rep_msg, param->target_ack_delay);
+	cm_rep_set_target_ack_delay(rep_msg,
+				    cm_id_priv->av.port->cm_dev->ack_delay);
 	cm_rep_set_failover(rep_msg, param->failover_accepted);
 	cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
 	cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
@@ -1591,7 +1608,6 @@ static void cm_dup_rep_handler(struct cm_work *work)
 	struct cm_id_private *cm_id_priv;
 	struct cm_rep_msg *rep_msg;
 	struct ib_mad_send_buf *msg = NULL;
-	unsigned long flags;
 	int ret;
 
 	rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
@@ -1604,7 +1620,7 @@
 	if (ret)
 		goto deref;
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
 		cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
 			      cm_id_priv->private_data,
@@ -1616,14 +1632,14 @@
 			      cm_id_priv->private_data_len);
 	else
 		goto unlock;
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 
 	ret = ib_post_send_mad(msg, NULL);
 	if (ret)
 		goto free;
 	goto deref;
 
-unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+unlock:	spin_unlock_irq(&cm_id_priv->lock);
 free:	cm_free_msg(msg);
 deref:	cm_deref_id(cm_id_priv);
 }
@@ -1632,7 +1648,6 @@ static int cm_rep_handler(struct cm_work *work)
 {
 	struct cm_id_private *cm_id_priv;
 	struct cm_rep_msg *rep_msg;
-	unsigned long flags;
 	int ret;
 
 	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1644,13 +1659,13 @@
 
 	cm_format_rep_event(work);
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	switch (cm_id_priv->id.state) {
 	case IB_CM_REQ_SENT:
 	case IB_CM_MRA_REQ_RCVD:
 		break;
 	default:
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		ret = -EINVAL;
 		goto error;
 	}
@@ -1663,7 +1678,7 @@
 	/* Check for duplicate REP. */
 	if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
 		spin_unlock(&cm.lock);
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		ret = -EINVAL;
 		goto error;
 	}
@@ -1673,7 +1688,7 @@
 			 &cm.remote_id_table);
 		cm_id_priv->timewait_info->inserted_remote_id = 0;
 		spin_unlock(&cm.lock);
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		cm_issue_rej(work->port, work->mad_recv_wc,
 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
 			     NULL, 0);
@@ -1689,6 +1704,13 @@
 	cm_id_priv->responder_resources = rep_msg->initiator_depth;
 	cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
 	cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
+	cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
+	cm_id_priv->av.timeout =
+			cm_ack_timeout(cm_id_priv->target_ack_delay,
+				       cm_id_priv->av.timeout - 1);
+	cm_id_priv->alt_av.timeout =
+			cm_ack_timeout(cm_id_priv->target_ack_delay,
+				       cm_id_priv->alt_av.timeout - 1);
 
 	/* todo: handle peer_to_peer */
 
@@ -1696,7 +1718,7 @@ static int cm_rep_handler(struct cm_work *work)
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 
 	if (ret)
 		cm_process_work(cm_id_priv, work);
@@ -1712,7 +1734,6 @@ error:
 static int cm_establish_handler(struct cm_work *work)
 {
 	struct cm_id_private *cm_id_priv;
-	unsigned long flags;
 	int ret;
 
 	/* See comment in cm_establish about lookup. */
@@ -1720,9 +1741,9 @@
 	if (!cm_id_priv)
 		return -EINVAL;
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		goto out;
 	}
 
@@ -1730,7 +1751,7 @@
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 
 	if (ret)
 		cm_process_work(cm_id_priv, work);
@@ -1746,7 +1767,6 @@ static int cm_rtu_handler(struct cm_work *work)
 {
 	struct cm_id_private *cm_id_priv;
 	struct cm_rtu_msg *rtu_msg;
-	unsigned long flags;
 	int ret;
 
 	rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1757,10 +1777,10 @@
 
 	work->cm_event.private_data = &rtu_msg->private_data;
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->id.state != IB_CM_REP_SENT &&
 	    cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		goto out;
 	}
 	cm_id_priv->id.state = IB_CM_ESTABLISHED;
@@ -1769,7 +1789,7 @@
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 
 	if (ret)
 		cm_process_work(cm_id_priv, work);
@@ -1932,7 +1952,6 @@ static int cm_dreq_handler(struct cm_work *work)
 	struct cm_id_private *cm_id_priv;
 	struct cm_dreq_msg *dreq_msg;
 	struct ib_mad_send_buf *msg = NULL;
-	unsigned long flags;
 	int ret;
 
 	dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -1945,7 +1964,7 @@
 
 	work->cm_event.private_data = &dreq_msg->private_data;
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
 		goto unlock;
 
@@ -1964,7 +1983,7 @@
 		cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
 			       cm_id_priv->private_data,
 			       cm_id_priv->private_data_len);
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 
 		if (ib_post_send_mad(msg, NULL))
 			cm_free_msg(msg);
@@ -1977,7 +1996,7 @@
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 
 	if (ret)
 		cm_process_work(cm_id_priv, work);
@@ -1985,7 +2004,7 @@
 		cm_deref_id(cm_id_priv);
 	return 0;
 
-unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+unlock:	spin_unlock_irq(&cm_id_priv->lock);
 deref:	cm_deref_id(cm_id_priv);
 	return -EINVAL;
 }
@@ -1994,7 +2013,6 @@ static int cm_drep_handler(struct cm_work *work)
 {
 	struct cm_id_private *cm_id_priv;
 	struct cm_drep_msg *drep_msg;
-	unsigned long flags;
 	int ret;
 
 	drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2005,10 +2023,10 @@
 
 	work->cm_event.private_data = &drep_msg->private_data;
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
 	    cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		goto out;
 	}
 	cm_enter_timewait(cm_id_priv);
@@ -2017,7 +2035,7 @@
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 
 	if (ret)
 		cm_process_work(cm_id_priv, work);
@@ -2107,17 +2125,16 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
 {
 	struct cm_timewait_info *timewait_info;
 	struct cm_id_private *cm_id_priv;
-	unsigned long flags;
 	__be32 remote_id;
 
 	remote_id = rej_msg->local_comm_id;
 
 	if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
-		spin_lock_irqsave(&cm.lock, flags);
+		spin_lock_irq(&cm.lock);
 		timewait_info = cm_find_remote_id( *((__be64 *) rej_msg->ari),
 						  remote_id);
 		if (!timewait_info) {
-			spin_unlock_irqrestore(&cm.lock, flags);
+			spin_unlock_irq(&cm.lock);
 			return NULL;
 		}
 		cm_id_priv = idr_find(&cm.local_id_table, (__force int)
@@ -2129,7 +2146,7 @@ static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
 			else
 				cm_id_priv = NULL;
 		}
-		spin_unlock_irqrestore(&cm.lock, flags);
+		spin_unlock_irq(&cm.lock);
 	} else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
 		cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
 	else
@@ -2142,7 +2159,6 @@ static int cm_rej_handler(struct cm_work *work)
 {
 	struct cm_id_private *cm_id_priv;
 	struct cm_rej_msg *rej_msg;
-	unsigned long flags;
 	int ret;
 
 	rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2152,7 +2168,7 @@
 
 	cm_format_rej_event(work);
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	switch (cm_id_priv->id.state) {
 	case IB_CM_REQ_SENT:
 	case IB_CM_MRA_REQ_RCVD:
@@ -2176,7 +2192,7 @@
 		cm_enter_timewait(cm_id_priv);
 		break;
 	default:
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		ret = -EINVAL;
 		goto out;
 	}
@@ -2184,7 +2200,7 @@
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 
 	if (ret)
 		cm_process_work(cm_id_priv, work);
@@ -2295,7 +2311,6 @@ static int cm_mra_handler(struct cm_work *work)
 {
 	struct cm_id_private *cm_id_priv;
 	struct cm_mra_msg *mra_msg;
-	unsigned long flags;
 	int timeout, ret;
 
 	mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2307,9 +2322,9 @@
 	work->cm_event.param.mra_rcvd.service_timeout =
 					cm_mra_get_service_timeout(mra_msg);
 	timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
-		  cm_convert_to_ms(cm_id_priv->av.packet_life_time);
+		  cm_convert_to_ms(cm_id_priv->av.timeout);
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	switch (cm_id_priv->id.state) {
 	case IB_CM_REQ_SENT:
 		if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
@@ -2342,7 +2357,7 @@
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 
 	if (ret)
 		cm_process_work(cm_id_priv, work);
@@ -2350,7 +2365,7 @@
 		cm_deref_id(cm_id_priv);
 	return 0;
 out:
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 	cm_deref_id(cm_id_priv);
 	return -EINVAL;
 }
@@ -2379,7 +2394,8 @@ static void cm_format_lap(struct cm_lap_msg *lap_msg,
 	cm_lap_set_sl(lap_msg, alternate_path->sl);
 	cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
 	cm_lap_set_local_ack_timeout(lap_msg,
-		min(31, alternate_path->packet_life_time + 1));
+		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
+			       alternate_path->packet_life_time));
 
 	if (private_data && private_data_len)
 		memcpy(lap_msg->private_data, private_data, private_data_len);
@@ -2410,6 +2426,9 @@ int ib_send_cm_lap(struct ib_cm_id *cm_id,
 	ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av);
 	if (ret)
 		goto out;
+	cm_id_priv->alt_av.timeout =
+			cm_ack_timeout(cm_id_priv->target_ack_delay,
+				       cm_id_priv->alt_av.timeout - 1);
 
 	ret = cm_alloc_msg(cm_id_priv, &msg);
 	if (ret)
@@ -2465,7 +2484,6 @@ static int cm_lap_handler(struct cm_work *work)
 	struct cm_lap_msg *lap_msg;
 	struct ib_cm_lap_event_param *param;
 	struct ib_mad_send_buf *msg = NULL;
-	unsigned long flags;
 	int ret;
 
 	/* todo: verify LAP request and send reject APR if invalid. */
@@ -2480,7 +2498,7 @@
 	cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
 	work->cm_event.private_data = &lap_msg->private_data;
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
 		goto unlock;
 
@@ -2497,7 +2515,7 @@
 			      cm_id_priv->service_timeout,
 			      cm_id_priv->private_data,
 			      cm_id_priv->private_data_len);
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 
 		if (ib_post_send_mad(msg, NULL))
 			cm_free_msg(msg);
@@ -2515,7 +2533,7 @@
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 
 	if (ret)
 		cm_process_work(cm_id_priv, work);
@@ -2523,7 +2541,7 @@
 		cm_deref_id(cm_id_priv);
 	return 0;
 
-unlock:	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+unlock:	spin_unlock_irq(&cm_id_priv->lock);
 deref:	cm_deref_id(cm_id_priv);
 	return -EINVAL;
 }
@@ -2598,7 +2616,6 @@ static int cm_apr_handler(struct cm_work *work)
 {
 	struct cm_id_private *cm_id_priv;
 	struct cm_apr_msg *apr_msg;
-	unsigned long flags;
 	int ret;
 
 	apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
@@ -2612,11 +2629,11 @@
 	work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
 	work->cm_event.private_data = &apr_msg->private_data;
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
 	    (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
 	     cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		goto out;
 	}
 	cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
@@ -2626,7 +2643,7 @@
 	ret = atomic_inc_and_test(&cm_id_priv->work_count);
 	if (!ret)
 		list_add_tail(&work->list, &cm_id_priv->work_list);
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 
 	if (ret)
 		cm_process_work(cm_id_priv, work);
@@ -2761,7 +2778,6 @@ static int cm_sidr_req_handler(struct cm_work *work)
 	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
 	struct cm_sidr_req_msg *sidr_req_msg;
 	struct ib_wc *wc;
-	unsigned long flags;
 
 	cm_id = ib_create_cm_id(work->port->cm_dev->device, NULL, NULL);
 	if (IS_ERR(cm_id))
@@ -2778,27 +2794,26 @@
 				work->mad_recv_wc->recv_buf.grh,
 				&cm_id_priv->av);
 	cm_id_priv->id.remote_id = sidr_req_msg->request_id;
-	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
 	cm_id_priv->tid = sidr_req_msg->hdr.tid;
 	atomic_inc(&cm_id_priv->work_count);
 
-	spin_lock_irqsave(&cm.lock, flags);
+	spin_lock_irq(&cm.lock);
 	cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
 	if (cur_cm_id_priv) {
-		spin_unlock_irqrestore(&cm.lock, flags);
+		spin_unlock_irq(&cm.lock);
 		goto out; /* Duplicate message. */
 	}
+	cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
 	cur_cm_id_priv = cm_find_listen(cm_id->device,
 					sidr_req_msg->service_id,
 					sidr_req_msg->private_data);
 	if (!cur_cm_id_priv) {
-		rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
-		spin_unlock_irqrestore(&cm.lock, flags);
-		/* todo: reply with no match */
+		spin_unlock_irq(&cm.lock);
+		cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
 		goto out; /* No match. */
 	}
 	atomic_inc(&cur_cm_id_priv->refcount);
-	spin_unlock_irqrestore(&cm.lock, flags);
+	spin_unlock_irq(&cm.lock);
 
 	cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
 	cm_id_priv->id.context = cur_cm_id_priv->id.context;
@@ -2899,7 +2914,6 @@ static int cm_sidr_rep_handler(struct cm_work *work)
 {
 	struct cm_sidr_rep_msg *sidr_rep_msg;
 	struct cm_id_private *cm_id_priv;
-	unsigned long flags;
 
 	sidr_rep_msg = (struct cm_sidr_rep_msg *)
 				work->mad_recv_wc->recv_buf.mad;
@@ -2907,14 +2921,14 @@
 	if (!cm_id_priv)
 		return -EINVAL; /* Unmatched reply. */
 
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
-		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+		spin_unlock_irq(&cm_id_priv->lock);
 		goto out;
 	}
 	cm_id_priv->id.state = IB_CM_IDLE;
 	ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 
 	cm_format_sidr_rep_event(work);
 	cm_process_work(cm_id_priv, work);
@@ -2930,14 +2944,13 @@ static void cm_process_send_error(struct ib_mad_send_buf *msg,
 	struct cm_id_private *cm_id_priv;
 	struct ib_cm_event cm_event;
 	enum ib_cm_state state;
-	unsigned long flags;
 	int ret;
 
 	memset(&cm_event, 0, sizeof cm_event);
 	cm_id_priv = msg->context[0];
 
 	/* Discard old sends or ones without a response. */
-	spin_lock_irqsave(&cm_id_priv->lock, flags);
+	spin_lock_irq(&cm_id_priv->lock);
 	state = (enum ib_cm_state) (unsigned long) msg->context[1];
 	if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
 		goto discard;
@@ -2964,7 +2977,7 @@
 	default:
 		goto discard;
 	}
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 	cm_event.param.send_status = wc_status;
 
 	/* No other events can occur on the cm_id at this point. */
@@ -2974,7 +2987,7 @@
 		ib_destroy_cm_id(&cm_id_priv->id);
 	return;
 discard:
-	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
+	spin_unlock_irq(&cm_id_priv->lock);
 	cm_free_msg(msg);
 }
 
@@ -3269,8 +3282,7 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
 			*qp_attr_mask |= IB_QP_ALT_PATH;
 			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
 			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
-			qp_attr->alt_timeout =
-					cm_id_priv->alt_av.packet_life_time + 1;
+			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
 		}
 		ret = 0;
@@ -3308,8 +3320,7 @@ static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
 				*qp_attr_mask |= IB_QP_TIMEOUT | IB_QP_RETRY_CNT |
 						 IB_QP_RNR_RETRY |
 						 IB_QP_MAX_QP_RD_ATOMIC;
-				qp_attr->timeout =
-					cm_id_priv->av.packet_life_time + 1;
+				qp_attr->timeout = cm_id_priv->av.timeout;
 				qp_attr->retry_cnt = cm_id_priv->retry_count;
 				qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
 				qp_attr->max_rd_atomic =
@@ -3323,8 +3334,7 @@
 			*qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
 			qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
 			qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
-			qp_attr->alt_timeout =
-				cm_id_priv->alt_av.packet_life_time + 1;
+			qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
 			qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
 			qp_attr->path_mig_state = IB_MIG_REARM;
 		}
@@ -3364,6 +3374,16 @@ int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
 }
 EXPORT_SYMBOL(ib_cm_init_qp_attr);
 
+void cm_get_ack_delay(struct cm_device *cm_dev)
+{
+	struct ib_device_attr attr;
+
+	if (ib_query_device(cm_dev->device, &attr))
+		cm_dev->ack_delay = 0; /* acks will rely on packet life time */
+	else
+		cm_dev->ack_delay = attr.local_ca_ack_delay;
+}
+
 static void cm_add_one(struct ib_device *device)
 static void cm_add_one(struct ib_device *device)
 {
 {
 	struct cm_device *cm_dev;
 	struct cm_device *cm_dev;
@@ -3388,6 +3408,7 @@ static void cm_add_one(struct ib_device *device)
 		return;
 		return;
 
 
 	cm_dev->device = device;
 	cm_dev->device = device;
+	cm_get_ack_delay(cm_dev);
 
 
 	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
 	set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
 	for (i = 1; i <= device->phys_port_cnt; i++) {
 	for (i = 1; i <= device->phys_port_cnt; i++) {

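The cm.c hunks above swap spin_lock_irqsave() for spin_lock_irq() in the SIDR and send-error handlers, which is legitimate only because those paths always run with interrupts enabled; cm_get_ack_delay() additionally caches the HCA's local_ca_ack_delay, falling back to 0 (packet-lifetime-only timeouts) if ib_query_device() fails. A minimal sketch of the two locking idioms, with a hypothetical lock that is not part of this patch:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(example_lock);

/* Cheaper form: only valid when the caller is known to run with IRQs
 * enabled (process context, workqueue); unlock re-enables IRQs
 * unconditionally. */
static void process_context_path(void)
{
	spin_lock_irq(&example_lock);
	/* ... critical section ... */
	spin_unlock_irq(&example_lock);
}

/* General form: required when the function may be entered with IRQs
 * already disabled; unlock restores the saved IRQ state. */
static void any_context_path(void)
{
	unsigned long flags;

	spin_lock_irqsave(&example_lock, flags);
	/* ... critical section ... */
	spin_unlock_irqrestore(&example_lock, flags);
}
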
+ 1 - 0
drivers/infiniband/core/cm_msgs.h

@@ -35,6 +35,7 @@
 #define CM_MSGS_H

 #include <rdma/ib_mad.h>
+#include <rdma/ib_cm.h>

 /*
  * Parameters to routines below should be in network-byte order, and values

+ 0 - 1
drivers/infiniband/core/cma.c

@@ -2326,7 +2326,6 @@ static int cma_accept_ib(struct rdma_id_private *id_priv,
 	rep.private_data_len = conn_param->private_data_len;
 	rep.responder_resources = conn_param->responder_resources;
 	rep.initiator_depth = conn_param->initiator_depth;
-	rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
 	rep.failover_accepted = 0;
 	rep.flow_control = conn_param->flow_control;
 	rep.rnr_retry_count = conn_param->rnr_retry_count;

+ 41 - 9
drivers/infiniband/core/mad.c

@@ -675,10 +675,16 @@ static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
 	struct ib_mad_port_private *port_priv;
 	struct ib_mad_agent_private *recv_mad_agent = NULL;
 	struct ib_device *device = mad_agent_priv->agent.device;
-	u8 port_num = mad_agent_priv->agent.port_num;
+	u8 port_num;
 	struct ib_wc mad_wc;
 	struct ib_send_wr *send_wr = &mad_send_wr->send_wr;

+	if (device->node_type == RDMA_NODE_IB_SWITCH &&
+	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+		port_num = send_wr->wr.ud.port_num;
+	else
+		port_num = mad_agent_priv->agent.port_num;
+
 	/*
 	 * Directed route handling starts if the initial LID routed part of
 	 * a request or the ending LID routed part of a response is empty.
@@ -1839,6 +1845,7 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 	struct ib_mad_private *recv, *response;
 	struct ib_mad_list_head *mad_list;
 	struct ib_mad_agent_private *mad_agent;
+	int port_num;

 	response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
 	if (!response)
@@ -1872,25 +1879,50 @@ static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
 	if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num))
 		goto out;

+	if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH)
+		port_num = wc->port_num;
+	else
+		port_num = port_priv->port_num;
+
 	if (recv->mad.mad.mad_hdr.mgmt_class ==
 	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+		enum smi_forward_action retsmi;
+
 		if (smi_handle_dr_smp_recv(&recv->mad.smp,
 					   port_priv->device->node_type,
-					   port_priv->port_num,
+					   port_num,
 					   port_priv->device->phys_port_cnt) ==
 					   IB_SMI_DISCARD)
 			goto out;

-		if (smi_check_forward_dr_smp(&recv->mad.smp) == IB_SMI_LOCAL)
+		retsmi = smi_check_forward_dr_smp(&recv->mad.smp);
+		if (retsmi == IB_SMI_LOCAL)
 			goto local;

-		if (smi_handle_dr_smp_send(&recv->mad.smp,
-					   port_priv->device->node_type,
-					   port_priv->port_num) == IB_SMI_DISCARD)
-			goto out;
+		if (retsmi == IB_SMI_SEND) { /* don't forward */
+			if (smi_handle_dr_smp_send(&recv->mad.smp,
+						   port_priv->device->node_type,
+						   port_num) == IB_SMI_DISCARD)
+				goto out;
+
+			if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
+				goto out;
+		} else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) {
+			/* forward case for switches */
+			memcpy(response, recv, sizeof(*response));
+			response->header.recv_wc.wc = &response->header.wc;
+			response->header.recv_wc.recv_buf.mad = &response->mad.mad;
+			response->header.recv_wc.recv_buf.grh = &response->grh;
+
+			if (!agent_send_response(&response->mad.mad,
+						 &response->grh, wc,
+						 port_priv->device,
+						 smi_get_fwd_port(&recv->mad.smp),
+						 qp_info->qp->qp_num))
+				response = NULL;

-		if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
 			goto out;
+		}
 	}

 local:
@@ -1919,7 +1951,7 @@ local:
 				agent_send_response(&response->mad.mad,
 						    &recv->grh, wc,
 						    port_priv->device,
-						    port_priv->port_num,
+						    port_num,
 						    qp_info->qp->qp_num);
 				goto out;
 			}

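The mad.c changes make directed-route SMP handling switch-aware: on RDMA_NODE_IB_SWITCH devices the effective port is taken from the send work request (outgoing) or the work completion (incoming) instead of the agent's registered port, and an SMP classified as IB_SMI_FORWARD is cloned into the response buffer and re-sent out the port computed by smi_get_fwd_port() rather than being terminated locally.
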
+ 1 - 1
drivers/infiniband/core/multicast.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 Intel Corporation.  All rights reserved.
+ * Copyright (c) 2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU

+ 1 - 1
drivers/infiniband/core/sa.h

@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two

+ 51 - 36
drivers/infiniband/core/sa_query.c

@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
- * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
+ * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
@@ -56,6 +56,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 struct ib_sa_sm_ah {
 	struct ib_ah        *ah;
 	struct kref          ref;
+	u16		     pkey_index;
 	u8		     src_path_mask;
 };

@@ -382,6 +383,13 @@ static void update_sm_ah(struct work_struct *work)
 	kref_init(&new_ah->ref);
 	new_ah->src_path_mask = (1 << port_attr.lmc) - 1;

+	new_ah->pkey_index = 0;
+	if (ib_find_pkey(port->agent->device, port->port_num,
+			 IB_DEFAULT_PKEY_FULL, &new_ah->pkey_index) &&
+	    ib_find_pkey(port->agent->device, port->port_num,
+			 IB_DEFAULT_PKEY_PARTIAL, &new_ah->pkey_index))
+		printk(KERN_ERR "Couldn't find index for default PKey\n");
+
 	memset(&ah_attr, 0, sizeof ah_attr);
 	ah_attr.dlid     = port_attr.sm_lid;
 	ah_attr.sl       = port_attr.sm_sl;
@@ -512,6 +520,35 @@ int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
 }
 EXPORT_SYMBOL(ib_init_ah_from_path);

+static int alloc_mad(struct ib_sa_query *query, gfp_t gfp_mask)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&query->port->ah_lock, flags);
+	kref_get(&query->port->sm_ah->ref);
+	query->sm_ah = query->port->sm_ah;
+	spin_unlock_irqrestore(&query->port->ah_lock, flags);
+
+	query->mad_buf = ib_create_send_mad(query->port->agent, 1,
+					    query->sm_ah->pkey_index,
+					    0, IB_MGMT_SA_HDR, IB_MGMT_SA_DATA,
+					    gfp_mask);
+	if (!query->mad_buf) {
+		kref_put(&query->sm_ah->ref, free_sm_ah);
+		return -ENOMEM;
+	}
+
+	query->mad_buf->ah = query->sm_ah->ah;
+
+	return 0;
+}
+
+static void free_mad(struct ib_sa_query *query)
+{
+	ib_free_send_mad(query->mad_buf);
+	kref_put(&query->sm_ah->ref, free_sm_ah);
+}
+
 static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
 {
 	unsigned long flags;
@@ -548,20 +585,11 @@ retry:
 	query->mad_buf->context[0] = query;
 	query->id = id;

-	spin_lock_irqsave(&query->port->ah_lock, flags);
-	kref_get(&query->port->sm_ah->ref);
-	query->sm_ah = query->port->sm_ah;
-	spin_unlock_irqrestore(&query->port->ah_lock, flags);
-
-	query->mad_buf->ah = query->sm_ah->ah;
-
 	ret = ib_post_send_mad(query->mad_buf, NULL);
 	if (ret) {
 		spin_lock_irqsave(&idr_lock, flags);
 		idr_remove(&query_idr, id);
 		spin_unlock_irqrestore(&idr_lock, flags);
-
-		kref_put(&query->sm_ah->ref, free_sm_ah);
 	}

 	/*
@@ -647,13 +675,10 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
 	if (!query)
 		return -ENOMEM;

-	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
-						     0, IB_MGMT_SA_HDR,
-						     IB_MGMT_SA_DATA, gfp_mask);
-	if (!query->sa_query.mad_buf) {
-		ret = -ENOMEM;
+	query->sa_query.port     = port;
+	ret = alloc_mad(&query->sa_query, gfp_mask);
+	if (ret)
 		goto err1;
-	}

 	ib_sa_client_get(client);
 	query->sa_query.client = client;
@@ -665,7 +690,6 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,

 	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
 	query->sa_query.release  = ib_sa_path_rec_release;
-	query->sa_query.port     = port;
 	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_PATH_REC);
 	mad->sa_hdr.comp_mask	 = comp_mask;
@@ -683,7 +707,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client,
 err2:
 	*sa_query = NULL;
 	ib_sa_client_put(query->sa_query.client);
-	ib_free_send_mad(query->sa_query.mad_buf);
+	free_mad(&query->sa_query);

 err1:
 	kfree(query);
@@ -773,13 +797,10 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
 	if (!query)
 		return -ENOMEM;

-	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
-						     0, IB_MGMT_SA_HDR,
-						     IB_MGMT_SA_DATA, gfp_mask);
-	if (!query->sa_query.mad_buf) {
-		ret = -ENOMEM;
+	query->sa_query.port     = port;
+	ret = alloc_mad(&query->sa_query, gfp_mask);
+	if (ret)
 		goto err1;
-	}

 	ib_sa_client_get(client);
 	query->sa_query.client = client;
@@ -791,7 +812,6 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,

 	query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
 	query->sa_query.release  = ib_sa_service_rec_release;
-	query->sa_query.port     = port;
 	mad->mad_hdr.method	 = method;
 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
 	mad->sa_hdr.comp_mask	 = comp_mask;
@@ -810,7 +830,7 @@ int ib_sa_service_rec_query(struct ib_sa_client *client,
 err2:
 	*sa_query = NULL;
 	ib_sa_client_put(query->sa_query.client);
-	ib_free_send_mad(query->sa_query.mad_buf);
+	free_mad(&query->sa_query);

 err1:
 	kfree(query);
@@ -869,13 +889,10 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
 	if (!query)
 		return -ENOMEM;

-	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
-						     0, IB_MGMT_SA_HDR,
-						     IB_MGMT_SA_DATA, gfp_mask);
-	if (!query->sa_query.mad_buf) {
-		ret = -ENOMEM;
+	query->sa_query.port     = port;
+	ret = alloc_mad(&query->sa_query, gfp_mask);
+	if (ret)
 		goto err1;
-	}

 	ib_sa_client_get(client);
 	query->sa_query.client = client;
@@ -887,7 +904,6 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,

 	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
 	query->sa_query.release  = ib_sa_mcmember_rec_release;
-	query->sa_query.port     = port;
 	mad->mad_hdr.method	 = method;
 	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
 	mad->sa_hdr.comp_mask	 = comp_mask;
@@ -906,7 +922,7 @@ int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
 err2:
 	*sa_query = NULL;
 	ib_sa_client_put(query->sa_query.client);
-	ib_free_send_mad(query->sa_query.mad_buf);
+	free_mad(&query->sa_query);

 err1:
 	kfree(query);
@@ -939,8 +955,7 @@ static void send_handler(struct ib_mad_agent *agent,
 	idr_remove(&query_idr, query->id);
 	spin_unlock_irqrestore(&idr_lock, flags);

-	ib_free_send_mad(mad_send_wc->send_buf);
-	kref_put(&query->sm_ah->ref, free_sm_ah);
+	free_mad(query);
 	ib_sa_client_put(query->client);
 	query->release(query);
 }

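alloc_mad()/free_mad() tie the SM address-handle reference to the MAD buffer's lifetime, and every SA query now carries the P_Key index cached in update_sm_ah(). That lookup tries the full-membership default P_Key first and falls back to the partial-membership one; stripped of context (the device and port variables here are hypothetical), the two-step pattern is:

	u16 pkey_index = 0;

	/* IB_DEFAULT_PKEY_FULL is 0xFFFF, IB_DEFAULT_PKEY_PARTIAL 0x7FFF;
	 * ib_find_pkey() returns nonzero when the key is absent. */
	if (ib_find_pkey(device, port_num, IB_DEFAULT_PKEY_FULL, &pkey_index) &&
	    ib_find_pkey(device, port_num, IB_DEFAULT_PKEY_PARTIAL, &pkey_index))
		printk(KERN_ERR "Couldn't find index for default PKey\n");
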
+ 13 - 3
drivers/infiniband/core/smi.c

@@ -192,7 +192,7 @@ enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
 			}
 			/* smp->hop_ptr updated when sending */
 			return (node_type == RDMA_NODE_IB_SWITCH ?
-				IB_SMI_HANDLE: IB_SMI_DISCARD);
+				IB_SMI_HANDLE : IB_SMI_DISCARD);
 		}

 		/* C14-13:4 -- hop_ptr = 0 -> give to SM */
@@ -211,7 +211,7 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
 	if (!ib_get_smp_direction(smp)) {
 		/* C14-9:2 -- intermediate hop */
 		if (hop_ptr && hop_ptr < hop_cnt)
-			return IB_SMI_SEND;
+			return IB_SMI_FORWARD;

 		/* C14-9:3 -- at the end of the DR segment of path */
 		if (hop_ptr == hop_cnt)
@@ -224,7 +224,7 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
 	} else {
 		/* C14-13:2  -- intermediate hop */
 		if (2 <= hop_ptr && hop_ptr <= hop_cnt)
-			return IB_SMI_SEND;
+			return IB_SMI_FORWARD;

 		/* C14-13:3 -- at the end of the DR segment of path */
 		if (hop_ptr == 1)
@@ -233,3 +233,13 @@ enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp)
 	}
 	return IB_SMI_LOCAL;
 }
+
+/*
+ * Return the forwarding port number from initial_path for outgoing SMP and
+ * from return_path for returning SMP
+ */
+int smi_get_fwd_port(struct ib_smp *smp)
+{
+	return (!ib_get_smp_direction(smp) ? smp->initial_path[smp->hop_ptr+1] :
+		smp->return_path[smp->hop_ptr-1]);
+}

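smi_get_fwd_port() reads the next hop straight out of the directed-route path vector. As a worked example, an outgoing SMP (direction bit clear) with hop_ptr = 2 and initial_path = {0, 1, 4, 7} leaves through port initial_path[3] = 7, while a returning SMP at the same hop_ptr comes back out through return_path[1].
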
+ 2 - 0
drivers/infiniband/core/smi.h

@@ -48,10 +48,12 @@ enum smi_action {
 enum smi_forward_action {
 	IB_SMI_LOCAL,	/* SMP should be completed up the stack */
 	IB_SMI_SEND,	/* received DR SMP should be forwarded to the send queue */
+	IB_SMI_FORWARD	/* SMP should be forwarded (for switches only) */
 };

 enum smi_action smi_handle_dr_smp_recv(struct ib_smp *smp, u8 node_type,
 				       int port_num, int phys_port_cnt);
+int smi_get_fwd_port(struct ib_smp *smp);
 extern enum smi_forward_action smi_check_forward_dr_smp(struct ib_smp *smp);
 extern enum smi_action smi_handle_dr_smp_send(struct ib_smp *smp,
 					      u8 node_type, int port_num);

+ 1 - 1
drivers/infiniband/core/sysfs.c

@@ -311,7 +311,7 @@ static ssize_t show_pma_counter(struct ib_port *p, struct port_attribute *attr,
 		return sprintf(buf, "N/A (no PMA)\n");

 	in_mad  = kzalloc(sizeof *in_mad, GFP_KERNEL);
-	out_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
+	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
 	if (!in_mad || !out_mad) {
 		ret = -ENOMEM;
 		goto out;

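The sysfs hunk fixes a classic sizeof-on-the-wrong-pointer slip; it happened to be harmless because in_mad and out_mad have the same type, but sizing the allocation from the pointer being assigned keeps the code correct even if the two types later diverge:

	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);	/* not sizeof *in_mad */
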
+ 0 - 1
drivers/infiniband/core/ucm.c

@@ -823,7 +823,6 @@ static ssize_t ib_ucm_send_rep(struct ib_ucm_file *file,
 	param.private_data_len    = cmd.len;
 	param.responder_resources = cmd.responder_resources;
 	param.initiator_depth     = cmd.initiator_depth;
-	param.target_ack_delay    = cmd.target_ack_delay;
 	param.failover_accepted   = cmd.failover_accepted;
 	param.flow_control        = cmd.flow_control;
 	param.rnr_retry_count     = cmd.rnr_retry_count;

+ 1 - 0
drivers/infiniband/core/umem.c

@@ -121,6 +121,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,

 	cur_base = addr & PAGE_MASK;

+	ret = 0;
 	while (npages) {
 		ret = get_user_pages(current, current->mm, cur_base,
 				     min_t(int, npages,

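Pre-initializing ret covers the case where npages is already zero: the loop body then never executes, and the error check that follows the loop would otherwise read an uninitialized value.
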
+ 1 - 1
drivers/infiniband/hw/amso1100/Kconfig

@@ -1,6 +1,6 @@
 config INFINIBAND_AMSO1100
 	tristate "Ammasso 1100 HCA support"
-	depends on PCI && INET && INFINIBAND
+	depends on PCI && INET
 	---help---
 	  This is a low-level driver for the Ammasso 1100 host
 	  channel adapter (HCA).

+ 1 - 1
drivers/infiniband/hw/cxgb3/Kconfig

@@ -1,6 +1,6 @@
 config INFINIBAND_CXGB3
 	tristate "Chelsio RDMA Driver"
-	depends on CHELSIO_T3 && INFINIBAND && INET
+	depends on CHELSIO_T3 && INET
 	select GENERIC_ALLOCATOR
 	---help---
 	  This is an iWARP/RDMA driver for the Chelsio T3 1GbE and

+ 3 - 3
drivers/infiniband/hw/cxgb3/cxio_hal.c

@@ -144,7 +144,7 @@ static int cxio_hal_clear_qp_ctx(struct cxio_rdev *rdev_p, u32 qpid)
 	}
 	wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
 	memset(wqe, 0, sizeof(*wqe));
-	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 1, qpid, 7);
+	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 3, 0, qpid, 7);
 	wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
 	sge_cmd = qpid << 8 | 3;
 	wqe->sge_cmd = cpu_to_be64(sge_cmd);
@@ -548,7 +548,7 @@ static int cxio_hal_init_ctrl_qp(struct cxio_rdev *rdev_p)
 			V_EC_UP_TOKEN(T3_CTL_QP_TID) | F_EC_VALID)) << 32;
 	wqe = (struct t3_modify_qp_wr *) skb_put(skb, sizeof(*wqe));
 	memset(wqe, 0, sizeof(*wqe));
-	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 1,
+	build_fw_riwrh((struct fw_riwrh *) wqe, T3_WR_QP_MOD, 0, 0,
 		       T3_CTL_QP_TID, 7);
 	wqe->flags = cpu_to_be32(MODQP_WRITE_EC);
 	sge_cmd = (3ULL << 56) | FW_RI_SGEEC_START << 8 | 3;
@@ -833,7 +833,7 @@ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
 	wqe->ird = cpu_to_be32(attr->ird);
 	wqe->qp_dma_addr = cpu_to_be64(attr->qp_dma_addr);
 	wqe->qp_dma_size = cpu_to_be32(attr->qp_dma_size);
-	wqe->rsvd = 0;
+	wqe->irs = cpu_to_be32(attr->irs);
 	skb->priority = 0;	/* 0=>ToeQ; 1=>CtrlQ */
 	return (cxgb3_ofld_send(rdev_p->t3cdev_p, skb));
 }

+ 2 - 1
drivers/infiniband/hw/cxgb3/cxio_wr.h

@@ -294,6 +294,7 @@ struct t3_rdma_init_attr {
 	u64 qp_dma_addr;
 	u32 qp_dma_size;
 	u32 flags;
+	u32 irs;
 };

 struct t3_rdma_init_wr {
@@ -314,7 +315,7 @@ struct t3_rdma_init_wr {
 	__be32 ird;
 	__be64 qp_dma_addr;	/* 7 */
 	__be32 qp_dma_size;	/* 8 */
-	u32 rsvd;
+	u32 irs;
 };

 struct t3_genbit {

+ 48 - 60
drivers/infiniband/hw/cxgb3/iwch_cm.c

@@ -254,8 +254,6 @@ static void release_ep_resources(struct iwch_ep *ep)
 	cxgb3_remove_tid(ep->com.tdev, (void *)ep, ep->hwtid);
 	dst_release(ep->dst);
 	l2t_release(L2DATA(ep->com.tdev), ep->l2t);
-	if (ep->com.tdev->type == T3B)
-		release_tid(ep->com.tdev, ep->hwtid, NULL);
 	put_ep(&ep->com);
 }

@@ -515,7 +513,7 @@ static void send_mpa_req(struct iwch_ep *ep, struct sk_buff *skb)
 	req->len = htonl(len);
 	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
 			   V_TX_SNDBUF(snd_win>>15));
-	req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT);
+	req->flags = htonl(F_TX_INIT);
 	req->sndseq = htonl(ep->snd_seq);
 	BUG_ON(ep->mpa_skb);
 	ep->mpa_skb = skb;
@@ -566,7 +564,7 @@ static int send_mpa_reject(struct iwch_ep *ep, const void *pdata, u8 plen)
 	req->len = htonl(mpalen);
 	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
 			   V_TX_SNDBUF(snd_win>>15));
-	req->flags = htonl(F_TX_IMM_ACK|F_TX_INIT);
+	req->flags = htonl(F_TX_INIT);
 	req->sndseq = htonl(ep->snd_seq);
 	BUG_ON(ep->mpa_skb);
 	ep->mpa_skb = skb;
@@ -618,7 +616,7 @@ static int send_mpa_reply(struct iwch_ep *ep, const void *pdata, u8 plen)
 	req->len = htonl(len);
 	req->param = htonl(V_TX_PORT(ep->l2t->smt_idx) |
 			   V_TX_SNDBUF(snd_win>>15));
-	req->flags = htonl(F_TX_MORE | F_TX_IMM_ACK | F_TX_INIT);
+	req->flags = htonl(F_TX_INIT);
 	req->sndseq = htonl(ep->snd_seq);
 	ep->mpa_skb = skb;
 	state_set(&ep->com, MPA_REP_SENT);
@@ -641,6 +639,7 @@ static int act_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	cxgb3_insert_tid(ep->com.tdev, &t3c_client, ep, tid);

 	ep->snd_seq = ntohl(req->snd_isn);
+	ep->rcv_seq = ntohl(req->rcv_isn);

 	set_emss(ep, ntohs(req->tcp_opt));

@@ -1023,6 +1022,9 @@ static int rx_data(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	skb_pull(skb, sizeof(*hdr));
 	skb_trim(skb, dlen);

+	ep->rcv_seq += dlen;
+	BUG_ON(ep->rcv_seq != (ntohl(hdr->seq) + dlen));
+
 	switch (state_read(&ep->com)) {
 	case MPA_REQ_SENT:
 		process_mpa_reply(ep, skb);
@@ -1060,7 +1062,6 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	struct iwch_ep *ep = ctx;
 	struct cpl_wr_ack *hdr = cplhdr(skb);
 	unsigned int credits = ntohs(hdr->credits);
-	enum iwch_qp_attr_mask  mask;

 	PDBG("%s ep %p credits %u\n", __FUNCTION__, ep, credits);

@@ -1072,30 +1073,6 @@ static int tx_ack(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	ep->mpa_skb = NULL;
 	dst_confirm(ep->dst);
 	if (state_read(&ep->com) == MPA_REP_SENT) {
-		struct iwch_qp_attributes attrs;
-
-		/* bind QP to EP and move to RTS */
-		attrs.mpa_attr = ep->mpa_attr;
-		attrs.max_ird = ep->ord;
-		attrs.max_ord = ep->ord;
-		attrs.llp_stream_handle = ep;
-		attrs.next_state = IWCH_QP_STATE_RTS;
-
-		/* bind QP and TID with INIT_WR */
-		mask = IWCH_QP_ATTR_NEXT_STATE |
-				     IWCH_QP_ATTR_LLP_STREAM_HANDLE |
-				     IWCH_QP_ATTR_MPA_ATTR |
-				     IWCH_QP_ATTR_MAX_IRD |
-				     IWCH_QP_ATTR_MAX_ORD;
-
-		ep->com.rpl_err = iwch_modify_qp(ep->com.qp->rhp,
-				     ep->com.qp, mask, &attrs, 1);
-
-		if (!ep->com.rpl_err) {
-			state_set(&ep->com, FPDU_MODE);
-			established_upcall(ep);
-		}
-
 		ep->com.rpl_done = 1;
 		PDBG("waking up ep %p\n", ep);
 		wake_up(&ep->com.waitq);
@@ -1124,6 +1101,15 @@ static int abort_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	return CPL_RET_BUF_DONE;
 }

+/*
+ * Return whether a failed active open has allocated a TID
+ */
+static inline int act_open_has_tid(int status)
+{
+	return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST &&
+	       status != CPL_ERR_ARP_MISS;
+}
+
 static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 {
 	struct iwch_ep *ep = ctx;
@@ -1133,7 +1119,7 @@ static int act_open_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	     status2errno(rpl->status));
 	connect_reply_upcall(ep, status2errno(rpl->status));
 	state_set(&ep->com, DEAD);
-	if (ep->com.tdev->type == T3B)
+	if (ep->com.tdev->type == T3B && act_open_has_tid(rpl->status))
 		release_tid(ep->com.tdev, GET_TID(rpl), NULL);
 	cxgb3_free_atid(ep->com.tdev, ep->atid);
 	dst_release(ep->dst);
@@ -1378,6 +1364,7 @@ static int pass_establish(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)

 	PDBG("%s ep %p\n", __FUNCTION__, ep);
 	ep->snd_seq = ntohl(req->snd_isn);
+	ep->rcv_seq = ntohl(req->rcv_isn);

 	set_emss(ep, ntohs(req->tcp_opt));

@@ -1485,6 +1472,13 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 	int ret;
 	int state;

+	if (is_neg_adv_abort(req->status)) {
+		PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
+		     ep->hwtid);
+		t3_l2t_send_event(ep->com.tdev, ep->l2t);
+		return CPL_RET_BUF_DONE;
+	}
+
 	/*
 	 * We get 2 peer aborts from the HW.  The first one must
 	 * be ignored except for scribbling that we need one more.
@@ -1494,13 +1488,6 @@ static int peer_abort(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
 		return CPL_RET_BUF_DONE;
 	}

-	if (is_neg_adv_abort(req->status)) {
-		PDBG("%s neg_adv_abort ep %p tid %d\n", __FUNCTION__, ep,
-		     ep->hwtid);
-		t3_l2t_send_event(ep->com.tdev, ep->l2t);
-		return CPL_RET_BUF_DONE;
-	}
-
 	state = state_read(&ep->com);
 	PDBG("%s ep %p state %u\n", __FUNCTION__, ep, state);
 	switch (state) {
@@ -1732,10 +1719,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	struct iwch_qp *qp = get_qhp(h, conn_param->qpn);

 	PDBG("%s ep %p tid %u\n", __FUNCTION__, ep, ep->hwtid);
-	if (state_read(&ep->com) == DEAD) {
-		put_ep(&ep->com);
+	if (state_read(&ep->com) == DEAD)
 		return -ECONNRESET;
-	}

 	BUG_ON(state_read(&ep->com) != MPA_REQ_RCVD);
 	BUG_ON(!qp);
@@ -1755,17 +1740,8 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
 	ep->ird = conn_param->ird;
 	ep->ord = conn_param->ord;
 	PDBG("%s %d ird %d ord %d\n", __FUNCTION__, __LINE__, ep->ird, ep->ord);
+
 	get_ep(&ep->com);
-	err = send_mpa_reply(ep, conn_param->private_data,
-			     conn_param->private_data_len);
-	if (err) {
-		ep->com.cm_id = NULL;
-		ep->com.qp = NULL;
-		cm_id->rem_ref(cm_id);
-		abort_connection(ep, NULL, GFP_KERNEL);
-		put_ep(&ep->com);
-		return err;
-	}

 	/* bind QP to EP and move to RTS */
 	attrs.mpa_attr = ep->mpa_attr;
@@ -1783,16 +1759,28 @@ int iwch_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)

 	err = iwch_modify_qp(ep->com.qp->rhp,
 			     ep->com.qp, mask, &attrs, 1);
+	if (err)
+		goto err;

-	if (err) {
-		ep->com.cm_id = NULL;
-		ep->com.qp = NULL;
-		cm_id->rem_ref(cm_id);
-		abort_connection(ep, NULL, GFP_KERNEL);
-	} else {
-		state_set(&ep->com, FPDU_MODE);
-		established_upcall(ep);
-	}
+	err = send_mpa_reply(ep, conn_param->private_data,
+			     conn_param->private_data_len);
+	if (err)
+		goto err;
+
+	/* wait for wr_ack */
+	wait_event(ep->com.waitq, ep->com.rpl_done);
+	err = ep->com.rpl_err;
+	if (err)
+		goto err;
+
+	state_set(&ep->com, FPDU_MODE);
+	established_upcall(ep);
+	put_ep(&ep->com);
+	return 0;
+err:
+	ep->com.cm_id = NULL;
+	ep->com.qp = NULL;
+	cm_id->rem_ref(cm_id);
 	put_ep(&ep->com);
 	return err;
 }

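iwch_accept_cr() now moves the QP to RTS before sending the MPA reply, then blocks until tx_ack() reports that the reply was acknowledged, instead of finishing the transition from the ack handler. The handshake boils down to a wait_event()/wake_up() pair around a completion flag; a minimal sketch of that pattern with hypothetical names:

#include <linux/wait.h>

struct conn_ctx {
	wait_queue_head_t waitq;
	int rpl_done;
	int rpl_err;
};

/* Requester: post the message, then sleep until the ack handler fires. */
static int send_and_wait(struct conn_ctx *ctx)
{
	ctx->rpl_done = 0;
	/* ... hand the sk_buff to the hardware here ... */
	wait_event(ctx->waitq, ctx->rpl_done);
	return ctx->rpl_err;
}

/* Ack handler (the tx_ack() role): record the result, wake the requester. */
static void on_ack(struct conn_ctx *ctx, int err)
{
	ctx->rpl_err = err;
	ctx->rpl_done = 1;
	wake_up(&ctx->waitq);
}
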
+ 1 - 0
drivers/infiniband/hw/cxgb3/iwch_cm.h

@@ -175,6 +175,7 @@ struct iwch_ep {
 	unsigned int atid;
 	u32 hwtid;
 	u32 snd_seq;
+	u32 rcv_seq;
 	struct l2t_entry *l2t;
 	struct dst_entry *dst;
 	struct sk_buff *mpa_skb;

+ 4 - 3
drivers/infiniband/hw/cxgb3/iwch_provider.c

@@ -1163,9 +1163,10 @@ int iwch_register_device(struct iwch_dev *dev)
 	dev->ibdev.post_recv = iwch_post_receive;


-	dev->ibdev.iwcm =
-	    (struct iw_cm_verbs *) kmalloc(sizeof(struct iw_cm_verbs),
-					   GFP_KERNEL);
+	dev->ibdev.iwcm = kmalloc(sizeof(struct iw_cm_verbs), GFP_KERNEL);
+	if (!dev->ibdev.iwcm)
+		return -ENOMEM;
+
 	dev->ibdev.iwcm->connect = iwch_connect;
 	dev->ibdev.iwcm->accept = iwch_accept_cr;
 	dev->ibdev.iwcm->reject = iwch_reject_cr;

+ 4 - 3
drivers/infiniband/hw/cxgb3/iwch_qp.c

@@ -628,9 +628,9 @@ int iwch_post_terminate(struct iwch_qp *qhp, struct respQ_msg_t *rsp_msg)
 	/* immediate data starts here. */
 	term = (struct terminate_message *)wqe->send.sgl;
 	build_term_codes(rsp_msg, &term->layer_etype, &term->ecode);
-	build_fw_riwrh((void *)wqe, T3_WR_SEND,
-		       T3_COMPLETION_FLAG | T3_NOTIFY_FLAG, 1,
-		       qhp->ep->hwtid, 5);
+	wqe->send.wrh.op_seop_flags = cpu_to_be32(V_FW_RIWR_OP(T3_WR_SEND) |
+			 V_FW_RIWR_FLAGS(T3_COMPLETION_FLAG | T3_NOTIFY_FLAG));
+	wqe->send.wrh.gen_tid_len = cpu_to_be32(V_FW_RIWR_TID(qhp->ep->hwtid));
 	skb->priority = CPL_PRIORITY_DATA;
 	return cxgb3_ofld_send(qhp->rhp->rdev.t3cdev_p, skb);
 }
@@ -732,6 +732,7 @@ static int rdma_init(struct iwch_dev *rhp, struct iwch_qp *qhp,
 	init_attr.qp_dma_addr = qhp->wq.dma_addr;
 	init_attr.qp_dma_size = (1UL << qhp->wq.size_log2);
 	init_attr.flags = rqes_posted(qhp) ? RECVS_POSTED : 0;
+	init_attr.irs = qhp->ep->rcv_seq;
 	PDBG("%s init_attr.rq_addr 0x%x init_attr.rq_size = %d "
 	     "flags 0x%x qpcaps 0x%x\n", __FUNCTION__,
 	     init_attr.rq_addr, init_attr.rq_size,

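Together, the cxgb3 hunks thread the TCP initial receive sequence number through to RDMA mode: the establishment handlers record rcv_isn in ep->rcv_seq, rx_data() advances and sanity-checks it against the wire sequence, and rdma_init() hands it to the firmware in the formerly reserved irs word of the init WR.
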
+ 1 - 1
drivers/infiniband/hw/ehca/Kconfig

@@ -1,6 +1,6 @@
 config INFINIBAND_EHCA
 	tristate "eHCA support"
-	depends on IBMEBUS && INFINIBAND
+	depends on IBMEBUS
 	---help---
 	This driver supports the IBM pSeries eHCA InfiniBand adapter.


+ 4 - 2
drivers/infiniband/hw/ehca/ehca_av.c

@@ -118,7 +118,7 @@ struct ib_ah *ehca_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr)
 		}
 		memcpy(&av->av.grh.word_1, &gid, sizeof(gid));
 	}
-	av->av.pmtu = EHCA_MAX_MTU;
+	av->av.pmtu = shca->max_mtu;

 	/* dgid comes in grh.word_3 */
 	memcpy(&av->av.grh.word_3, &ah_attr->grh.dgid,
@@ -137,6 +137,8 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
 	struct ehca_av *av;
 	struct ehca_ud_av new_ehca_av;
 	struct ehca_pd *my_pd = container_of(ah->pd, struct ehca_pd, ib_pd);
+	struct ehca_shca *shca = container_of(ah->pd->device, struct ehca_shca,
+					      ib_device);
 	u32 cur_pid = current->tgid;

 	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
@@ -192,7 +194,7 @@ int ehca_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
 		memcpy(&new_ehca_av.grh.word_1, &gid, sizeof(gid));
 	}

-	new_ehca_av.pmtu = EHCA_MAX_MTU;
+	new_ehca_av.pmtu = shca->max_mtu;

 	memcpy(&new_ehca_av.grh.word_3, &ah_attr->grh.dgid,
 	       sizeof(ah_attr->grh.dgid));

+ 62 - 13
drivers/infiniband/hw/ehca/ehca_classes.h

@@ -5,6 +5,7 @@
 *
 *  Authors: Heiko J Schick <schickhj@de.ibm.com>
 *           Christoph Raisch <raisch@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
 *
 *  Copyright (c) 2005 IBM Corporation
 *
@@ -86,11 +87,17 @@ struct ehca_eq {
 	struct ehca_eqe_cache_entry eqe_cache[EHCA_EQE_CACHE_SIZE];
 };

+struct ehca_sma_attr {
+	u16 lid, lmc, sm_sl, sm_lid;
+	u16 pkey_tbl_len, pkeys[16];
+};
+
 struct ehca_sport {
 	struct ib_cq *ibcq_aqp1;
 	struct ib_qp *ibqp_aqp1;
 	enum ib_rate  rate;
 	enum ib_port_state port_state;
+	struct ehca_sma_attr saved_attr;
 };

 struct ehca_shca {
@@ -107,6 +114,8 @@ struct ehca_shca {
 	struct ehca_pd *pd;
 	struct h_galpas galpas;
 	struct mutex modify_mutex;
+	u64 hca_cap;
+	int max_mtu;
 };

 struct ehca_pd {
@@ -115,9 +124,20 @@ struct ehca_pd {
 	u32 ownpid;
 };

+enum ehca_ext_qp_type {
+	EQPT_NORMAL    = 0,
+	EQPT_LLQP      = 1,
+	EQPT_SRQBASE   = 2,
+	EQPT_SRQ       = 3,
+};
+
 struct ehca_qp {
-	struct ib_qp ib_qp;
+	union {
+		struct ib_qp ib_qp;
+		struct ib_srq ib_srq;
+	};
 	u32 qp_type;
+	enum ehca_ext_qp_type ext_type;
 	struct ipz_queue ipz_squeue;
 	struct ipz_queue ipz_rqueue;
 	struct h_galpas galpas;
@@ -140,6 +160,10 @@ struct ehca_qp {
 	u32 mm_count_galpa;
 };

+#define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
+#define HAS_SQ(qp) (qp->ext_type != EQPT_SRQ)
+#define HAS_RQ(qp) (qp->ext_type != EQPT_SRQBASE)
+
 /* must be power of 2 */
 #define QP_HASHTAB_LEN 8

@@ -156,8 +180,8 @@ struct ehca_cq {
 	spinlock_t cb_lock;
 	struct hlist_head qp_hashtab[QP_HASHTAB_LEN];
 	struct list_head entry;
-	u32 nr_callbacks; /* #events assigned to cpu by scaling code */
-	u32 nr_events;    /* #events seen */
+	u32 nr_callbacks;   /* #events assigned to cpu by scaling code */
+	atomic_t nr_events; /* #events seen */
 	wait_queue_head_t wait_completion;
 	spinlock_t task_lock;
 	u32 ownpid;
@@ -275,9 +299,8 @@ void ehca_cleanup_av_cache(void);
 int ehca_init_mrmw_cache(void);
 void ehca_cleanup_mrmw_cache(void);

-extern spinlock_t ehca_qp_idr_lock;
-extern spinlock_t ehca_cq_idr_lock;
-extern spinlock_t hcall_lock;
+extern rwlock_t ehca_qp_idr_lock;
+extern rwlock_t ehca_cq_idr_lock;
 extern struct idr ehca_qp_idr;
 extern struct idr ehca_cq_idr;

@@ -305,6 +328,7 @@ struct ehca_create_qp_resp {
 	u32 qp_num;
 	u32 token;
 	u32 qp_type;
+	u32 ext_type;
 	u32 qkey;
 	/* qp_num assigned by ehca: sqp0/1 may have got different numbers */
 	u32 real_qp_num;
@@ -320,14 +344,42 @@ struct ehca_alloc_cq_parms {
 	struct ipz_eq_handle eq_handle;
 };

+enum ehca_service_type {
+	ST_RC  = 0,
+	ST_UC  = 1,
+	ST_RD  = 2,
+	ST_UD  = 3,
+};
+
+enum ehca_ll_comp_flags {
+	LLQP_SEND_COMP = 0x20,
+	LLQP_RECV_COMP = 0x40,
+	LLQP_COMP_MASK = 0x60,
+};
+
 struct ehca_alloc_qp_parms {
-	int servicetype;
+/* input parameters */
+	enum ehca_service_type servicetype;
 	int sigtype;
-	int daqp_ctrl;
-	int max_send_sge;
-	int max_recv_sge;
+	enum ehca_ext_qp_type ext_type;
+	enum ehca_ll_comp_flags ll_comp_flags;
+
+	int max_send_wr, max_recv_wr;
+	int max_send_sge, max_recv_sge;
 	int ud_av_l_key_ctl;

+	u32 token;
+	struct ipz_eq_handle eq_handle;
+	struct ipz_pd pd;
+	struct ipz_cq_handle send_cq_handle, recv_cq_handle;
+
+	u32 srq_qpn, srq_token, srq_limit;
+
+/* output parameters */
+	u32 real_qp_num;
+	struct ipz_qp_handle qp_handle;
+	struct h_galpas galpas;
+
 	u16 act_nr_send_wqes;
 	u16 act_nr_recv_wqes;
 	u8  act_nr_recv_sges;
@@ -335,9 +387,6 @@ struct ehca_alloc_qp_parms {

 	u32 nr_rq_pages;
 	u32 nr_sq_pages;
-
-	struct ipz_eq_handle ipz_eq_handle;
-	struct ipz_pd pd;
 };

 int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp);

+ 2 - 2
drivers/infiniband/hw/ehca/ehca_classes_pSeries.h

@@ -228,8 +228,8 @@ struct hcp_modify_qp_control_block {
 #define MQPCB_QP_NUMBER                         EHCA_BMASK_IBM(8,31)
 #define MQPCB_MASK_QP_ENABLE                    EHCA_BMASK_IBM(48,48)
 #define MQPCB_QP_ENABLE                         EHCA_BMASK_IBM(31,31)
-#define MQPCB_MASK_CURR_SQR_LIMIT               EHCA_BMASK_IBM(49,49)
-#define MQPCB_CURR_SQR_LIMIT                    EHCA_BMASK_IBM(15,31)
+#define MQPCB_MASK_CURR_SRQ_LIMIT               EHCA_BMASK_IBM(49,49)
+#define MQPCB_CURR_SRQ_LIMIT                    EHCA_BMASK_IBM(16,31)
 #define MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG       EHCA_BMASK_IBM(50,50)
 #define MQPCB_MASK_SHARED_RQ_HNDL               EHCA_BMASK_IBM(51,51)


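The MQPCB fix renames the misspelled SQR masks to SRQ and narrows the current-SRQ-limit value field from bits 15-31 to 16-31; EHCA_BMASK_IBM() uses IBM (MSB-0) bit numbering, where bit 0 is the most significant bit of the word.
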
+ 21 - 29
drivers/infiniband/hw/ehca/ehca_cq.c

@@ -56,11 +56,11 @@ int ehca_cq_assign_qp(struct ehca_cq *cq, struct ehca_qp *qp)
 {
 	unsigned int qp_num = qp->real_qp_num;
 	unsigned int key = qp_num & (QP_HASHTAB_LEN-1);
-	unsigned long spl_flags;
+	unsigned long flags;

-	spin_lock_irqsave(&cq->spinlock, spl_flags);
+	spin_lock_irqsave(&cq->spinlock, flags);
 	hlist_add_head(&qp->list_entries, &cq->qp_hashtab[key]);
-	spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+	spin_unlock_irqrestore(&cq->spinlock, flags);

 	ehca_dbg(cq->ib_cq.device, "cq_num=%x real_qp_num=%x",
 		 cq->cq_number, qp_num);
@@ -74,9 +74,9 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
 	unsigned int key = real_qp_num & (QP_HASHTAB_LEN-1);
 	struct hlist_node *iter;
 	struct ehca_qp *qp;
-	unsigned long spl_flags;
+	unsigned long flags;

-	spin_lock_irqsave(&cq->spinlock, spl_flags);
+	spin_lock_irqsave(&cq->spinlock, flags);
 	hlist_for_each(iter, &cq->qp_hashtab[key]) {
 		qp = hlist_entry(iter, struct ehca_qp, list_entries);
 		if (qp->real_qp_num == real_qp_num) {
@@ -88,7 +88,7 @@ int ehca_cq_unassign_qp(struct ehca_cq *cq, unsigned int real_qp_num)
 			break;
 		}
 	}
-	spin_unlock_irqrestore(&cq->spinlock, spl_flags);
+	spin_unlock_irqrestore(&cq->spinlock, flags);
 	if (ret)
 		ehca_err(cq->ib_cq.device,
 			 "qp not found cq_num=%x real_qp_num=%x",
@@ -146,6 +146,7 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
 	spin_lock_init(&my_cq->spinlock);
 	spin_lock_init(&my_cq->cb_lock);
 	spin_lock_init(&my_cq->task_lock);
+	atomic_set(&my_cq->nr_events, 0);
 	init_waitqueue_head(&my_cq->wait_completion);
 	my_cq->ownpid = current->tgid;

@@ -162,9 +163,9 @@ struct ib_cq *ehca_create_cq(struct ib_device *device, int cqe, int comp_vector,
 			goto create_cq_exit1;
 		}

-		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		write_lock_irqsave(&ehca_cq_idr_lock, flags);
 		ret = idr_get_new(&ehca_cq_idr, my_cq, &my_cq->token);
-		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+		write_unlock_irqrestore(&ehca_cq_idr_lock, flags);

 	} while (ret == -EAGAIN);

@@ -293,9 +294,9 @@ create_cq_exit3:
 			 "cq_num=%x h_ret=%lx", my_cq, my_cq->cq_number, h_ret);

 create_cq_exit2:
-	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	write_lock_irqsave(&ehca_cq_idr_lock, flags);
 	idr_remove(&ehca_cq_idr, my_cq->token);
-	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+	write_unlock_irqrestore(&ehca_cq_idr_lock, flags);

 create_cq_exit1:
 	kmem_cache_free(cq_cache, my_cq);
@@ -303,16 +304,6 @@ create_cq_exit1:
 	return cq;
 }

-static int get_cq_nr_events(struct ehca_cq *my_cq)
-{
-	int ret;
-	unsigned long flags;
-	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-	ret = my_cq->nr_events;
-	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-	return ret;
-}
-
 int ehca_destroy_cq(struct ib_cq *cq)
 {
 	u64 h_ret;
@@ -339,17 +330,18 @@ int ehca_destroy_cq(struct ib_cq *cq)
 		}
 	}

-	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-	while (my_cq->nr_events) {
-		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
-		wait_event(my_cq->wait_completion, !get_cq_nr_events(my_cq));
-		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-		/* recheck nr_events to assure no cqe has just arrived */
-	}
-
+	/*
+	 * remove the CQ from the idr first to make sure
+	 * no more interrupt tasklets will touch this CQ
+	 */
+	write_lock_irqsave(&ehca_cq_idr_lock, flags);
 	idr_remove(&ehca_cq_idr, my_cq->token);
-	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+	write_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+
+	/* now wait until all pending events have completed */
+	wait_event(my_cq->wait_completion, !atomic_read(&my_cq->nr_events));

+	/* nobody's using our CQ any longer -- we can destroy it */
 	h_ret = hipz_h_destroy_cq(adapter_handle, my_cq, 0);
 	if (h_ret == H_R_STATE) {
 		/* cq in err: read err data and destroy it forcibly */

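ehca_destroy_cq() now closes the race with the interrupt tasklets by unpublishing the CQ from the idr first, so no new event can look it up, and only then sleeping until the outstanding-event count drains to zero. Reduced to a skeleton with hypothetical names, the pattern on both sides is:

	/* event side: pin the object before dropping the lookup lock */
	read_lock(&obj_idr_lock);
	obj = idr_find(&obj_idr, token);
	if (obj)
		atomic_inc(&obj->nr_events);
	read_unlock(&obj_idr_lock);
	/* ... handle the event ... */
	if (atomic_dec_and_test(&obj->nr_events))
		wake_up(&obj->wait_completion);

	/* destroy side: unpublish first, then wait out in-flight events */
	write_lock_irqsave(&obj_idr_lock, flags);
	idr_remove(&obj_idr, obj->token);
	write_unlock_irqrestore(&obj_idr_lock, flags);
	wait_event(obj->wait_completion, !atomic_read(&obj->nr_events));
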
+ 58 - 3
drivers/infiniband/hw/ehca/ehca_hca.c

@@ -45,11 +45,25 @@

 int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
 {
-	int ret = 0;
+	int i, ret = 0;
 	struct ehca_shca *shca = container_of(ibdev, struct ehca_shca,
 					      ib_device);
 	struct hipz_query_hca *rblock;

+	static const u32 cap_mapping[] = {
+		IB_DEVICE_RESIZE_MAX_WR,      HCA_CAP_WQE_RESIZE,
+		IB_DEVICE_BAD_PKEY_CNTR,      HCA_CAP_BAD_P_KEY_CTR,
+		IB_DEVICE_BAD_QKEY_CNTR,      HCA_CAP_Q_KEY_VIOL_CTR,
+		IB_DEVICE_RAW_MULTI,          HCA_CAP_RAW_PACKET_MCAST,
+		IB_DEVICE_AUTO_PATH_MIG,      HCA_CAP_AUTO_PATH_MIG,
+		IB_DEVICE_CHANGE_PHY_PORT,    HCA_CAP_SQD_RTS_PORT_CHANGE,
+		IB_DEVICE_UD_AV_PORT_ENFORCE, HCA_CAP_AH_PORT_NR_CHECK,
+		IB_DEVICE_CURR_QP_STATE_MOD,  HCA_CAP_CUR_QP_STATE_MOD,
+		IB_DEVICE_SHUTDOWN_PORT,      HCA_CAP_SHUTDOWN_PORT,
+		IB_DEVICE_INIT_TYPE,          HCA_CAP_INIT_TYPE,
+		IB_DEVICE_PORT_ACTIVE_EVENT,  HCA_CAP_PORT_ACTIVE_EVENT,
+	};
+
 	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!rblock) {
 		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
@@ -96,6 +110,13 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
 	props->max_total_mcast_qp_attach
 		= min_t(int, rblock->max_total_mcast_qp_attach, INT_MAX);

+	/* translate device capabilities */
+	props->device_cap_flags = IB_DEVICE_SYS_IMAGE_GUID |
+		IB_DEVICE_RC_RNR_NAK_GEN | IB_DEVICE_N_NOTIFY_CQ;
+	for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2)
+		if (rblock->hca_cap_indicators & cap_mapping[i + 1])
+			props->device_cap_flags |= cap_mapping[i];
+
 query_device1:
 	ehca_free_fw_ctrlblock(rblock);

@@ -172,6 +193,40 @@ query_port1:
 	return ret;
 }

+int ehca_query_sma_attr(struct ehca_shca *shca,
+			u8 port, struct ehca_sma_attr *attr)
+{
+	int ret = 0;
+	struct hipz_query_port *rblock;
+
+	rblock = ehca_alloc_fw_ctrlblock(GFP_ATOMIC);
+	if (!rblock) {
+		ehca_err(&shca->ib_device, "Can't allocate rblock memory.");
+		return -ENOMEM;
+	}
+
+	if (hipz_h_query_port(shca->ipz_hca_handle, port, rblock) != H_SUCCESS) {
+		ehca_err(&shca->ib_device, "Can't query port properties");
+		ret = -EINVAL;
+		goto query_sma_attr1;
+	}
+
+	memset(attr, 0, sizeof(struct ehca_sma_attr));
+
+	attr->lid    = rblock->lid;
+	attr->lmc    = rblock->lmc;
+	attr->sm_sl  = rblock->sm_sl;
+	attr->sm_lid = rblock->sm_lid;
+
+	attr->pkey_tbl_len = rblock->pkey_tbl_len;
+	memcpy(attr->pkeys, rblock->pkey_entries, sizeof(attr->pkeys));
+
+query_sma_attr1:
+	ehca_free_fw_ctrlblock(rblock);
+
+	return ret;
+}
+
 int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
 {
 	int ret = 0;
@@ -261,7 +316,7 @@ int ehca_modify_port(struct ib_device *ibdev,
 	}

 	if (mutex_lock_interruptible(&shca->modify_mutex))
-                return -ERESTARTSYS;
+		return -ERESTARTSYS;

 	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!rblock) {
@@ -290,7 +345,7 @@ modify_port2:
 	ehca_free_fw_ctrlblock(rblock);

 modify_port1:
-        mutex_unlock(&shca->modify_mutex);
+	mutex_unlock(&shca->modify_mutex);

 	return ret;
 }

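The cap_mapping table in ehca_query_device() above packs (verbs flag, firmware bit) pairs into one flat array and walks it two entries at a stride. A standalone sketch of the same translation idiom follows; the flag values below are invented for the demo and are not the real IB_DEVICE_*/HCA_CAP_* constants:

#include <stdio.h>
#include <stdint.h>

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

enum { API_FOO = 1 << 0, API_BAR = 1 << 1, API_BAZ = 1 << 2 };
enum { HW_FOO = 1 << 4, HW_BAR = 1 << 9, HW_BAZ = 1 << 17 };

static const uint32_t cap_mapping[] = {
	API_FOO, HW_FOO,	/* even index: API flag, odd: hardware bit */
	API_BAR, HW_BAR,
	API_BAZ, HW_BAZ,
};

static uint32_t translate_caps(uint32_t hw_caps)
{
	uint32_t flags = 0;
	size_t i;

	/* same loop shape as the driver: stride 2, test odd, set even */
	for (i = 0; i < ARRAY_SIZE(cap_mapping); i += 2)
		if (hw_caps & cap_mapping[i + 1])
			flags |= cap_mapping[i];
	return flags;
}

int main(void)
{
	/* hardware reports FOO and BAZ but not BAR */
	uint32_t flags = translate_caps(HW_FOO | HW_BAZ);

	printf("API flags: %#x (expect %#x)\n",
	       (unsigned) flags, (unsigned) (API_FOO | API_BAZ));
	return 0;
}
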
+ 81 - 59
drivers/infiniband/hw/ehca/ehca_irq.c

@@ -5,6 +5,8 @@
  *
  *  Authors: Heiko J Schick <schickhj@de.ibm.com>
  *           Khadija Souissi <souissi@de.ibm.com>
+ *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
  *
  *  Copyright (c) 2005 IBM Corporation
  *
@@ -59,6 +61,7 @@
 #define NEQE_EVENT_CODE        EHCA_BMASK_IBM(2,7)
 #define NEQE_PORT_NUMBER       EHCA_BMASK_IBM(8,15)
 #define NEQE_PORT_AVAILABILITY EHCA_BMASK_IBM(16,16)
+#define NEQE_DISRUPTIVE        EHCA_BMASK_IBM(16,16)
 
 #define ERROR_DATA_LENGTH      EHCA_BMASK_IBM(52,63)
 #define ERROR_DATA_TYPE        EHCA_BMASK_IBM(0,7)
@@ -178,12 +181,11 @@ static void qp_event_callback(struct ehca_shca *shca,
 {
 	struct ib_event event;
 	struct ehca_qp *qp;
-	unsigned long flags;
 	u32 token = EHCA_BMASK_GET(EQE_QP_TOKEN, eqe);
 
-	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+	read_lock(&ehca_qp_idr_lock);
 	qp = idr_find(&ehca_qp_idr, token);
-	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+	read_unlock(&ehca_qp_idr_lock);
 
 
 	if (!qp)
@@ -207,18 +209,22 @@ static void cq_event_callback(struct ehca_shca *shca,
 			      u64 eqe)
 {
 	struct ehca_cq *cq;
-	unsigned long flags;
 	u32 token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe);
 
-	spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+	read_lock(&ehca_cq_idr_lock);
 	cq = idr_find(&ehca_cq_idr, token);
-	spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+	if (cq)
+		atomic_inc(&cq->nr_events);
+	read_unlock(&ehca_cq_idr_lock);
 
 	if (!cq)
 		return;
 
 	ehca_error_data(shca, cq, cq->ipz_cq_handle.handle);
 
+	if (atomic_dec_and_test(&cq->nr_events))
+		wake_up(&cq->wait_completion);
+
 	return;
 }
 
@@ -281,30 +287,61 @@ static void parse_identifier(struct ehca_shca *shca, u64 eqe)
 	return;
 }
 
-static void parse_ec(struct ehca_shca *shca, u64 eqe)
+static void dispatch_port_event(struct ehca_shca *shca, int port_num,
+				enum ib_event_type type, const char *msg)
 {
 	struct ib_event event;
+
+	ehca_info(&shca->ib_device, "port %d %s.", port_num, msg);
+	event.device = &shca->ib_device;
+	event.event = type;
+	event.element.port_num = port_num;
+	ib_dispatch_event(&event);
+}
+
+static void notify_port_conf_change(struct ehca_shca *shca, int port_num)
+{
+	struct ehca_sma_attr  new_attr;
+	struct ehca_sma_attr *old_attr = &shca->sport[port_num - 1].saved_attr;
+
+	ehca_query_sma_attr(shca, port_num, &new_attr);
+
+	if (new_attr.sm_sl  != old_attr->sm_sl ||
+	    new_attr.sm_lid != old_attr->sm_lid)
+		dispatch_port_event(shca, port_num, IB_EVENT_SM_CHANGE,
+				    "SM changed");
+
+	if (new_attr.lid != old_attr->lid ||
+	    new_attr.lmc != old_attr->lmc)
+		dispatch_port_event(shca, port_num, IB_EVENT_LID_CHANGE,
+				    "LID changed");
+
+	if (new_attr.pkey_tbl_len != old_attr->pkey_tbl_len ||
+	    memcmp(new_attr.pkeys, old_attr->pkeys,
+		   sizeof(u16) * new_attr.pkey_tbl_len))
+		dispatch_port_event(shca, port_num, IB_EVENT_PKEY_CHANGE,
+				    "P_Key changed");
+
+	*old_attr = new_attr;
+}
+
+static void parse_ec(struct ehca_shca *shca, u64 eqe)
+{
 	u8 ec   = EHCA_BMASK_GET(NEQE_EVENT_CODE, eqe);
 	u8 port = EHCA_BMASK_GET(NEQE_PORT_NUMBER, eqe);
 
 	switch (ec) {
 	case 0x30: /* port availability change */
 		if (EHCA_BMASK_GET(NEQE_PORT_AVAILABILITY, eqe)) {
-			ehca_info(&shca->ib_device,
-				  "port %x is active.", port);
-			event.device = &shca->ib_device;
-			event.event = IB_EVENT_PORT_ACTIVE;
-			event.element.port_num = port;
 			shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
-			ib_dispatch_event(&event);
+			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+					    "is active");
+			ehca_query_sma_attr(shca, port,
+					    &shca->sport[port - 1].saved_attr);
 		} else {
-			ehca_info(&shca->ib_device,
-				  "port %x is inactive.", port);
-			event.device = &shca->ib_device;
-			event.event = IB_EVENT_PORT_ERR;
-			event.element.port_num = port;
 			shca->sport[port - 1].port_state = IB_PORT_DOWN;
-			ib_dispatch_event(&event);
+			dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+					    "is inactive");
 		}
 		break;
 	case 0x31:
@@ -312,24 +349,19 @@ static void parse_ec(struct ehca_shca *shca, u64 eqe)
 		 * disruptive change is caused by
 		 * LID, PKEY or SM change
 		 */
-		ehca_warn(&shca->ib_device,
-			  "disruptive port %x configuration change", port);
-
-		ehca_info(&shca->ib_device,
-			  "port %x is inactive.", port);
-		event.device = &shca->ib_device;
-		event.event = IB_EVENT_PORT_ERR;
-		event.element.port_num = port;
-		shca->sport[port - 1].port_state = IB_PORT_DOWN;
-		ib_dispatch_event(&event);
-
-		ehca_info(&shca->ib_device,
-			  "port %x is active.", port);
-		event.device = &shca->ib_device;
-		event.event = IB_EVENT_PORT_ACTIVE;
-		event.element.port_num = port;
-		shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
-		ib_dispatch_event(&event);
+		if (EHCA_BMASK_GET(NEQE_DISRUPTIVE, eqe)) {
+			ehca_warn(&shca->ib_device, "disruptive port "
+				  "%d configuration change", port);
+
+			shca->sport[port - 1].port_state = IB_PORT_DOWN;
+			dispatch_port_event(shca, port, IB_EVENT_PORT_ERR,
+					    "is inactive");
+
+			shca->sport[port - 1].port_state = IB_PORT_ACTIVE;
+			dispatch_port_event(shca, port, IB_EVENT_PORT_ACTIVE,
+					    "is active");
+		} else
+			notify_port_conf_change(shca, port);
 		break;
 	case 0x32: /* adapter malfunction */
 		ehca_err(&shca->ib_device, "Adapter malfunction.");
@@ -404,7 +436,6 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
 {
 	u64 eqe_value;
 	u32 token;
-	unsigned long flags;
 	struct ehca_cq *cq;
 
 	eqe_value = eqe->entry;
@@ -412,27 +443,24 @@ static inline void process_eqe(struct ehca_shca *shca, struct ehca_eqe *eqe)
 	if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
 		ehca_dbg(&shca->ib_device, "Got completion event");
 		token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		read_lock(&ehca_cq_idr_lock);
 		cq = idr_find(&ehca_cq_idr, token);
+		if (cq)
+			atomic_inc(&cq->nr_events);
+		read_unlock(&ehca_cq_idr_lock);
 		if (cq == NULL) {
-			spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
 			ehca_err(&shca->ib_device,
 				 "Invalid eqe for non-existing cq token=%x",
 				 token);
 			return;
 		}
 		reset_eq_pending(cq);
-		cq->nr_events++;
-		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
 		if (ehca_scaling_code)
 			queue_comp_task(cq);
 		else {
 			comp_event_callback(cq);
-			spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-			cq->nr_events--;
-			if (!cq->nr_events)
+			if (atomic_dec_and_test(&cq->nr_events))
 				wake_up(&cq->wait_completion);
-			spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
 		}
 	} else {
 		ehca_dbg(&shca->ib_device, "Got non completion event");
@@ -476,17 +504,17 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
 		eqe_value = eqe_cache[eqe_cnt].eqe->entry;
 		if (EHCA_BMASK_GET(EQE_COMPLETION_EVENT, eqe_value)) {
 			token = EHCA_BMASK_GET(EQE_CQ_TOKEN, eqe_value);
-			spin_lock(&ehca_cq_idr_lock);
+			read_lock(&ehca_cq_idr_lock);
 			eqe_cache[eqe_cnt].cq = idr_find(&ehca_cq_idr, token);
+			if (eqe_cache[eqe_cnt].cq)
+				atomic_inc(&eqe_cache[eqe_cnt].cq->nr_events);
+			read_unlock(&ehca_cq_idr_lock);
 			if (!eqe_cache[eqe_cnt].cq) {
-				spin_unlock(&ehca_cq_idr_lock);
 				ehca_err(&shca->ib_device,
 					 "Invalid eqe for non-existing cq "
 					 "token=%x", token);
 				continue;
 			}
-			eqe_cache[eqe_cnt].cq->nr_events++;
-			spin_unlock(&ehca_cq_idr_lock);
 		} else
 			eqe_cache[eqe_cnt].cq = NULL;
 		eqe_cnt++;
@@ -517,11 +545,8 @@ void ehca_process_eq(struct ehca_shca *shca, int is_irq)
 			else {
 				struct ehca_cq *cq = eq->eqe_cache[i].cq;
 				comp_event_callback(cq);
-				spin_lock(&ehca_cq_idr_lock);
-				cq->nr_events--;
-				if (!cq->nr_events)
+				if (atomic_dec_and_test(&cq->nr_events))
 					wake_up(&cq->wait_completion);
-				spin_unlock(&ehca_cq_idr_lock);
 			}
 		} else {
 			ehca_dbg(&shca->ib_device, "Got non completion event");
@@ -621,13 +646,10 @@ static void run_comp_task(struct ehca_cpu_comp_task* cct)
 	while (!list_empty(&cct->cq_list)) {
 		cq = list_entry(cct->cq_list.next, struct ehca_cq, entry);
 		spin_unlock_irqrestore(&cct->task_lock, flags);
-		comp_event_callback(cq);
 
-		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
-		cq->nr_events--;
-		if (!cq->nr_events)
+		comp_event_callback(cq);
+		if (atomic_dec_and_test(&cq->nr_events))
 			wake_up(&cq->wait_completion);
-		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
 
 		spin_lock_irqsave(&cct->task_lock, flags);
 		spin_lock(&cq->task_lock);

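Every hunk in ehca_irq.c applies the same discipline: find the CQ under the new read lock, take a reference with atomic_inc() *before* releasing that lock, and let the destroy path drain the count. A compilable userspace sketch of the idiom; a single pointer slot stands in for the idr, pthread_rwlock_t for the kernel rwlock, and all names are illustrative:

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

struct obj {
	atomic_int refs;
};

static pthread_rwlock_t registry_lock = PTHREAD_RWLOCK_INITIALIZER;
static struct obj *registry;	/* one-slot stand-in for the idr */

/* event-handler side: lookup + reference under the read lock */
static struct obj *lookup_and_ref(void)
{
	struct obj *o;

	pthread_rwlock_rdlock(&registry_lock);
	o = registry;
	if (o)	/* must happen before the lock drops, or the destroyer */
		atomic_fetch_add(&o->refs, 1);	/* could miss this user */
	pthread_rwlock_unlock(&registry_lock);
	return o;
}

/* destroyer side: unpublish under the write lock, then wait for refs */
static void remove_from_registry(struct obj *o)
{
	pthread_rwlock_wrlock(&registry_lock);
	if (registry == o)
		registry = NULL;	/* no new lookups can find it now */
	pthread_rwlock_unlock(&registry_lock);
	/* kernel side now does: wait_event(..., !atomic_read(&o->refs)) */
}

int main(void)
{
	struct obj o = { .refs = 0 };
	struct obj *found;

	registry = &o;
	found = lookup_and_ref();	/* handler grabs a reference */
	remove_from_registry(&o);	/* destroyer unpublishes it */
	if (found)
		atomic_fetch_sub(&found->refs, 1);	/* handler done */
	return atomic_load(&o.refs);	/* 0: now safe to free */
}
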
+ 0 - 1
drivers/infiniband/hw/ehca/ehca_irq.h

@@ -47,7 +47,6 @@ struct ehca_shca;
 
 #include <linux/interrupt.h>
 #include <linux/types.h>
-#include <asm/atomic.h>
 
 int ehca_error_data(struct ehca_shca *shca, void *data, u64 resource);
 

+ 18 - 0
drivers/infiniband/hw/ehca/ehca_iverbs.h

@@ -49,6 +49,9 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props);
 int ehca_query_port(struct ib_device *ibdev, u8 port,
 		    struct ib_port_attr *props);
 
+int ehca_query_sma_attr(struct ehca_shca *shca, u8 port,
+			struct ehca_sma_attr *attr);
+
 int ehca_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 * pkey);
 
 int ehca_query_gid(struct ib_device *ibdev, u8 port, int index,
@@ -154,6 +157,21 @@ int ehca_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
 int ehca_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
 		   struct ib_recv_wr **bad_recv_wr);
 
+int ehca_post_srq_recv(struct ib_srq *srq,
+		       struct ib_recv_wr *recv_wr,
+		       struct ib_recv_wr **bad_recv_wr);
+
+struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+			       struct ib_srq_init_attr *init_attr,
+			       struct ib_udata *udata);
+
+int ehca_modify_srq(struct ib_srq *srq, struct ib_srq_attr *attr,
+		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+
+int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
+
+int ehca_destroy_srq(struct ib_srq *srq);
+
 u64 ehca_define_sqp(struct ehca_shca *shca, struct ehca_qp *ibqp,
 		    struct ib_qp_init_attr *qp_init_attr);
 

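These new SRQ entry points are reached through the generic verbs layer rather than called directly: a kernel ULP calls ib_create_srq()/ib_post_srq_recv(), which dispatch into ehca_create_srq()/ehca_post_srq_recv() via the ib_device function pointers registered in ehca_main.c below. A hedged sketch of such a caller; "example_make_srq" and the sizes are placeholders, the PD setup and error handling are omitted, and this only builds in a kernel context:

#include <rdma/ib_verbs.h>

static struct ib_srq *example_make_srq(struct ib_pd *my_pd)
{
	struct ib_srq_init_attr attr = {
		.attr = {
			.max_wr	 = 256,	/* outstanding receive WRs */
			.max_sge = 3,	/* eHCA rejects more than 3 for SRQs */
		},
	};

	/* ib_create_srq() ends up in ehca_create_srq() on this driver */
	return ib_create_srq(my_pd, &attr);
}
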
+ 76 - 22
drivers/infiniband/hw/ehca/ehca_main.c

@@ -94,17 +94,15 @@ MODULE_PARM_DESC(poll_all_eqs,
 MODULE_PARM_DESC(static_rate,
 		 "set permanent static rate (default: disabled)");
 MODULE_PARM_DESC(scaling_code,
-		 "set scaling code (0: disabled, 1: enabled/default)");
+		 "set scaling code (0: disabled/default, 1: enabled)");
 
-spinlock_t ehca_qp_idr_lock;
-spinlock_t ehca_cq_idr_lock;
-spinlock_t hcall_lock;
+DEFINE_RWLOCK(ehca_qp_idr_lock);
+DEFINE_RWLOCK(ehca_cq_idr_lock);
 DEFINE_IDR(ehca_qp_idr);
 DEFINE_IDR(ehca_cq_idr);
 
-
-static struct list_head shca_list; /* list of all registered ehcas */
-static spinlock_t shca_list_lock;
+static LIST_HEAD(shca_list); /* list of all registered ehcas */
+static DEFINE_SPINLOCK(shca_list_lock);
 
 static struct timer_list poll_eqs_timer;
 
@@ -205,11 +203,35 @@ static void ehca_destroy_slab_caches(void)
 #define EHCA_HCAAVER  EHCA_BMASK_IBM(32,39)
 #define EHCA_REVID    EHCA_BMASK_IBM(40,63)
 
+static struct cap_descr {
+	u64 mask;
+	char *descr;
+} hca_cap_descr[] = {
+	{ HCA_CAP_AH_PORT_NR_CHECK, "HCA_CAP_AH_PORT_NR_CHECK" },
+	{ HCA_CAP_ATOMIC, "HCA_CAP_ATOMIC" },
+	{ HCA_CAP_AUTO_PATH_MIG, "HCA_CAP_AUTO_PATH_MIG" },
+	{ HCA_CAP_BAD_P_KEY_CTR, "HCA_CAP_BAD_P_KEY_CTR" },
+	{ HCA_CAP_SQD_RTS_PORT_CHANGE, "HCA_CAP_SQD_RTS_PORT_CHANGE" },
+	{ HCA_CAP_CUR_QP_STATE_MOD, "HCA_CAP_CUR_QP_STATE_MOD" },
+	{ HCA_CAP_INIT_TYPE, "HCA_CAP_INIT_TYPE" },
+	{ HCA_CAP_PORT_ACTIVE_EVENT, "HCA_CAP_PORT_ACTIVE_EVENT" },
+	{ HCA_CAP_Q_KEY_VIOL_CTR, "HCA_CAP_Q_KEY_VIOL_CTR" },
+	{ HCA_CAP_WQE_RESIZE, "HCA_CAP_WQE_RESIZE" },
+	{ HCA_CAP_RAW_PACKET_MCAST, "HCA_CAP_RAW_PACKET_MCAST" },
+	{ HCA_CAP_SHUTDOWN_PORT, "HCA_CAP_SHUTDOWN_PORT" },
+	{ HCA_CAP_RC_LL_QP, "HCA_CAP_RC_LL_QP" },
+	{ HCA_CAP_SRQ, "HCA_CAP_SRQ" },
+	{ HCA_CAP_UD_LL_QP, "HCA_CAP_UD_LL_QP" },
+	{ HCA_CAP_RESIZE_MR, "HCA_CAP_RESIZE_MR" },
+	{ HCA_CAP_MINI_QP, "HCA_CAP_MINI_QP" },
+};
+
 int ehca_sense_attributes(struct ehca_shca *shca)
 {
-	int ret = 0;
+	int i, ret = 0;
 	u64 h_ret;
 	struct hipz_query_hca *rblock;
+	struct hipz_query_port *port;
 
 	rblock = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
 	if (!rblock) {
@@ -222,7 +244,7 @@ int ehca_sense_attributes(struct ehca_shca *shca)
 		ehca_gen_err("Cannot query device properties. h_ret=%lx",
 			     h_ret);
 		ret = -EPERM;
-		goto num_ports1;
+		goto sense_attributes1;
 	}
 
 	if (ehca_nr_ports == 1)
@@ -242,18 +264,44 @@ int ehca_sense_attributes(struct ehca_shca *shca)
 		ehca_gen_dbg(" ... hardware version=%x:%x", hcaaver, revid);
 
 		if ((hcaaver == 1) && (revid == 0))
-			shca->hw_level = 0;
+			shca->hw_level = 0x11;
 		else if ((hcaaver == 1) && (revid == 1))
-			shca->hw_level = 1;
+			shca->hw_level = 0x12;
 		else if ((hcaaver == 1) && (revid == 2))
-			shca->hw_level = 2;
+			shca->hw_level = 0x13;
+		else if ((hcaaver == 2) && (revid == 0))
+			shca->hw_level = 0x21;
+		else if ((hcaaver == 2) && (revid == 0x10))
+			shca->hw_level = 0x22;
+		else {
+			ehca_gen_warn("unknown hardware version"
+				      " - assuming default level");
+			shca->hw_level = 0x22;
+		}
 	}
 	ehca_gen_dbg(" ... hardware level=%x", shca->hw_level);
 
 	shca->sport[0].rate = IB_RATE_30_GBPS;
 	shca->sport[1].rate = IB_RATE_30_GBPS;
 
-num_ports1:
+	shca->hca_cap = rblock->hca_cap_indicators;
+	ehca_gen_dbg(" ... HCA capabilities:");
+	for (i = 0; i < ARRAY_SIZE(hca_cap_descr); i++)
+		if (EHCA_BMASK_GET(hca_cap_descr[i].mask, shca->hca_cap))
+			ehca_gen_dbg("   %s", hca_cap_descr[i].descr);
+
+	port = (struct hipz_query_port *) rblock;
+	h_ret = hipz_h_query_port(shca->ipz_hca_handle, 1, port);
+	if (h_ret != H_SUCCESS) {
+		ehca_gen_err("Cannot query port properties. h_ret=%lx",
+			     h_ret);
+		ret = -EPERM;
+		goto sense_attributes1;
+	}
+
+	shca->max_mtu = port->max_mtu;
+
+sense_attributes1:
 	ehca_free_fw_ctrlblock(rblock);
 	return ret;
 }
@@ -293,7 +341,7 @@ int ehca_init_device(struct ehca_shca *shca)
 	strlcpy(shca->ib_device.name, "ehca%d", IB_DEVICE_NAME_MAX);
 	shca->ib_device.owner               = THIS_MODULE;
 
-	shca->ib_device.uverbs_abi_ver	    = 6;
+	shca->ib_device.uverbs_abi_ver	    = 7;
 	shca->ib_device.uverbs_cmd_mask	    =
 		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
 		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
@@ -361,6 +409,20 @@ int ehca_init_device(struct ehca_shca *shca)
 	/* shca->ib_device.process_mad	    = ehca_process_mad;	    */
 	shca->ib_device.mmap		    = ehca_mmap;
 
+	if (EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap)) {
+		shca->ib_device.uverbs_cmd_mask |=
+			(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+			(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+			(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+			(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
+
+		shca->ib_device.create_srq          = ehca_create_srq;
+		shca->ib_device.modify_srq          = ehca_modify_srq;
+		shca->ib_device.query_srq           = ehca_query_srq;
+		shca->ib_device.destroy_srq         = ehca_destroy_srq;
+		shca->ib_device.post_srq_recv       = ehca_post_srq_recv;
+	}
+
 	return ret;
 }
 
@@ -800,14 +862,6 @@ int __init ehca_module_init(void)
 
 	printk(KERN_INFO "eHCA Infiniband Device Driver "
 	       "(Rel.: SVNEHCA_0023)\n");
-	idr_init(&ehca_qp_idr);
-	idr_init(&ehca_cq_idr);
-	spin_lock_init(&ehca_qp_idr_lock);
-	spin_lock_init(&ehca_cq_idr_lock);
-	spin_lock_init(&hcall_lock);
-
-	INIT_LIST_HEAD(&shca_list);
-	spin_lock_init(&shca_list_lock);
 
 	if ((ret = ehca_create_comp_pool())) {
 		ehca_gen_err("Cannot create comp pool.");

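Besides the SRQ hookup, this file swaps the runtime idr_init()/spin_lock_init() calls in ehca_module_init() for static initializers (DEFINE_RWLOCK, DEFINE_IDR, LIST_HEAD, DEFINE_SPINLOCK), so the objects are valid from the moment the module is loaded and there is no window in which a lock exists but has not been initialized. A userspace illustration of why static initialization is the safer default; pthread mutexes play the role of the kernel locks:

#include <pthread.h>

/* option 1: statically initialized -- usable before main() even runs */
static pthread_mutex_t good_lock = PTHREAD_MUTEX_INITIALIZER;

/* option 2: runtime-initialized -- unusable until init_locks() runs */
static pthread_mutex_t fragile_lock;

static void init_locks(void)
{
	pthread_mutex_init(&fragile_lock, NULL);
}

int main(void)
{
	pthread_mutex_lock(&good_lock);		/* always safe */
	pthread_mutex_unlock(&good_lock);

	init_locks();		/* forget this call, and using        */
	pthread_mutex_lock(&fragile_lock);	/* fragile_lock is undefined */
	pthread_mutex_unlock(&fragile_lock);
	return 0;
}
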
+ 533 - 218
drivers/infiniband/hw/ehca/ehca_qp.c

@@ -3,7 +3,9 @@
  *
  *  QP functions
  *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
+ *  Authors: Joachim Fenkes <fenkes@de.ibm.com>
+ *           Stefan Roscher <stefan.roscher@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
  *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
  *           Reinhard Ernst <rernst@de.ibm.com>
  *           Heiko J Schick <schickhj@de.ibm.com>
@@ -234,13 +236,6 @@ static inline enum ib_qp_statetrans get_modqp_statetrans(int ib_fromstate,
 	return index;
 }
 
-enum ehca_service_type {
-	ST_RC = 0,
-	ST_UC = 1,
-	ST_RD = 2,
-	ST_UD = 3
-};
-
 /*
  * ibqptype2servicetype returns hcp service type corresponding to given
  * ib qp type used by create_qp()
@@ -268,15 +263,34 @@ static inline int ibqptype2servicetype(enum ib_qp_type ibqptype)
 }
 
 /*
- * init_qp_queues initializes/constructs r/squeue and registers queue pages.
+ * init userspace queue info from ipz_queue data
  */
-static inline int init_qp_queues(struct ehca_shca *shca,
-				 struct ehca_qp *my_qp,
-				 int nr_sq_pages,
-				 int nr_rq_pages,
-				 int swqe_size,
-				 int rwqe_size,
-				 int nr_send_sges, int nr_receive_sges)
+static inline void queue2resp(struct ipzu_queue_resp *resp,
+			      struct ipz_queue *queue)
+{
+	resp->qe_size = queue->qe_size;
+	resp->act_nr_of_sg = queue->act_nr_of_sg;
+	resp->queue_length = queue->queue_length;
+	resp->pagesize = queue->pagesize;
+	resp->toggle_state = queue->toggle_state;
+}
+
+static inline int ll_qp_msg_size(int nr_sge)
+{
+	return 128 << nr_sge;
+}
+
+/*
+ * init_qp_queue initializes/constructs r/squeue and registers queue pages.
+ */
+static inline int init_qp_queue(struct ehca_shca *shca,
+				struct ehca_qp *my_qp,
+				struct ipz_queue *queue,
+				int q_type,
+				u64 expected_hret,
+				int nr_q_pages,
+				int wqe_size,
+				int nr_sges)
 {
 	int ret, cnt, ipz_rc;
 	void *vpage;
@@ -284,127 +298,93 @@ static inline int init_qp_queues(struct ehca_shca *shca,
 	struct ib_device *ib_dev = &shca->ib_device;
 	struct ipz_adapter_handle ipz_hca_handle = shca->ipz_hca_handle;
 
-	ipz_rc = ipz_queue_ctor(&my_qp->ipz_squeue,
-				nr_sq_pages,
-				EHCA_PAGESIZE, swqe_size, nr_send_sges);
+	if (!nr_q_pages)
+		return 0;
+
+	ipz_rc = ipz_queue_ctor(queue, nr_q_pages, EHCA_PAGESIZE,
+				wqe_size, nr_sges);
 	if (!ipz_rc) {
-		ehca_err(ib_dev,"Cannot allocate page for squeue. ipz_rc=%x",
+		ehca_err(ib_dev, "Cannot allocate page for queue. ipz_rc=%x",
 			 ipz_rc);
 		return -EBUSY;
 	}
 
-	ipz_rc = ipz_queue_ctor(&my_qp->ipz_rqueue,
-				nr_rq_pages,
-				EHCA_PAGESIZE, rwqe_size, nr_receive_sges);
-	if (!ipz_rc) {
-		ehca_err(ib_dev, "Cannot allocate page for rqueue. ipz_rc=%x",
-			 ipz_rc);
-		ret = -EBUSY;
-		goto init_qp_queues0;
-	}
-	/* register SQ pages */
-	for (cnt = 0; cnt < nr_sq_pages; cnt++) {
-		vpage = ipz_qpageit_get_inc(&my_qp->ipz_squeue);
+	/* register queue pages */
+	for (cnt = 0; cnt < nr_q_pages; cnt++) {
+		vpage = ipz_qpageit_get_inc(queue);
 		if (!vpage) {
-			ehca_err(ib_dev, "SQ ipz_qpageit_get_inc() "
+			ehca_err(ib_dev, "ipz_qpageit_get_inc() "
 				 "failed p_vpage= %p", vpage);
 			ret = -EINVAL;
-			goto init_qp_queues1;
+			goto init_qp_queue1;
 		}
 		rpage = virt_to_abs(vpage);
 
 		h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
 						 my_qp->ipz_qp_handle,
-						 &my_qp->pf, 0, 0,
+						 NULL, 0, q_type,
 						 rpage, 1,
 						 my_qp->galpas.kernel);
-		if (h_ret < H_SUCCESS) {
-			ehca_err(ib_dev, "SQ hipz_qp_register_rpage()"
-				 " failed rc=%lx", h_ret);
-			ret = ehca2ib_return_code(h_ret);
-			goto init_qp_queues1;
-		}
-	}
-
-	ipz_qeit_reset(&my_qp->ipz_squeue);
-
-	/* register RQ pages */
-	for (cnt = 0; cnt < nr_rq_pages; cnt++) {
-		vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
-		if (!vpage) {
-			ehca_err(ib_dev, "RQ ipz_qpageit_get_inc() "
-				 "failed p_vpage = %p", vpage);
-			ret = -EINVAL;
-			goto init_qp_queues1;
-		}
-
-		rpage = virt_to_abs(vpage);
-
-		h_ret = hipz_h_register_rpage_qp(ipz_hca_handle,
-						 my_qp->ipz_qp_handle,
-						 &my_qp->pf, 0, 1,
-						 rpage, 1,my_qp->galpas.kernel);
-		if (h_ret < H_SUCCESS) {
-			ehca_err(ib_dev, "RQ hipz_qp_register_rpage() failed "
-				 "rc=%lx", h_ret);
-			ret = ehca2ib_return_code(h_ret);
-			goto init_qp_queues1;
-		}
-		if (cnt == (nr_rq_pages - 1)) {	/* last page! */
-			if (h_ret != H_SUCCESS) {
-				ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+		if (cnt == (nr_q_pages - 1)) {	/* last page! */
+			if (h_ret != expected_hret) {
+				ehca_err(ib_dev, "hipz_qp_register_rpage() "
 					 "h_ret= %lx ", h_ret);
 				ret = ehca2ib_return_code(h_ret);
-				goto init_qp_queues1;
+				goto init_qp_queue1;
 			}
 			vpage = ipz_qpageit_get_inc(&my_qp->ipz_rqueue);
 			if (vpage) {
 				ehca_err(ib_dev, "ipz_qpageit_get_inc() "
 					 "should not succeed vpage=%p", vpage);
 				ret = -EINVAL;
-				goto init_qp_queues1;
+				goto init_qp_queue1;
 			}
 		} else {
 			if (h_ret != H_PAGE_REGISTERED) {
-				ehca_err(ib_dev, "RQ hipz_qp_register_rpage() "
+				ehca_err(ib_dev, "hipz_qp_register_rpage() "
 					 "h_ret= %lx ", h_ret);
 				ret = ehca2ib_return_code(h_ret);
-				goto init_qp_queues1;
+				goto init_qp_queue1;
 			}
 		}
 	}
 
-	ipz_qeit_reset(&my_qp->ipz_rqueue);
+	ipz_qeit_reset(queue);
 
 	return 0;
 
-init_qp_queues1:
-	ipz_queue_dtor(&my_qp->ipz_rqueue);
-init_qp_queues0:
-	ipz_queue_dtor(&my_qp->ipz_squeue);
+init_qp_queue1:
+	ipz_queue_dtor(queue);
 	return ret;
 }
 
-struct ib_qp *ehca_create_qp(struct ib_pd *pd,
-			     struct ib_qp_init_attr *init_attr,
-			     struct ib_udata *udata)
+/*
+ * Create an ib_qp struct that is either a QP or an SRQ, depending on
+ * the value of the is_srq parameter. If init_attr and srq_init_attr share
+ * fields, the field out of init_attr is used.
+ */
+struct ehca_qp *internal_create_qp(struct ib_pd *pd,
+				   struct ib_qp_init_attr *init_attr,
+				   struct ib_srq_init_attr *srq_init_attr,
+				   struct ib_udata *udata, int is_srq)
 {
-	static int da_rc_msg_size[]={ 128, 256, 512, 1024, 2048, 4096 };
-	static int da_ud_sq_msg_size[]={ 128, 384, 896, 1920, 3968 };
 	struct ehca_qp *my_qp;
 	struct ehca_pd *my_pd = container_of(pd, struct ehca_pd, ib_pd);
 	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
 					      ib_device);
 	struct ib_ucontext *context = NULL;
 	u64 h_ret;
-	int max_send_sge, max_recv_sge, ret;
+	int is_llqp = 0, has_srq = 0;
+	int qp_type, max_send_sge, max_recv_sge, ret;
 
 	/* h_call's out parameters */
 	struct ehca_alloc_qp_parms parms;
-	u32 swqe_size = 0, rwqe_size = 0;
-	u8 daqp_completion, isdaqp;
+	u32 swqe_size = 0, rwqe_size = 0, ib_qp_num;
 	unsigned long flags;
 
+	memset(&parms, 0, sizeof(parms));
+	qp_type = init_attr->qp_type;
+
 	if (init_attr->sq_sig_type != IB_SIGNAL_REQ_WR &&
 		init_attr->sq_sig_type != IB_SIGNAL_ALL_WR) {
 		ehca_err(pd->device, "init_attr->sg_sig_type=%x not allowed",
@@ -412,41 +392,98 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
 		return ERR_PTR(-EINVAL);
 	}
 
-	/* save daqp completion bits */
-	daqp_completion = init_attr->qp_type & 0x60;
-	/* save daqp bit */
-	isdaqp = (init_attr->qp_type & 0x80) ? 1 : 0;
-	init_attr->qp_type = init_attr->qp_type & 0x1F;
+	/* save LLQP info */
+	if (qp_type & 0x80) {
+		is_llqp = 1;
+		parms.ext_type = EQPT_LLQP;
+		parms.ll_comp_flags = qp_type & LLQP_COMP_MASK;
+	}
+	qp_type &= 0x1F;
+	init_attr->qp_type &= 0x1F;
 
-	if (init_attr->qp_type != IB_QPT_UD &&
-	    init_attr->qp_type != IB_QPT_SMI &&
-	    init_attr->qp_type != IB_QPT_GSI &&
-	    init_attr->qp_type != IB_QPT_UC &&
-	    init_attr->qp_type != IB_QPT_RC) {
-		ehca_err(pd->device, "wrong QP Type=%x", init_attr->qp_type);
-		return ERR_PTR(-EINVAL);
+	/* handle SRQ base QPs */
+	if (init_attr->srq) {
+		struct ehca_qp *my_srq =
+			container_of(init_attr->srq, struct ehca_qp, ib_srq);
+
+		has_srq = 1;
+		parms.ext_type = EQPT_SRQBASE;
+		parms.srq_qpn = my_srq->real_qp_num;
+		parms.srq_token = my_srq->token;
 	}
-	if ((init_attr->qp_type != IB_QPT_RC && init_attr->qp_type != IB_QPT_UD)
-	    && isdaqp) {
-		ehca_err(pd->device, "unsupported LL QP Type=%x",
-			 init_attr->qp_type);
+
+	if (is_llqp && has_srq) {
+		ehca_err(pd->device, "LLQPs can't have an SRQ");
 		return ERR_PTR(-EINVAL);
-	} else if (init_attr->qp_type == IB_QPT_RC && isdaqp &&
-		   (init_attr->cap.max_send_wr > 255 ||
-		    init_attr->cap.max_recv_wr > 255 )) {
-		       ehca_err(pd->device, "Invalid Number of max_sq_wr =%x "
-				"or max_rq_wr=%x for QP Type=%x",
-				init_attr->cap.max_send_wr,
-				init_attr->cap.max_recv_wr,init_attr->qp_type);
-		       return ERR_PTR(-EINVAL);
-	} else if (init_attr->qp_type == IB_QPT_UD && isdaqp &&
-		  init_attr->cap.max_send_wr > 255) {
-		ehca_err(pd->device,
-			 "Invalid Number of max_send_wr=%x for UD QP_TYPE=%x",
-			 init_attr->cap.max_send_wr, init_attr->qp_type);
+	}
+
+	/* handle SRQs */
+	if (is_srq) {
+		parms.ext_type = EQPT_SRQ;
+		parms.srq_limit = srq_init_attr->attr.srq_limit;
+		if (init_attr->cap.max_recv_sge > 3) {
+			ehca_err(pd->device, "no more than three SGEs "
+				 "supported for SRQ  pd=%p  max_sge=%x",
+				 pd, init_attr->cap.max_recv_sge);
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	/* check QP type */
+	if (qp_type != IB_QPT_UD &&
+	    qp_type != IB_QPT_UC &&
+	    qp_type != IB_QPT_RC &&
+	    qp_type != IB_QPT_SMI &&
+	    qp_type != IB_QPT_GSI) {
+		ehca_err(pd->device, "wrong QP Type=%x", qp_type);
 		return ERR_PTR(-EINVAL);
 	}
 
+	if (is_llqp) {
+		switch (qp_type) {
+		case IB_QPT_RC:
+			if ((init_attr->cap.max_send_wr > 255) ||
+			    (init_attr->cap.max_recv_wr > 255)) {
+				ehca_err(pd->device,
+					 "Invalid Number of max_sq_wr=%x "
+					 "or max_rq_wr=%x for RC LLQP",
+					 init_attr->cap.max_send_wr,
+					 init_attr->cap.max_recv_wr);
+				return ERR_PTR(-EINVAL);
+			}
+			break;
+		case IB_QPT_UD:
+			if (!EHCA_BMASK_GET(HCA_CAP_UD_LL_QP, shca->hca_cap)) {
+				ehca_err(pd->device, "UD LLQP not supported "
+					 "by this adapter");
+				return ERR_PTR(-ENOSYS);
+			}
+			if (!(init_attr->cap.max_send_sge <= 5
+			    && init_attr->cap.max_send_sge >= 1
+			    && init_attr->cap.max_recv_sge <= 5
+			    && init_attr->cap.max_recv_sge >= 1)) {
+				ehca_err(pd->device,
+					 "Invalid Number of max_send_sge=%x "
+					 "or max_recv_sge=%x for UD LLQP",
+					 init_attr->cap.max_send_sge,
+					 init_attr->cap.max_recv_sge);
+				return ERR_PTR(-EINVAL);
+			} else if (init_attr->cap.max_send_wr > 255) {
+				ehca_err(pd->device,
+					 "Invalid Number of "
+					 "max_send_wr=%x for UD QP_TYPE=%x",
+					 init_attr->cap.max_send_wr, qp_type);
+				return ERR_PTR(-EINVAL);
+			}
+			break;
+		default:
+			ehca_err(pd->device, "unsupported LL QP Type=%x",
+				 qp_type);
+			return ERR_PTR(-EINVAL);
+			break;
+		}
+	}
+
 	if (pd->uobject && udata)
 		context = pd->uobject->context;
 
@@ -456,16 +493,17 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
 		return ERR_PTR(-ENOMEM);
 	}
 
-	memset (&parms, 0, sizeof(struct ehca_alloc_qp_parms));
 	spin_lock_init(&my_qp->spinlock_s);
 	spin_lock_init(&my_qp->spinlock_r);
+	my_qp->qp_type = qp_type;
+	my_qp->ext_type = parms.ext_type;
 
-	my_qp->recv_cq =
-		container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
-	my_qp->send_cq =
-		container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
-
-	my_qp->init_attr = *init_attr;
+	if (init_attr->recv_cq)
+		my_qp->recv_cq =
+			container_of(init_attr->recv_cq, struct ehca_cq, ib_cq);
+	if (init_attr->send_cq)
+		my_qp->send_cq =
+			container_of(init_attr->send_cq, struct ehca_cq, ib_cq);
 
 	do {
 		if (!idr_pre_get(&ehca_qp_idr, GFP_KERNEL)) {
@@ -474,9 +512,9 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
 			goto create_qp_exit0;
 		}
 
-		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		write_lock_irqsave(&ehca_qp_idr_lock, flags);
 		ret = idr_get_new(&ehca_qp_idr, my_qp, &my_qp->token);
-		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+		write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
 
 	} while (ret == -EAGAIN);
 
@@ -486,10 +524,10 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
 		goto create_qp_exit0;
 	}
 
-	parms.servicetype = ibqptype2servicetype(init_attr->qp_type);
+	parms.servicetype = ibqptype2servicetype(qp_type);
 	if (parms.servicetype < 0) {
 		ret = -EINVAL;
-		ehca_err(pd->device, "Invalid qp_type=%x", init_attr->qp_type);
+		ehca_err(pd->device, "Invalid qp_type=%x", qp_type);
 		goto create_qp_exit0;
 	}
 
@@ -501,21 +539,25 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
 	/* UD_AV CIRCUMVENTION */
 	max_send_sge = init_attr->cap.max_send_sge;
 	max_recv_sge = init_attr->cap.max_recv_sge;
-	if (IB_QPT_UD == init_attr->qp_type ||
-	    IB_QPT_GSI == init_attr->qp_type ||
-	    IB_QPT_SMI == init_attr->qp_type) {
+	if (parms.servicetype == ST_UD && !is_llqp) {
 		max_send_sge += 2;
 		max_recv_sge += 2;
 	}
 
-	parms.ipz_eq_handle = shca->eq.ipz_eq_handle;
-	parms.daqp_ctrl = isdaqp | daqp_completion;
+	parms.token = my_qp->token;
+	parms.eq_handle = shca->eq.ipz_eq_handle;
 	parms.pd = my_pd->fw_pd;
-	parms.max_recv_sge = max_recv_sge;
-	parms.max_send_sge = max_send_sge;
+	if (my_qp->send_cq)
+		parms.send_cq_handle = my_qp->send_cq->ipz_cq_handle;
+	if (my_qp->recv_cq)
+		parms.recv_cq_handle = my_qp->recv_cq->ipz_cq_handle;
 
-	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, my_qp, &parms);
+	parms.max_send_wr = init_attr->cap.max_send_wr;
+	parms.max_recv_wr = init_attr->cap.max_recv_wr;
+	parms.max_send_sge = max_send_sge;
+	parms.max_recv_sge = max_recv_sge;
 
+	h_ret = hipz_h_alloc_resource_qp(shca->ipz_hca_handle, &parms);
 	if (h_ret != H_SUCCESS) {
 		ehca_err(pd->device, "h_alloc_resource_qp() failed h_ret=%lx",
 			 h_ret);
@@ -523,18 +565,20 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
 		goto create_qp_exit1;
 	}
 
-	my_qp->ib_qp.qp_num = my_qp->real_qp_num;
+	ib_qp_num = my_qp->real_qp_num = parms.real_qp_num;
+	my_qp->ipz_qp_handle = parms.qp_handle;
+	my_qp->galpas = parms.galpas;
 
-	switch (init_attr->qp_type) {
+	switch (qp_type) {
 	case IB_QPT_RC:
-	        if (isdaqp == 0) {
+		if (!is_llqp) {
 			swqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
 					     (parms.act_nr_send_sges)]);
 			rwqe_size = offsetof(struct ehca_wqe, u.nud.sg_list[
 					     (parms.act_nr_recv_sges)]);
-		} else { /* for daqp we need to use msg size, not wqe size */
-		        swqe_size = da_rc_msg_size[max_send_sge];
-			rwqe_size = da_rc_msg_size[max_recv_sge];
+		} else { /* for LLQP we need to use msg size, not wqe size */
+			swqe_size = ll_qp_msg_size(max_send_sge);
+			rwqe_size = ll_qp_msg_size(max_recv_sge);
 			parms.act_nr_send_sges = 1;
 			parms.act_nr_recv_sges = 1;
 		}
@@ -549,29 +593,27 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
 	case IB_QPT_UD:
 	case IB_QPT_GSI:
 	case IB_QPT_SMI:
-		/* UD circumvention */
-		parms.act_nr_recv_sges -= 2;
-		parms.act_nr_send_sges -= 2;
-		if (isdaqp) {
-		        swqe_size = da_ud_sq_msg_size[max_send_sge];
-			rwqe_size = da_rc_msg_size[max_recv_sge];
+		if (is_llqp) {
+			swqe_size = ll_qp_msg_size(parms.act_nr_send_sges);
+			rwqe_size = ll_qp_msg_size(parms.act_nr_recv_sges);
 			parms.act_nr_send_sges = 1;
 			parms.act_nr_recv_sges = 1;
 		} else {
+			/* UD circumvention */
+			parms.act_nr_send_sges -= 2;
+			parms.act_nr_recv_sges -= 2;
 			swqe_size = offsetof(struct ehca_wqe,
 					     u.ud_av.sg_list[parms.act_nr_send_sges]);
 			rwqe_size = offsetof(struct ehca_wqe,
 					     u.ud_av.sg_list[parms.act_nr_recv_sges]);
 		}
 
-		if (IB_QPT_GSI == init_attr->qp_type ||
-		    IB_QPT_SMI == init_attr->qp_type) {
+		if (IB_QPT_GSI == qp_type || IB_QPT_SMI == qp_type) {
 			parms.act_nr_send_wqes = init_attr->cap.max_send_wr;
 			parms.act_nr_recv_wqes = init_attr->cap.max_recv_wr;
 			parms.act_nr_send_sges = init_attr->cap.max_send_sge;
 			parms.act_nr_recv_sges = init_attr->cap.max_recv_sge;
-			my_qp->ib_qp.qp_num =
-				(init_attr->qp_type == IB_QPT_SMI) ? 0 : 1;
+			ib_qp_num = (qp_type == IB_QPT_SMI) ? 0 : 1;
 		}
 
 		break;
@@ -580,108 +622,234 @@ struct ib_qp *ehca_create_qp(struct ib_pd *pd,
 		break;
 	}
 
-	/* initializes r/squeue and registers queue pages */
-	ret = init_qp_queues(shca, my_qp,
-			     parms.nr_sq_pages, parms.nr_rq_pages,
-			     swqe_size, rwqe_size,
-			     parms.act_nr_send_sges, parms.act_nr_recv_sges);
-	if (ret) {
-		ehca_err(pd->device,
-			 "Couldn't initialize r/squeue and pages ret=%x", ret);
-		goto create_qp_exit2;
+	/* initialize r/squeue and register queue pages */
+	if (HAS_SQ(my_qp)) {
+		ret = init_qp_queue(
+			shca, my_qp, &my_qp->ipz_squeue, 0,
+			HAS_RQ(my_qp) ? H_PAGE_REGISTERED : H_SUCCESS,
+			parms.nr_sq_pages, swqe_size,
+			parms.act_nr_send_sges);
+		if (ret) {
+			ehca_err(pd->device, "Couldn't initialize squeue "
+				 "and pages  ret=%x", ret);
+			goto create_qp_exit2;
+		}
 	}
 
-	my_qp->ib_qp.pd = &my_pd->ib_pd;
-	my_qp->ib_qp.device = my_pd->ib_pd.device;
+	if (HAS_RQ(my_qp)) {
+		ret = init_qp_queue(
+			shca, my_qp, &my_qp->ipz_rqueue, 1,
+			H_SUCCESS, parms.nr_rq_pages, rwqe_size,
+			parms.act_nr_recv_sges);
+		if (ret) {
+			ehca_err(pd->device, "Couldn't initialize rqueue "
+				 "and pages ret=%x", ret);
+			goto create_qp_exit3;
+		}
+	}
 
-	my_qp->ib_qp.recv_cq = init_attr->recv_cq;
-	my_qp->ib_qp.send_cq = init_attr->send_cq;
+	if (is_srq) {
+		my_qp->ib_srq.pd = &my_pd->ib_pd;
+		my_qp->ib_srq.device = my_pd->ib_pd.device;
 
-	my_qp->ib_qp.qp_type = init_attr->qp_type;
+		my_qp->ib_srq.srq_context = init_attr->qp_context;
+		my_qp->ib_srq.event_handler = init_attr->event_handler;
+	} else {
+		my_qp->ib_qp.qp_num = ib_qp_num;
+		my_qp->ib_qp.pd = &my_pd->ib_pd;
+		my_qp->ib_qp.device = my_pd->ib_pd.device;
+
+		my_qp->ib_qp.recv_cq = init_attr->recv_cq;
+		my_qp->ib_qp.send_cq = init_attr->send_cq;
 
-	my_qp->qp_type = init_attr->qp_type;
-	my_qp->ib_qp.srq = init_attr->srq;
+		my_qp->ib_qp.qp_type = qp_type;
+		my_qp->ib_qp.srq = init_attr->srq;
 
-	my_qp->ib_qp.qp_context = init_attr->qp_context;
-	my_qp->ib_qp.event_handler = init_attr->event_handler;
+		my_qp->ib_qp.qp_context = init_attr->qp_context;
+		my_qp->ib_qp.event_handler = init_attr->event_handler;
+	}
 
 	init_attr->cap.max_inline_data = 0; /* not supported yet */
 	init_attr->cap.max_recv_sge = parms.act_nr_recv_sges;
 	init_attr->cap.max_recv_wr = parms.act_nr_recv_wqes;
 	init_attr->cap.max_send_sge = parms.act_nr_send_sges;
 	init_attr->cap.max_send_wr = parms.act_nr_send_wqes;
+	my_qp->init_attr = *init_attr;
 
 	/* NOTE: define_apq0() not supported yet */
-	if (init_attr->qp_type == IB_QPT_GSI) {
+	if (qp_type == IB_QPT_GSI) {
 		h_ret = ehca_define_sqp(shca, my_qp, init_attr);
 		if (h_ret != H_SUCCESS) {
 			ehca_err(pd->device, "ehca_define_sqp() failed rc=%lx",
 				 h_ret);
 			ret = ehca2ib_return_code(h_ret);
-			goto create_qp_exit3;
+			goto create_qp_exit4;
 		}
 	}
-	if (init_attr->send_cq) {
-		struct ehca_cq *cq = container_of(init_attr->send_cq,
-						  struct ehca_cq, ib_cq);
-		ret = ehca_cq_assign_qp(cq, my_qp);
+
+	if (my_qp->send_cq) {
+		ret = ehca_cq_assign_qp(my_qp->send_cq, my_qp);
 		if (ret) {
 			ehca_err(pd->device, "Couldn't assign qp to send_cq ret=%x",
 				 ret);
-			goto create_qp_exit3;
+			goto create_qp_exit4;
 		}
-		my_qp->send_cq = cq;
 	}
+
 	/* copy queues, galpa data to user space */
 	if (context && udata) {
-		struct ipz_queue *ipz_rqueue = &my_qp->ipz_rqueue;
-		struct ipz_queue *ipz_squeue = &my_qp->ipz_squeue;
 		struct ehca_create_qp_resp resp;
 		memset(&resp, 0, sizeof(resp));
 
 		resp.qp_num = my_qp->real_qp_num;
 		resp.token = my_qp->token;
 		resp.qp_type = my_qp->qp_type;
+		resp.ext_type = my_qp->ext_type;
 		resp.qkey = my_qp->qkey;
 		resp.real_qp_num = my_qp->real_qp_num;
-		/* rqueue properties */
-		resp.ipz_rqueue.qe_size = ipz_rqueue->qe_size;
-		resp.ipz_rqueue.act_nr_of_sg = ipz_rqueue->act_nr_of_sg;
-		resp.ipz_rqueue.queue_length = ipz_rqueue->queue_length;
-		resp.ipz_rqueue.pagesize = ipz_rqueue->pagesize;
-		resp.ipz_rqueue.toggle_state = ipz_rqueue->toggle_state;
-		/* squeue properties */
-		resp.ipz_squeue.qe_size = ipz_squeue->qe_size;
-		resp.ipz_squeue.act_nr_of_sg = ipz_squeue->act_nr_of_sg;
-		resp.ipz_squeue.queue_length = ipz_squeue->queue_length;
-		resp.ipz_squeue.pagesize = ipz_squeue->pagesize;
-		resp.ipz_squeue.toggle_state = ipz_squeue->toggle_state;
+		if (HAS_SQ(my_qp))
+			queue2resp(&resp.ipz_squeue, &my_qp->ipz_squeue);
+		if (HAS_RQ(my_qp))
+			queue2resp(&resp.ipz_rqueue, &my_qp->ipz_rqueue);
+
 		if (ib_copy_to_udata(udata, &resp, sizeof resp)) {
 			ehca_err(pd->device, "Copy to udata failed");
 			ret = -EINVAL;
-			goto create_qp_exit3;
+			goto create_qp_exit4;
 		}
 	}
 
-	return &my_qp->ib_qp;
+	return my_qp;
+
+create_qp_exit4:
+	if (HAS_RQ(my_qp))
+		ipz_queue_dtor(&my_qp->ipz_rqueue);
 
 create_qp_exit3:
-	ipz_queue_dtor(&my_qp->ipz_rqueue);
-	ipz_queue_dtor(&my_qp->ipz_squeue);
+	if (HAS_SQ(my_qp))
+		ipz_queue_dtor(&my_qp->ipz_squeue);
 
 create_qp_exit2:
 	hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
 
 create_qp_exit1:
-	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+	write_lock_irqsave(&ehca_qp_idr_lock, flags);
 	idr_remove(&ehca_qp_idr, my_qp->token);
-	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
 
 create_qp_exit0:
 	kmem_cache_free(qp_cache, my_qp);
 	return ERR_PTR(ret);
 }
 
+struct ib_qp *ehca_create_qp(struct ib_pd *pd,
+			     struct ib_qp_init_attr *qp_init_attr,
+			     struct ib_udata *udata)
+{
+	struct ehca_qp *ret;
+
+	ret = internal_create_qp(pd, qp_init_attr, NULL, udata, 0);
+	return IS_ERR(ret) ? (struct ib_qp *) ret : &ret->ib_qp;
+}
+
+int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+			struct ib_uobject *uobject);
+
+struct ib_srq *ehca_create_srq(struct ib_pd *pd,
+			       struct ib_srq_init_attr *srq_init_attr,
+			       struct ib_udata *udata)
+{
+	struct ib_qp_init_attr qp_init_attr;
+	struct ehca_qp *my_qp;
+	struct ib_srq *ret;
+	struct ehca_shca *shca = container_of(pd->device, struct ehca_shca,
+					      ib_device);
+	struct hcp_modify_qp_control_block *mqpcb;
+	u64 hret, update_mask;
+
+	/* For common attributes, internal_create_qp() takes its info
+	 * out of qp_init_attr, so copy all common attrs there.
+	 */
+	memset(&qp_init_attr, 0, sizeof(qp_init_attr));
+	qp_init_attr.event_handler = srq_init_attr->event_handler;
+	qp_init_attr.qp_context = srq_init_attr->srq_context;
+	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
+	qp_init_attr.qp_type = IB_QPT_RC;
+	qp_init_attr.cap.max_recv_wr = srq_init_attr->attr.max_wr;
+	qp_init_attr.cap.max_recv_sge = srq_init_attr->attr.max_sge;
+
+	my_qp = internal_create_qp(pd, &qp_init_attr, srq_init_attr, udata, 1);
+	if (IS_ERR(my_qp))
+		return (struct ib_srq *) my_qp;
+
+	/* copy back return values */
+	srq_init_attr->attr.max_wr = qp_init_attr.cap.max_recv_wr;
+	srq_init_attr->attr.max_sge = qp_init_attr.cap.max_recv_sge;
+
+	/* drive SRQ into RTR state */
+	mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+	if (!mqpcb) {
+		ehca_err(pd->device, "Could not get zeroed page for mqpcb "
+			 "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
+		ret = ERR_PTR(-ENOMEM);
+		goto create_srq1;
+	}
+
+	mqpcb->qp_state = EHCA_QPS_INIT;
+	mqpcb->prim_phys_port = 1;
+	update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+	hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				update_mask,
+				mqpcb, my_qp->galpas.kernel);
+	if (hret != H_SUCCESS) {
+		ehca_err(pd->device, "Could not modify SRQ to INIT"
+			 "ehca_qp=%p qp_num=%x hret=%lx",
+			 my_qp, my_qp->real_qp_num, hret);
+		goto create_srq2;
+	}
+
+	mqpcb->qp_enable = 1;
+	update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_ENABLE, 1);
+	hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				update_mask,
+				mqpcb, my_qp->galpas.kernel);
+	if (hret != H_SUCCESS) {
+		ehca_err(pd->device, "Could not enable SRQ"
+			 "ehca_qp=%p qp_num=%x hret=%lx",
+			 my_qp, my_qp->real_qp_num, hret);
+		goto create_srq2;
+	}
+
+	mqpcb->qp_state  = EHCA_QPS_RTR;
+	update_mask = EHCA_BMASK_SET(MQPCB_MASK_QP_STATE, 1);
+	hret = hipz_h_modify_qp(shca->ipz_hca_handle,
+				my_qp->ipz_qp_handle,
+				&my_qp->pf,
+				update_mask,
+				mqpcb, my_qp->galpas.kernel);
+	if (hret != H_SUCCESS) {
+		ehca_err(pd->device, "Could not modify SRQ to RTR"
+			 "ehca_qp=%p qp_num=%x hret=%lx",
+			 my_qp, my_qp->real_qp_num, hret);
+		goto create_srq2;
+	}
+
+	return &my_qp->ib_srq;
+
+create_srq2:
+	ret = ERR_PTR(ehca2ib_return_code(hret));
+	ehca_free_fw_ctrlblock(mqpcb);
+
+create_srq1:
+	internal_destroy_qp(pd->device, my_qp, my_qp->ib_srq.uobject);
+
+	return ret;
+}
+
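ehca_create_srq() above drives the new SRQ through INIT -> enabled -> RTR with three hipz_h_modify_qp() calls that differ only in the control-block field touched and the update mask, i.e. the sequence can be read as a table of steps. A standalone sketch of that shape; the step table, values, and the apply() stand-in are invented for illustration, while the driver itself keeps the three explicit calls:

#include <stdio.h>

struct step {
	const char *name;
	int value;		/* value written into the control block */
	unsigned mask;		/* which field the firmware should apply */
};

static int apply(const struct step *s)
{
	/* stand-in for hipz_h_modify_qp(); always succeeds in the demo */
	printf("modify: %-8s value=%d mask=%#x\n", s->name, s->value, s->mask);
	return 0;
}

int main(void)
{
	static const struct step to_rtr[] = {
		{ "INIT",   1, 0x1 },	/* qp_state = EHCA_QPS_INIT */
		{ "enable", 1, 0x2 },	/* qp_enable = 1 */
		{ "RTR",    2, 0x1 },	/* qp_state = EHCA_QPS_RTR */
	};
	size_t i;

	for (i = 0; i < sizeof(to_rtr) / sizeof(to_rtr[0]); i++)
		if (apply(&to_rtr[i]))
			return 1;	/* driver: goto create_srq2 */
	return 0;
}
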
 /*
  * prepare_sqe_rts called by internal_modify_qp() at trans sqe -> rts
  * set purge bit of bad wqe and subsequent wqes to avoid reentering sqe
@@ -765,7 +933,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 	u64 h_ret;
 	int bad_wqe_cnt = 0;
 	int squeue_locked = 0;
-	unsigned long spl_flags = 0;
+	unsigned long flags = 0;
 
 	/* do query_qp to obtain current attr values */
 	mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
@@ -886,6 +1054,17 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 		 "ehca_qp=%p qp_num=%x <VALID STATE CHANGE> qp_state_xsit=%x",
 		 my_qp, ibqp->qp_num, statetrans);
 
+	/* eHCA2 rev2 and higher require the SEND_GRH_FLAG to be set
+	 * in non-LL UD QPs.
+	 */
+	if ((my_qp->qp_type == IB_QPT_UD) &&
+	    (my_qp->ext_type != EQPT_LLQP) &&
+	    (statetrans == IB_QPST_INIT2RTR) &&
+	    (shca->hw_level >= 0x22)) {
+		update_mask |= EHCA_BMASK_SET(MQPCB_MASK_SEND_GRH_FLAG, 1);
+		mqpcb->send_grh_flag = 1;
+	}
+
 	/* sqe -> rts: set purge bit of bad wqe before actual trans */
 	if ((my_qp->qp_type == IB_QPT_UD ||
 	     my_qp->qp_type == IB_QPT_GSI ||
@@ -895,7 +1074,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 		if (!ibqp->uobject) {
 			struct ehca_wqe *wqe;
 			/* lock send queue */
-			spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+			spin_lock_irqsave(&my_qp->spinlock_s, flags);
 			squeue_locked = 1;
 			/* mark next free wqe */
 			wqe = (struct ehca_wqe*)
@@ -1181,7 +1360,7 @@ static int internal_modify_qp(struct ib_qp *ibqp,
 
 modify_qp_exit2:
 	if (squeue_locked) { /* this means: sqe -> rts */
-		spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
+		spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
 		my_qp->sqerr_purgeflag = 1;
 	}
 
@@ -1312,6 +1491,9 @@ int ehca_query_qp(struct ib_qp *qp,
 	qp_attr->alt_port_num = qpcb->alt_phys_port;
 	qp_attr->alt_timeout = qpcb->timeout_al;
 
+	qp_attr->max_dest_rd_atomic = qpcb->rdma_nr_atomic_resp_res;
+	qp_attr->max_rd_atomic = qpcb->rdma_atomic_outst_dest_qp;
+
 	/* primary av */
 	qp_attr->ah_attr.sl = qpcb->service_level;
 
@@ -1367,53 +1549,170 @@ query_qp_exit1:
 	return ret;
 	return ret;
 }
 }
 
 
-int ehca_destroy_qp(struct ib_qp *ibqp)
+int ehca_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		    enum ib_srq_attr_mask attr_mask, struct ib_udata *udata)
 {
 {
-	struct ehca_qp *my_qp = container_of(ibqp, struct ehca_qp, ib_qp);
-	struct ehca_shca *shca = container_of(ibqp->device, struct ehca_shca,
+	struct ehca_qp *my_qp =
+		container_of(ibsrq, struct ehca_qp, ib_srq);
+	struct ehca_pd *my_pd =
+		container_of(ibsrq->pd, struct ehca_pd, ib_pd);
+	struct ehca_shca *shca =
+		container_of(ibsrq->pd->device, struct ehca_shca, ib_device);
+	struct hcp_modify_qp_control_block *mqpcb;
+	u64 update_mask;
+	u64 h_ret;
+	int ret = 0;
+
+	u32 cur_pid = current->tgid;
+	if (my_pd->ib_pd.uobject && my_pd->ib_pd.uobject->context &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(ibsrq->pd->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	mqpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+	if (!mqpcb) {
+		ehca_err(ibsrq->device, "Could not get zeroed page for mqpcb "
+			 "ehca_qp=%p qp_num=%x ", my_qp, my_qp->real_qp_num);
+		return -ENOMEM;
+	}
+
+	update_mask = 0;
+	if (attr_mask & IB_SRQ_LIMIT) {
+		attr_mask &= ~IB_SRQ_LIMIT;
+		update_mask |=
+			EHCA_BMASK_SET(MQPCB_MASK_CURR_SRQ_LIMIT, 1)
+			| EHCA_BMASK_SET(MQPCB_MASK_QP_AFF_ASYN_EV_LOG_REG, 1);
+		mqpcb->curr_srq_limit =
+			EHCA_BMASK_SET(MQPCB_CURR_SRQ_LIMIT, attr->srq_limit);
+		mqpcb->qp_aff_asyn_ev_log_reg =
+			EHCA_BMASK_SET(QPX_AAELOG_RESET_SRQ_LIMIT, 1);
+	}
+
+	/* by now, all bits in attr_mask should have been cleared */
+	if (attr_mask) {
+		ehca_err(ibsrq->device, "invalid attribute mask bits set  "
+			 "attr_mask=%x", attr_mask);
+		ret = -EINVAL;
+		goto modify_srq_exit0;
+	}
+
+	if (ehca_debug_level)
+		ehca_dmp(mqpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
+
+	h_ret = hipz_h_modify_qp(shca->ipz_hca_handle, my_qp->ipz_qp_handle,
+				 NULL, update_mask, mqpcb,
+				 my_qp->galpas.kernel);
+
+	if (h_ret != H_SUCCESS) {
+		ret = ehca2ib_return_code(h_ret);
+		ehca_err(ibsrq->device, "hipz_h_modify_qp() failed rc=%lx "
+			 "ehca_qp=%p qp_num=%x",
+			 h_ret, my_qp, my_qp->real_qp_num);
+	}
+
+modify_srq_exit0:
+	ehca_free_fw_ctrlblock(mqpcb);
+
+	return ret;
+}
+
+int ehca_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr)
+{
+	struct ehca_qp *my_qp = container_of(srq, struct ehca_qp, ib_srq);
+	struct ehca_pd *my_pd = container_of(srq->pd, struct ehca_pd, ib_pd);
+	struct ehca_shca *shca = container_of(srq->device, struct ehca_shca,
 					      ib_device);
+	struct ipz_adapter_handle adapter_handle = shca->ipz_hca_handle;
+	struct hcp_modify_qp_control_block *qpcb;
+	u32 cur_pid = current->tgid;
+	int ret = 0;
+	u64 h_ret;
+
+	if (my_pd->ib_pd.uobject  && my_pd->ib_pd.uobject->context  &&
+	    my_pd->ownpid != cur_pid) {
+		ehca_err(srq->device, "Invalid caller pid=%x ownpid=%x",
+			 cur_pid, my_pd->ownpid);
+		return -EINVAL;
+	}
+
+	qpcb = ehca_alloc_fw_ctrlblock(GFP_KERNEL);
+	if (!qpcb) {
+		ehca_err(srq->device, "Out of memory for qpcb "
+			 "ehca_qp=%p qp_num=%x", my_qp, my_qp->real_qp_num);
+		return -ENOMEM;
+	}
+
+	h_ret = hipz_h_query_qp(adapter_handle, my_qp->ipz_qp_handle,
+				NULL, qpcb, my_qp->galpas.kernel);
+
+	if (h_ret != H_SUCCESS) {
+		ret = ehca2ib_return_code(h_ret);
+		ehca_err(srq->device, "hipz_h_query_qp() failed "
+			 "ehca_qp=%p qp_num=%x h_ret=%lx",
+			 my_qp, my_qp->real_qp_num, h_ret);
+		goto query_srq_exit1;
+	}
+
+	srq_attr->max_wr = qpcb->max_nr_outst_recv_wr - 1;
+	srq_attr->srq_limit = EHCA_BMASK_GET(
+		MQPCB_CURR_SRQ_LIMIT, qpcb->curr_srq_limit);
+
+	if (ehca_debug_level)
+		ehca_dmp(qpcb, 4*70, "qp_num=%x", my_qp->real_qp_num);
+
+query_srq_exit1:
+	ehca_free_fw_ctrlblock(qpcb);
+
+	return ret;
+}
+
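
Both ehca_modify_srq() and ehca_query_srq() above talk to the firmware control block purely through EHCA_BMASK_SET/EHCA_BMASK_GET field packing. As a rough illustration of that style — not the actual ehca macros, which live in ehca_tools.h and use IBM (MSB-0) bit numbering — a minimal LSB-0 equivalent:

#include <stdint.h>
#include <stdio.h>

#define BFIELD(shift, width)  (((uint64_t)(shift) << 8) | (width))
#define BF_SHIFT(f)           ((unsigned)((f) >> 8))
#define BF_MASK(f)            ((((uint64_t)1 << ((f) & 0xff)) - 1))
#define BF_SET(f, v)          (((uint64_t)(v) & BF_MASK(f)) << BF_SHIFT(f))
#define BF_GET(f, r)          (((r) >> BF_SHIFT(f)) & BF_MASK(f))

#define CURR_SRQ_LIMIT BFIELD(16, 16)  /* hypothetical field position */

int main(void)
{
	uint64_t reg = BF_SET(CURR_SRQ_LIMIT, 42);  /* pack, as in modify_srq */
	printf("srq_limit=%llu\n",
	       (unsigned long long)BF_GET(CURR_SRQ_LIMIT, reg));  /* unpack */
	return 0;
}
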
+int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+			struct ib_uobject *uobject)
+{
+	struct ehca_shca *shca = container_of(dev, struct ehca_shca, ib_device);
 	struct ehca_pd *my_pd = container_of(my_qp->ib_qp.pd, struct ehca_pd,
 					     ib_pd);
 	u32 cur_pid = current->tgid;
-	u32 qp_num = ibqp->qp_num;
+	u32 qp_num = my_qp->real_qp_num;
 	int ret;
 	u64 h_ret;
 	u8 port_num;
 	enum ib_qp_type	qp_type;
 	unsigned long flags;

-	if (ibqp->uobject) {
+	if (uobject) {
 		if (my_qp->mm_count_galpa ||
 		    my_qp->mm_count_rqueue || my_qp->mm_count_squeue) {
-			ehca_err(ibqp->device, "Resources still referenced in "
-				 "user space qp_num=%x", ibqp->qp_num);
+			ehca_err(dev, "Resources still referenced in "
+				 "user space qp_num=%x", qp_num);
 			return -EINVAL;
 		}
 		if (my_pd->ownpid != cur_pid) {
-			ehca_err(ibqp->device, "Invalid caller pid=%x ownpid=%x",
+			ehca_err(dev, "Invalid caller pid=%x ownpid=%x",
 				 cur_pid, my_pd->ownpid);
 			return -EINVAL;
 		}
 	}

 	if (my_qp->send_cq) {
-		ret = ehca_cq_unassign_qp(my_qp->send_cq,
-					      my_qp->real_qp_num);
+		ret = ehca_cq_unassign_qp(my_qp->send_cq, qp_num);
 		if (ret) {
-			ehca_err(ibqp->device, "Couldn't unassign qp from "
+			ehca_err(dev, "Couldn't unassign qp from "
 				 "send_cq ret=%x qp_num=%x cq_num=%x", ret,
-				 my_qp->ib_qp.qp_num, my_qp->send_cq->cq_number);
+				 qp_num, my_qp->send_cq->cq_number);
 			return ret;
 		}
 	}

-	spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+	write_lock_irqsave(&ehca_qp_idr_lock, flags);
 	idr_remove(&ehca_qp_idr, my_qp->token);
-	spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);

 	h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
 	if (h_ret != H_SUCCESS) {
-		ehca_err(ibqp->device, "hipz_h_destroy_qp() failed rc=%lx "
+		ehca_err(dev, "hipz_h_destroy_qp() failed rc=%lx "
 			 "ehca_qp=%p qp_num=%x", h_ret, my_qp, qp_num);
 		return ehca2ib_return_code(h_ret);
 	}
@@ -1424,7 +1723,7 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
 	/* no support for IB_QPT_SMI yet */
 	if (qp_type == IB_QPT_GSI) {
 		struct ib_event event;
-		ehca_info(ibqp->device, "device %s: port %x is inactive.",
+		ehca_info(dev, "device %s: port %x is inactive.",
 			  shca->ib_device.name, port_num);
 		event.device = &shca->ib_device;
 		event.event = IB_EVENT_PORT_ERR;
@@ -1433,12 +1732,28 @@ int ehca_destroy_qp(struct ib_qp *ibqp)
 		ib_dispatch_event(&event);
 	}

-	ipz_queue_dtor(&my_qp->ipz_rqueue);
-	ipz_queue_dtor(&my_qp->ipz_squeue);
+	if (HAS_RQ(my_qp))
+		ipz_queue_dtor(&my_qp->ipz_rqueue);
+	if (HAS_SQ(my_qp))
+		ipz_queue_dtor(&my_qp->ipz_squeue);
 	kmem_cache_free(qp_cache, my_qp);
 	return 0;
 }

+int ehca_destroy_qp(struct ib_qp *qp)
+{
+	return internal_destroy_qp(qp->device,
+				   container_of(qp, struct ehca_qp, ib_qp),
+				   qp->uobject);
+}
+
+int ehca_destroy_srq(struct ib_srq *srq)
+{
+	return internal_destroy_qp(srq->device,
+				   container_of(srq, struct ehca_qp, ib_srq),
+				   srq->uobject);
+}
+
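
The two thin wrappers above work because QPs and SRQs share one struct ehca_qp, which embeds both an ib_qp and an ib_srq; container_of() recovers the shared object from whichever embedded member the verbs core hands in. The same dispatch pattern in a self-contained sketch (hypothetical types, not the ehca structures):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct qp  { int num; };
struct srq { int limit; };

/* one internal object backs both interface types, as in ehca */
struct conn {
	int token;
	struct qp  qp;
	struct srq srq;
};

static void destroy(struct conn *c) { printf("destroy token=%d\n", c->token); }

static void destroy_qp(struct qp *q)   { destroy(container_of(q, struct conn, qp)); }
static void destroy_srq(struct srq *s) { destroy(container_of(s, struct conn, srq)); }

int main(void)
{
	struct conn c = { .token = 7 };
	destroy_qp(&c.qp);
	destroy_srq(&c.srq);
	return 0;
}
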
 int ehca_init_qp_cache(void)
 {
 	qp_cache = kmem_cache_create("ehca_cache_qp",

+ 56 - 29
drivers/infiniband/hw/ehca/ehca_reqs.c

@@ -3,8 +3,9 @@
  *
  *  post_send/recv, poll_cq, req_notify
  *
- *  Authors: Waleri Fomin <fomin@de.ibm.com>
- *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *  Authors: Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Waleri Fomin <fomin@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
  *           Reinhard Ernst <rernst@de.ibm.com>
  *
  *  Copyright (c) 2005 IBM Corporation
@@ -362,10 +363,10 @@ int ehca_post_send(struct ib_qp *qp,
 	struct ehca_wqe *wqe_p;
 	int wqe_cnt = 0;
 	int ret = 0;
-	unsigned long spl_flags;
+	unsigned long flags;

 	/* LOCK the QUEUE */
-	spin_lock_irqsave(&my_qp->spinlock_s, spl_flags);
+	spin_lock_irqsave(&my_qp->spinlock_s, flags);

 	/* loop processes list of send reqs */
 	for (cur_send_wr = send_wr; cur_send_wr != NULL;
@@ -406,26 +407,31 @@ int ehca_post_send(struct ib_qp *qp,
 	} /* eof for cur_send_wr */

 post_send_exit0:
-	/* UNLOCK the QUEUE */
-	spin_unlock_irqrestore(&my_qp->spinlock_s, spl_flags);
 	iosync(); /* serialize GAL register access */
 	hipz_update_sqa(my_qp, wqe_cnt);
+	spin_unlock_irqrestore(&my_qp->spinlock_s, flags);
 	return ret;
 }

-int ehca_post_recv(struct ib_qp *qp,
-		   struct ib_recv_wr *recv_wr,
-		   struct ib_recv_wr **bad_recv_wr)
+static int internal_post_recv(struct ehca_qp *my_qp,
+			      struct ib_device *dev,
+			      struct ib_recv_wr *recv_wr,
+			      struct ib_recv_wr **bad_recv_wr)
 {
-	struct ehca_qp *my_qp = container_of(qp, struct ehca_qp, ib_qp);
 	struct ib_recv_wr *cur_recv_wr;
 	struct ehca_wqe *wqe_p;
 	int wqe_cnt = 0;
 	int ret = 0;
-	unsigned long spl_flags;
+	unsigned long flags;
+
+	if (unlikely(!HAS_RQ(my_qp))) {
+		ehca_err(dev, "QP has no RQ  ehca_qp=%p qp_num=%x ext_type=%d",
+			 my_qp, my_qp->real_qp_num, my_qp->ext_type);
+		return -ENODEV;
+	}

 	/* LOCK the QUEUE */
-	spin_lock_irqsave(&my_qp->spinlock_r, spl_flags);
+	spin_lock_irqsave(&my_qp->spinlock_r, flags);

 	/* loop processes list of send reqs */
 	for (cur_recv_wr = recv_wr; cur_recv_wr != NULL;
@@ -439,8 +445,8 @@ int ehca_post_recv(struct ib_qp *qp,
 				*bad_recv_wr = cur_recv_wr;
 			if (wqe_cnt == 0) {
 				ret = -ENOMEM;
-				ehca_err(qp->device, "Too many posted WQEs "
-					 "qp_num=%x", qp->qp_num);
+				ehca_err(dev, "Too many posted WQEs "
+					 "qp_num=%x", my_qp->real_qp_num);
 			}
 			goto post_recv_exit0;
 		}
@@ -455,23 +461,39 @@ int ehca_post_recv(struct ib_qp *qp,
 			*bad_recv_wr = cur_recv_wr;
 			if (wqe_cnt == 0) {
 				ret = -EINVAL;
-				ehca_err(qp->device, "Could not write WQE "
-					 "qp_num=%x", qp->qp_num);
+				ehca_err(dev, "Could not write WQE "
+					 "qp_num=%x", my_qp->real_qp_num);
 			}
 			goto post_recv_exit0;
 		}
 		wqe_cnt++;
-		ehca_gen_dbg("ehca_qp=%p qp_num=%x wqe_cnt=%d",
-		     my_qp, qp->qp_num, wqe_cnt);
+		ehca_dbg(dev, "ehca_qp=%p qp_num=%x wqe_cnt=%d",
+			 my_qp, my_qp->real_qp_num, wqe_cnt);
 	} /* eof for cur_recv_wr */

 post_recv_exit0:
-	spin_unlock_irqrestore(&my_qp->spinlock_r, spl_flags);
 	iosync(); /* serialize GAL register access */
 	hipz_update_rqa(my_qp, wqe_cnt);
+	spin_unlock_irqrestore(&my_qp->spinlock_r, flags);
 	return ret;
 }

+int ehca_post_recv(struct ib_qp *qp,
+		   struct ib_recv_wr *recv_wr,
+		   struct ib_recv_wr **bad_recv_wr)
+{
+	return internal_post_recv(container_of(qp, struct ehca_qp, ib_qp),
+				  qp->device, recv_wr, bad_recv_wr);
+}
+
+int ehca_post_srq_recv(struct ib_srq *srq,
+		       struct ib_recv_wr *recv_wr,
+		       struct ib_recv_wr **bad_recv_wr)
+{
+	return internal_post_recv(container_of(srq, struct ehca_qp, ib_srq),
+				  srq->device, recv_wr, bad_recv_wr);
+}
+
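
Note that internal_post_recv() — and ehca_post_send() earlier in this file — now ring the hardware doorbell before dropping the queue lock, so the iosync()/update pair stays ordered against concurrent posters. A pthread-based sketch of that invariant (hypothetical ring; __sync_synchronize() stands in for iosync()):

#include <pthread.h>

struct ring {
	pthread_mutex_t lock;
	int wqe[64];
	volatile unsigned doorbell;       /* consumed asynchronously */
};

void post(struct ring *r, int wqe)
{
	pthread_mutex_lock(&r->lock);
	r->wqe[r->doorbell % 64] = wqe;   /* write the work request */
	__sync_synchronize();             /* stand-in for iosync() */
	r->doorbell++;                    /* stand-in for hipz_update_rqa() */
	pthread_mutex_unlock(&r->lock);   /* unlock only after the doorbell */
}
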
 /*
  * ib_wc_opcode table converts ehca wc opcode to ib
  * Since we use zero to indicate invalid opcode, the actual ib opcode must
@@ -494,6 +516,7 @@ static inline int ehca_poll_cq_one(struct ib_cq *cq, struct ib_wc *wc)
 	int ret = 0;
 	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
 	struct ehca_cqe *cqe;
+	struct ehca_qp *my_qp;
 	int cqe_count = 0;

 poll_cq_one_read_cqe:
@@ -513,7 +536,7 @@ poll_cq_one_read_cqe:
 	if (unlikely(cqe->status & WC_STATUS_PURGE_BIT)) {
 		struct ehca_qp *qp=ehca_cq_get_qp(my_cq, cqe->local_qp_number);
 		int purgeflag;
-		unsigned long spl_flags;
+		unsigned long flags;
 		if (!qp) {
 			ehca_err(cq->device, "cq_num=%x qp_num=%x "
 				 "could not find qp -> ignore cqe",
@@ -523,9 +546,9 @@ poll_cq_one_read_cqe:
 			/* ignore this purged cqe */
 			goto poll_cq_one_read_cqe;
 		}
-		spin_lock_irqsave(&qp->spinlock_s, spl_flags);
+		spin_lock_irqsave(&qp->spinlock_s, flags);
 		purgeflag = qp->sqerr_purgeflag;
-		spin_unlock_irqrestore(&qp->spinlock_s, spl_flags);
+		spin_unlock_irqrestore(&qp->spinlock_s, flags);

 		if (purgeflag) {
 			ehca_dbg(cq->device, "Got CQE with purged bit qp_num=%x "
@@ -545,7 +568,7 @@ poll_cq_one_read_cqe:
 	}

 	/* tracing cqe */
-	if (ehca_debug_level) {
+	if (unlikely(ehca_debug_level)) {
 		ehca_dbg(cq->device,
 			 "Received COMPLETION ehca_cq=%p cq_num=%x -----",
 			 my_cq, my_cq->cq_number);
@@ -579,7 +602,11 @@ poll_cq_one_read_cqe:
 	} else
 		wc->status = IB_WC_SUCCESS;

-	wc->qp = NULL;
+	read_lock(&ehca_qp_idr_lock);
+	my_qp = idr_find(&ehca_qp_idr, cqe->qp_token);
+	wc->qp = &my_qp->ib_qp;
+	read_unlock(&ehca_qp_idr_lock);
+
 	wc->byte_len = cqe->nr_bytes_transferred;
 	wc->pkey_index = cqe->pkey_index;
 	wc->slid = cqe->rlid;
@@ -589,7 +616,7 @@ poll_cq_one_read_cqe:
 	wc->imm_data = cpu_to_be32(cqe->immediate_data);
 	wc->sl = cqe->service_level;

-	if (wc->status != IB_WC_SUCCESS)
+	if (unlikely(wc->status != IB_WC_SUCCESS))
 		ehca_dbg(cq->device,
 			 "ehca_cq=%p cq_num=%x WARNING unsuccessful cqe "
 			 "OPType=%x status=%x qp_num=%x src_qp=%x wr_id=%lx "
@@ -610,7 +637,7 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
 	int nr;
 	struct ib_wc *current_wc = wc;
 	int ret = 0;
-	unsigned long spl_flags;
+	unsigned long flags;

 	if (num_entries < 1) {
 		ehca_err(cq->device, "Invalid num_entries=%d ehca_cq=%p "
@@ -619,14 +646,14 @@ int ehca_poll_cq(struct ib_cq *cq, int num_entries, struct ib_wc *wc)
 		goto poll_cq_exit0;
 	}

-	spin_lock_irqsave(&my_cq->spinlock, spl_flags);
+	spin_lock_irqsave(&my_cq->spinlock, flags);
 	for (nr = 0; nr < num_entries; nr++) {
 		ret = ehca_poll_cq_one(cq, current_wc);
 		if (ret)
 			break;
 		current_wc++;
 	} /* eof for nr */
-	spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
+	spin_unlock_irqrestore(&my_cq->spinlock, flags);
 	if (ret == -EAGAIN  || !ret)
 		ret = nr;

@@ -637,7 +664,6 @@ poll_cq_exit0:
 int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
 {
 	struct ehca_cq *my_cq = container_of(cq, struct ehca_cq, ib_cq);
-	unsigned long spl_flags;
 	int ret = 0;

 	switch (notify_flags & IB_CQ_SOLICITED_MASK) {
@@ -652,6 +678,7 @@ int ehca_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags notify_flags)
 	}

 	if (notify_flags & IB_CQ_REPORT_MISSED_EVENTS) {
+		unsigned long spl_flags;
 		spin_lock_irqsave(&my_cq->spinlock, spl_flags);
 		ret = ipz_qeit_is_valid(&my_cq->ipz_queue);
 		spin_unlock_irqrestore(&my_cq->spinlock, spl_flags);
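
The ehca_qp_idr_lock conversion visible in ehca_poll_cq_one() above (read_lock() around idr_find()) pairs with the write_lock_irqsave() taken in internal_destroy_qp() earlier: lookups are frequent and may now run in parallel, while insert/remove stays exclusive. The split in a kernel-style sketch (hypothetical object type):

#include <linux/idr.h>
#include <linux/spinlock.h>

struct my_obj;

static DEFINE_IDR(obj_idr);
static DEFINE_RWLOCK(obj_idr_lock);

/* hot path (e.g. CQE-to-QP translation): many concurrent readers */
static struct my_obj *obj_lookup(int token)
{
	struct my_obj *obj;

	read_lock(&obj_idr_lock);
	obj = idr_find(&obj_idr, token);
	read_unlock(&obj_idr_lock);
	return obj;
}

/* teardown: exclusive and irq-safe, like internal_destroy_qp() */
static void obj_remove(int token)
{
	unsigned long flags;

	write_lock_irqsave(&obj_idr_lock, flags);
	idr_remove(&obj_idr, token);
	write_unlock_irqrestore(&obj_idr_lock, flags);
}
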

+ 1 - 0
drivers/infiniband/hw/ehca/ehca_tools.h

@@ -59,6 +59,7 @@
 #include <linux/cpu.h>
 #include <linux/device.h>

+#include <asm/atomic.h>
 #include <asm/abs_addr.h>
 #include <asm/ibmebus.h>
 #include <asm/io.h>

+ 7 - 6
drivers/infiniband/hw/ehca/ehca_uverbs.c

@@ -253,16 +253,16 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 	u32 rsrc_type = (fileoffset >> 24) & 0xF; /* sq,rq,cmnd_window */
 	u32 cur_pid = current->tgid;
 	u32 ret;
-	unsigned long flags;
 	struct ehca_cq *cq;
 	struct ehca_qp *qp;
 	struct ehca_pd *pd;
+	struct ib_uobject *uobject;

 	switch (q_type) {
 	case  1: /* CQ */
-		spin_lock_irqsave(&ehca_cq_idr_lock, flags);
+		read_lock(&ehca_cq_idr_lock);
 		cq = idr_find(&ehca_cq_idr, idr_handle);
-		spin_unlock_irqrestore(&ehca_cq_idr_lock, flags);
+		read_unlock(&ehca_cq_idr_lock);

 		/* make sure this mmap really belongs to the authorized user */
 		if (!cq)
@@ -288,9 +288,9 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 		break;

 	case 2: /* QP */
-		spin_lock_irqsave(&ehca_qp_idr_lock, flags);
+		read_lock(&ehca_qp_idr_lock);
 		qp = idr_find(&ehca_qp_idr, idr_handle);
-		spin_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+		read_unlock(&ehca_qp_idr_lock);

 		/* make sure this mmap really belongs to the authorized user */
 		if (!qp)
@@ -304,7 +304,8 @@ int ehca_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 			return -ENOMEM;
 		}

-		if (!qp->ib_qp.uobject || qp->ib_qp.uobject->context != context)
+		uobject = IS_SRQ(qp) ? qp->ib_srq.uobject : qp->ib_qp.uobject;
+		if (!uobject || uobject->context != context)
 			return -EINVAL;

 		ret = ehca_mmap_qp(vma, qp, rsrc_type);
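
ehca encodes which resource an mmap request refers to inside the file offset; only the rsrc_type extraction is shown verbatim in the hunk above, but the switch on q_type and the idr_handle lookups imply similar fields. A hedged sketch of that decoding (the q_type and handle positions here are assumptions, not taken from the diff):

static inline void parse_offset(u64 fileoffset, u32 *q_type,
				u32 *rsrc_type, u32 *idr_handle)
{
	*rsrc_type  = (fileoffset >> 24) & 0xF;  /* from the code above */
	*q_type     = (fileoffset >> 28) & 0xF;  /* assumed position */
	*idr_handle = fileoffset & 0xFFFFFF;     /* assumed position */
}
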

+ 31 - 27
drivers/infiniband/hw/ehca/hcp_if.c

@@ -5,6 +5,7 @@
  *
  *  Authors: Christoph Raisch <raisch@de.ibm.com>
  *           Hoang-Nam Nguyen <hnguyen@de.ibm.com>
+ *           Joachim Fenkes <fenkes@de.ibm.com>
  *           Gerd Bayer <gerd.bayer@de.ibm.com>
  *           Waleri Fomin <fomin@de.ibm.com>
  *
@@ -62,6 +63,12 @@
 #define H_ALL_RES_QP_MAX_SEND_SGE       EHCA_BMASK_IBM(32, 39)
 #define H_ALL_RES_QP_MAX_RECV_SGE       EHCA_BMASK_IBM(40, 47)

+#define H_ALL_RES_QP_UD_AV_LKEY         EHCA_BMASK_IBM(32, 63)
+#define H_ALL_RES_QP_SRQ_QP_TOKEN       EHCA_BMASK_IBM(0, 31)
+#define H_ALL_RES_QP_SRQ_QP_HANDLE      EHCA_BMASK_IBM(0, 64)
+#define H_ALL_RES_QP_SRQ_LIMIT          EHCA_BMASK_IBM(48, 63)
+#define H_ALL_RES_QP_SRQ_QPN            EHCA_BMASK_IBM(40, 63)
+
 #define H_ALL_RES_QP_ACT_OUTST_SEND_WR  EHCA_BMASK_IBM(16, 31)
 #define H_ALL_RES_QP_ACT_OUTST_RECV_WR  EHCA_BMASK_IBM(48, 63)
 #define H_ALL_RES_QP_ACT_SEND_SGE       EHCA_BMASK_IBM(8, 15)
@@ -74,10 +81,7 @@
 #define H_MP_SHUTDOWN                   EHCA_BMASK_IBM(48, 48)
 #define H_MP_RESET_QKEY_CTR             EHCA_BMASK_IBM(49, 49)

-/* direct access qp controls */
-#define DAQP_CTRL_ENABLE    0x01
-#define DAQP_CTRL_SEND_COMP 0x20
-#define DAQP_CTRL_RECV_COMP 0x40
+static DEFINE_SPINLOCK(hcall_lock);

 static u32 get_longbusy_msecs(int longbusy_rc)
 {
@@ -155,7 +159,7 @@ static long ehca_plpar_hcall9(unsigned long opcode,
 {
 	long ret;
 	int i, sleep_msecs, lock_is_set = 0;
-	unsigned long flags;
+	unsigned long flags = 0;

 	ehca_gen_dbg("opcode=%lx arg1=%lx arg2=%lx arg3=%lx arg4=%lx "
 		     "arg5=%lx arg6=%lx arg7=%lx arg8=%lx arg9=%lx",
@@ -284,53 +288,53 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
 }

 u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_qp *qp,
 			     struct ehca_alloc_qp_parms *parms)
 {
 	u64 ret;
-	u64 allocate_controls;
-	u64 max_r10_reg;
+	u64 allocate_controls, max_r10_reg, r11, r12;
 	u64 outs[PLPAR_HCALL9_BUFSIZE];
-	u16 max_nr_receive_wqes = qp->init_attr.cap.max_recv_wr + 1;
-	u16 max_nr_send_wqes = qp->init_attr.cap.max_send_wr + 1;
-	int daqp_ctrl = parms->daqp_ctrl;

 	allocate_controls =
-		EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS,
-			       (daqp_ctrl & DAQP_CTRL_ENABLE) ? 1 : 0)
+		EHCA_BMASK_SET(H_ALL_RES_QP_ENHANCED_OPS, parms->ext_type)
 		| EHCA_BMASK_SET(H_ALL_RES_QP_PTE_PIN, 0)
 		| EHCA_BMASK_SET(H_ALL_RES_QP_SERVICE_TYPE, parms->servicetype)
 		| EHCA_BMASK_SET(H_ALL_RES_QP_SIGNALING_TYPE, parms->sigtype)
 		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_RQ_CQE_POSTING,
-				 (daqp_ctrl & DAQP_CTRL_RECV_COMP) ? 1 : 0)
+				 !!(parms->ll_comp_flags & LLQP_RECV_COMP))
 		| EHCA_BMASK_SET(H_ALL_RES_QP_LL_SQ_CQE_POSTING,
-				 (daqp_ctrl & DAQP_CTRL_SEND_COMP) ? 1 : 0)
+				 !!(parms->ll_comp_flags & LLQP_SEND_COMP))
 		| EHCA_BMASK_SET(H_ALL_RES_QP_UD_AV_LKEY_CTRL,
 				 parms->ud_av_l_key_ctl)
 		| EHCA_BMASK_SET(H_ALL_RES_QP_RESOURCE_TYPE, 1);

 	max_r10_reg =
 		EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_SEND_WR,
-			       max_nr_send_wqes)
+			       parms->max_send_wr + 1)
 		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_OUTST_RECV_WR,
-				 max_nr_receive_wqes)
+				 parms->max_recv_wr + 1)
 		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_SEND_SGE,
 				 parms->max_send_sge)
 		| EHCA_BMASK_SET(H_ALL_RES_QP_MAX_RECV_SGE,
 				 parms->max_recv_sge);

+	r11 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QP_TOKEN, parms->srq_token);
+
+	if (parms->ext_type == EQPT_SRQ)
+		r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_LIMIT, parms->srq_limit);
+	else
+		r12 = EHCA_BMASK_SET(H_ALL_RES_QP_SRQ_QPN, parms->srq_qpn);
+
 	ret = ehca_plpar_hcall9(H_ALLOC_RESOURCE, outs,
 				adapter_handle.handle,	           /* r4  */
 				allocate_controls,	           /* r5  */
-				qp->send_cq->ipz_cq_handle.handle,
-				qp->recv_cq->ipz_cq_handle.handle,
-				parms->ipz_eq_handle.handle,
-				((u64)qp->token << 32) | parms->pd.value,
-				max_r10_reg,	                   /* r10 */
-				parms->ud_av_l_key_ctl,            /* r11 */
-				0);
-	qp->ipz_qp_handle.handle = outs[0];
-	qp->real_qp_num = (u32)outs[1];
+				parms->send_cq_handle.handle,
+				parms->recv_cq_handle.handle,
+				parms->eq_handle.handle,
+				((u64)parms->token << 32) | parms->pd.value,
+				max_r10_reg, r11, r12);
+
+	parms->qp_handle.handle = outs[0];
+	parms->real_qp_num = (u32)outs[1];
 	parms->act_nr_send_wqes =
 		(u16)EHCA_BMASK_GET(H_ALL_RES_QP_ACT_OUTST_SEND_WR, outs[2]);
 	parms->act_nr_recv_wqes =
@@ -345,7 +349,7 @@ u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
 		(u32)EHCA_BMASK_GET(H_ALL_RES_QP_RQUEUE_SIZE_PAGES, outs[4]);

 	if (ret == H_SUCCESS)
-		hcp_galpas_ctor(&qp->galpas, outs[6], outs[6]);
+		hcp_galpas_ctor(&parms->galpas, outs[6], outs[6]);

 	if (ret == H_NOT_ENOUGH_RESOURCES)
 		ehca_gen_err("Not enough resources. ret=%lx", ret);
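
Dropping the struct ehca_qp argument means the hcall wrapper now works purely on an in/out parameter block: the caller fills the request fields, and the wrapper copies the firmware's answers back into the same struct. Reduced to its shape (hypothetical fields, modeled loosely on ehca_alloc_qp_parms):

struct alloc_parms {
	/* in */
	int max_send_wr;
	int max_recv_wr;
	/* out */
	unsigned long handle;
	unsigned int real_qp_num;
};

static int fw_alloc(struct alloc_parms *parms)
{
	/* stand-in for ehca_plpar_hcall9(H_ALLOC_RESOURCE, ...) */
	parms->handle = 0x1000;
	parms->real_qp_num = 42;
	return 0;
}

static int create_qp(int send_wr, int recv_wr)
{
	struct alloc_parms parms = {
		.max_send_wr = send_wr,
		.max_recv_wr = recv_wr,
	};
	int ret = fw_alloc(&parms);

	/* outputs are copied into the qp only on success */
	return ret ? ret : (int)parms.real_qp_num;
}
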

+ 0 - 1
drivers/infiniband/hw/ehca/hcp_if.h

@@ -78,7 +78,6 @@ u64 hipz_h_alloc_resource_cq(const struct ipz_adapter_handle adapter_handle,
  * initialize resources, create empty QPPTs (2 rings).
  */
 u64 hipz_h_alloc_resource_qp(const struct ipz_adapter_handle adapter_handle,
-			     struct ehca_qp *qp,
 			     struct ehca_alloc_qp_parms *parms);

 u64 hipz_h_query_port(const struct ipz_adapter_handle adapter_handle,

+ 19 - 0
drivers/infiniband/hw/ehca/hipz_hw.h

@@ -163,6 +163,7 @@ struct hipz_qptemm {

 #define QPX_SQADDER EHCA_BMASK_IBM(48,63)
 #define QPX_RQADDER EHCA_BMASK_IBM(48,63)
+#define QPX_AAELOG_RESET_SRQ_LIMIT EHCA_BMASK_IBM(3,3)

 #define QPTEMM_OFFSET(x) offsetof(struct hipz_qptemm,x)

@@ -360,6 +361,24 @@ struct hipz_query_hca {
 	u32 max_neq;
 } __attribute__ ((packed));

+#define HCA_CAP_AH_PORT_NR_CHECK      EHCA_BMASK_IBM( 0,  0)
+#define HCA_CAP_ATOMIC                EHCA_BMASK_IBM( 1,  1)
+#define HCA_CAP_AUTO_PATH_MIG         EHCA_BMASK_IBM( 2,  2)
+#define HCA_CAP_BAD_P_KEY_CTR         EHCA_BMASK_IBM( 3,  3)
+#define HCA_CAP_SQD_RTS_PORT_CHANGE   EHCA_BMASK_IBM( 4,  4)
+#define HCA_CAP_CUR_QP_STATE_MOD      EHCA_BMASK_IBM( 5,  5)
+#define HCA_CAP_INIT_TYPE             EHCA_BMASK_IBM( 6,  6)
+#define HCA_CAP_PORT_ACTIVE_EVENT     EHCA_BMASK_IBM( 7,  7)
+#define HCA_CAP_Q_KEY_VIOL_CTR        EHCA_BMASK_IBM( 8,  8)
+#define HCA_CAP_WQE_RESIZE            EHCA_BMASK_IBM( 9,  9)
+#define HCA_CAP_RAW_PACKET_MCAST      EHCA_BMASK_IBM(10, 10)
+#define HCA_CAP_SHUTDOWN_PORT         EHCA_BMASK_IBM(11, 11)
+#define HCA_CAP_RC_LL_QP              EHCA_BMASK_IBM(12, 12)
+#define HCA_CAP_SRQ                   EHCA_BMASK_IBM(13, 13)
+#define HCA_CAP_UD_LL_QP              EHCA_BMASK_IBM(16, 16)
+#define HCA_CAP_RESIZE_MR             EHCA_BMASK_IBM(17, 17)
+#define HCA_CAP_MINI_QP               EHCA_BMASK_IBM(18, 18)
+
 /* query port response block */
 struct hipz_query_port {
 	u32 state;
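
The HCA_CAP_* masks added above let the driver test firmware capability bits with EHCA_BMASK_GET; SRQ support, for instance, would presumably be gated on HCA_CAP_SRQ. A hedged sketch of such a gate (the hca_cap field name on the shca is an assumption, not taken from this diff):

static int hca_supports_srq(struct ehca_shca *shca)
{
	return EHCA_BMASK_GET(HCA_CAP_SRQ, shca->hca_cap) ? 0 : -ENOSYS;
}
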

+ 10 - 18
drivers/infiniband/hw/ehca/ipz_pt_fn.h

@@ -105,7 +105,6 @@ void *ipz_qpageit_get_inc(struct ipz_queue *queue);
  * step in struct ipz_queue, will wrap in ringbuffer
  * returns address (kv) of Queue Entry BEFORE increment
  * warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
  */
 static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
 {
@@ -120,32 +119,25 @@ static inline void *ipz_qeit_get_inc(struct ipz_queue *queue)
 	return ret;
 }

+/*
+ * return a bool indicating whether current Queue Entry is valid
+ */
+static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
+{
+	struct ehca_cqe *cqe = ipz_qeit_get(queue);
+	return ((cqe->cqe_flags >> 7) == (queue->toggle_state & 1));
+}
+
 /*
  * return current Queue Entry, increment Queue Entry iterator by one
  * step in struct ipz_queue, will wrap in ringbuffer
  * returns address (kv) of Queue Entry BEFORE increment
  * returns 0 and does not increment, if wrong valid state
  * warning don't use in parallel with ipz_qpageit_get_inc()
- * warning unpredictable results may occur if steps>act_nr_of_queue_entries
  */
 static inline void *ipz_qeit_get_inc_valid(struct ipz_queue *queue)
 {
-	struct ehca_cqe *cqe = ipz_qeit_get(queue);
-	u32 cqe_flags = cqe->cqe_flags;
-
-	if ((cqe_flags >> 7) != (queue->toggle_state & 1))
-		return NULL;
-
-	ipz_qeit_get_inc(queue);
-	return cqe;
-}
-
-static inline int ipz_qeit_is_valid(struct ipz_queue *queue)
-{
-	struct ehca_cqe *cqe = ipz_qeit_get(queue);
-	u32 cqe_flags = cqe->cqe_flags;
-
-	return cqe_flags >> 7 == (queue->toggle_state & 1);
+	return ipz_qeit_is_valid(queue) ? ipz_qeit_get_inc(queue) : NULL;
 }

 /*
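
The rewrite makes ipz_qeit_get_inc_valid() a one-liner on top of ipz_qeit_is_valid(): an entry is valid while its flag bit (cqe_flags >> 7) equals the queue's toggle_state, which flips each time the ring wraps. A self-contained model of that protocol:

#include <stdint.h>
#include <stdio.h>

#define N 4

/* tiny model of the ipz toggle protocol: an entry is valid when its
 * high flag bit matches the consumer's toggle, which flips on wrap */
struct ring { uint8_t flags[N]; };

int main(void)
{
	struct ring r;
	unsigned prod = 0, prod_tog = 0, cons = 0, cons_tog = 0;

	for (int i = 0; i < N; i++)
		r.flags[i] = 0x80;	/* stale: bit != cons_tog */

	/* produce two entries */
	for (int i = 0; i < 2; i++) {
		r.flags[prod] = (uint8_t)(prod_tog << 7);
		if (++prod == N) { prod = 0; prod_tog ^= 1; }
	}

	/* consume while valid, exactly like ipz_qeit_get_inc_valid() */
	while ((r.flags[cons] >> 7) == (cons_tog & 1)) {
		printf("entry %u valid\n", cons);
		if (++cons == N) { cons = 0; cons_tog ^= 1; }
	}
	return 0;
}
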

+ 1 - 1
drivers/infiniband/hw/ipath/Kconfig

@@ -1,6 +1,6 @@
 config INFINIBAND_IPATH
 	tristate "QLogic InfiniPath Driver"
-	depends on (PCI_MSI || HT_IRQ) && 64BIT && INFINIBAND && NET
+	depends on (PCI_MSI || HT_IRQ) && 64BIT && NET
 	---help---
 	This is a driver for QLogic InfiniPath host channel adapters,
 	including InfiniBand verbs support.  This driver allows these

+ 29 - 4
drivers/infiniband/hw/ipath/ipath_common.h

@@ -189,8 +189,7 @@ typedef enum _ipath_ureg {
 #define IPATH_RUNTIME_FORCE_WC_ORDER	0x4
 #define IPATH_RUNTIME_RCVHDR_COPY	0x8
 #define IPATH_RUNTIME_MASTER	0x10
-#define IPATH_RUNTIME_PBC_REWRITE 0x20
-#define IPATH_RUNTIME_LOOSE_DMA_ALIGN 0x40
+/* 0x20 and 0x40 are no longer used, but are reserved for ABI compatibility */

 /*
  * This structure is returned by ipath_userinit() immediately after
@@ -432,8 +431,15 @@ struct ipath_user_info {
 #define IPATH_CMD_UNUSED_1	25
 #define IPATH_CMD_UNUSED_2	26
 #define IPATH_CMD_PIOAVAILUPD	27	/* force an update of PIOAvail reg */
+#define IPATH_CMD_POLL_TYPE	28	/* set the kind of polling we want */

-#define IPATH_CMD_MAX		27
+#define IPATH_CMD_MAX		28
+
+/*
+ * Poll types
+ */
+#define IPATH_POLL_TYPE_URGENT	 0x01
+#define IPATH_POLL_TYPE_OVERFLOW 0x02

 struct ipath_port_info {
 	__u32 num_active;	/* number of active units */
@@ -474,6 +480,8 @@ struct ipath_cmd {
 		__u16 part_key;
 		/* user address of __u32 bitmask of active slaves */
 		__u64 slave_mask_addr;
+		/* type of polling we want */
+		__u16 poll_type;
 	} cmd;
 };

@@ -502,13 +510,30 @@ struct __ipath_sendpkt {
 	struct ipath_iovec sps_iov[4];
 };

-/* Passed into diag data special file's ->write method. */
+/*
+ * diagnostics can send a packet by "writing" one of the following
+ * two structs to diag data special file
+ * The first is the legacy version for backward compatibility
+ */
 struct ipath_diag_pkt {
 	__u32 unit;
 	__u64 data;
 	__u32 len;
 };

+/* The second diag_pkt struct is the expanded version that allows
+ * more control over the packet, specifically, by allowing a custom
+ * pbc (+ extra) qword, so that special modes and deliberate
+ * changes to CRCs can be used. The elements were also re-ordered
+ * for better alignment and to avoid padding issues.
+ */
+struct ipath_diag_xpkt {
+	__u64 data;
+	__u64 pbc_wd;
+	__u32 unit;
+	__u32 len;
+};
+
 /*
  * Data layout in I2C flash (for GUID, etc.)
  * All fields are little-endian binary unless otherwise stated

+ 5 - 2
drivers/infiniband/hw/ipath/ipath_cq.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -90,6 +90,8 @@ void ipath_cq_enter(struct ipath_cq *cq, struct ib_wc *entry, int solicited)
 	wc->queue[head].sl = entry->sl;
 	wc->queue[head].dlid_path_bits = entry->dlid_path_bits;
 	wc->queue[head].port_num = entry->port_num;
+	/* Make sure queue entry is written before the head index. */
+	smp_wmb();
 	wc->head = next;

 	if (cq->notify == IB_CQ_NEXT_COMP ||
@@ -139,7 +141,8 @@ int ipath_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry)

 		if (tail == wc->head)
 			break;
-
+		/* Make sure entry is read after head index is read. */
+		smp_rmb();
 		qp = ipath_lookup_qpn(&to_idev(cq->ibcq.device)->qp_table,
 				      wc->queue[tail].qp_num);
 		entry->qp = &qp->ibqp;
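
The smp_wmb()/smp_rmb() added here are a pair: the producer in ipath_cq_enter() publishes the queue entry before advancing wc->head, and the consumer must order its read of the head before its read of the entry, or it can observe the new head with stale entry contents. The pattern isolated (kernel-style sketch, illustrative types):

struct entry { u64 wr_id; };

struct ring {
	struct entry buf[64];
	unsigned int head;	/* advanced by the producer */
	unsigned int tail;	/* advanced by the consumer */
};

static void ring_push(struct ring *r, const struct entry *e)
{
	r->buf[r->head % 64] = *e;
	smp_wmb();	/* entry before head; pairs with smp_rmb() below */
	r->head++;
}

static int ring_pop(struct ring *r, struct entry *e)
{
	if (r->tail == r->head)
		return 0;
	smp_rmb();	/* head before entry; pairs with smp_wmb() above */
	*e = r->buf[r->tail % 64];
	r->tail++;
	return 1;
}
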

+ 1 - 1
drivers/infiniband/hw/ipath/ipath_debug.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two

+ 35 - 6
drivers/infiniband/hw/ipath/ipath_diag.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -323,13 +323,14 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
 {
 	u32 __iomem *piobuf;
 	u32 plen, clen, pbufn;
-	struct ipath_diag_pkt dp;
+	struct ipath_diag_pkt odp;
+	struct ipath_diag_xpkt dp;
 	u32 *tmpbuf = NULL;
 	struct ipath_devdata *dd;
 	ssize_t ret = 0;
 	u64 val;

-	if (count < sizeof(dp)) {
+	if (count != sizeof(dp)) {
 		ret = -EINVAL;
 		goto bail;
 	}
@@ -339,6 +340,29 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
 		goto bail;
 	}

+	/*
+	 * Due to padding/alignment issues (lessened with new struct)
+	 * the old and new structs are the same length. We need to
+	 * disambiguate them, which we can do because odp.len has never
+	 * been less than the total of LRH+BTH+DETH so far, while
+	 * dp.unit (same offset) unit is unlikely to get that high.
+	 * Similarly, dp.data, the pointer to user at the same offset
+	 * as odp.unit, is almost certainly at least one (512byte)page
+	 * "above" NULL. The if-block below can be omitted if compatibility
+	 * between a new driver and older diagnostic code is unimportant.
+	 * compatibility the other direction (new diags, old driver) is
+	 * handled in the diagnostic code, with a warning.
+	 */
+	if (dp.unit >= 20 && dp.data < 512) {
+		/* very probable version mismatch. Fix it up */
+		memcpy(&odp, &dp, sizeof(odp));
+		/* We got a legacy dp, copy elements to dp */
+		dp.unit = odp.unit;
+		dp.data = odp.data;
+		dp.len = odp.len;
+		dp.pbc_wd = 0; /* Indicate we need to compute PBC wd */
+	}
+
 	/* send count must be an exact number of dwords */
 	if (dp.len & 3) {
 		ret = -EINVAL;
@@ -371,9 +395,10 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
 		ret = -ENODEV;
 		goto bail;
 	}
+	/* Check link state, but not if we have custom PBC */
 	val = dd->ipath_lastibcstat & IPATH_IBSTATE_MASK;
-	if (val != IPATH_IBSTATE_INIT && val != IPATH_IBSTATE_ARM &&
-	    val != IPATH_IBSTATE_ACTIVE) {
+	if (!dp.pbc_wd && val != IPATH_IBSTATE_INIT &&
+		val != IPATH_IBSTATE_ARM && val != IPATH_IBSTATE_ACTIVE) {
 		ipath_cdbg(VERBOSE, "unit %u not ready (state %llx)\n",
 			   dd->ipath_unit, (unsigned long long) val);
 		ret = -EINVAL;
@@ -419,9 +444,13 @@ static ssize_t ipath_diagpkt_write(struct file *fp,
 		ipath_cdbg(VERBOSE, "unit %u 0x%x+1w pio%d\n",
 			   dd->ipath_unit, plen - 1, pbufn);

+	if (dp.pbc_wd == 0)
+		/* Legacy operation, use computed pbc_wd */
+		dp.pbc_wd = plen;
+
 	/* we have to flush after the PBC for correctness on some cpus
 	 * or WC buffer can be written out of order */
-	writeq(plen, piobuf);
+	writeq(dp.pbc_wd, piobuf);
 	ipath_flush_wc();
 	/* copy all by the trigger word, then flush, so it's written
 	 * to chip before trigger word, then write trigger word, then
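
The disambiguation heuristic in the new if-block works only because the legacy and expanded structs happen to be the same size, so count alone cannot tell them apart; the code instead relies on plausible value ranges at the colliding offsets. The size collision itself is easy to verify (common 64-bit ABI assumed, where the legacy struct gains tail padding):

#include <stdint.h>
#include <stdio.h>

/* mirrors the two ABI layouts from ipath_common.h */
struct old_pkt { uint32_t unit; uint64_t data; uint32_t len; };
struct new_pkt { uint64_t data; uint64_t pbc_wd; uint32_t unit; uint32_t len; };

int main(void)
{
	/* typically prints old=24 new=24 on 64-bit ABIs */
	printf("old=%zu new=%zu\n", sizeof(struct old_pkt),
	       sizeof(struct new_pkt));
	return 0;
}
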

+ 155 - 32
drivers/infiniband/hw/ipath/ipath_driver.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -104,6 +104,9 @@ static int __devinit ipath_init_one(struct pci_dev *,
 #define PCI_DEVICE_ID_INFINIPATH_HT 0xd
 #define PCI_DEVICE_ID_INFINIPATH_PE800 0x10

+/* Number of seconds before our card status check...  */
+#define STATUS_TIMEOUT 60
+
 static const struct pci_device_id ipath_pci_tbl[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_HT) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_PATHSCALE, PCI_DEVICE_ID_INFINIPATH_PE800) },
@@ -119,6 +122,18 @@ static struct pci_driver ipath_driver = {
 	.id_table = ipath_pci_tbl,
 };

+static void ipath_check_status(struct work_struct *work)
+{
+	struct ipath_devdata *dd = container_of(work, struct ipath_devdata,
+						status_work.work);
+
+	/*
+	 * If we don't have any interrupts, let the user know and
+	 * don't bother checking again.
+	 */
+	if (dd->ipath_int_counter == 0)
+		dev_err(&dd->pcidev->dev, "No interrupts detected.\n");
+}
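
ipath_check_status() is a one-shot delayed work item: INIT_DELAYED_WORK() at allocation, schedule_delayed_work() once probing succeeds, and cancellation on removal (all visible in the following hunks). The skeleton of that pattern (kernel sketch, 2.6.20-era workqueue API):

#include <linux/workqueue.h>

struct mydev {
	struct delayed_work status_work;
};

static void mydev_check_status(struct work_struct *work)
{
	struct mydev *dev = container_of(work, struct mydev,
					 status_work.work);
	/* one-shot health check; re-arm here if it should be periodic */
	(void)dev;
}

static void mydev_probe(struct mydev *dev)
{
	INIT_DELAYED_WORK(&dev->status_work, mydev_check_status);
	schedule_delayed_work(&dev->status_work, 60 * HZ);
}

static void mydev_remove(struct mydev *dev)
{
	cancel_delayed_work(&dev->status_work);
	flush_scheduled_work();	/* as ipath_remove_one() does in a later hunk */
}
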

 static inline void read_bars(struct ipath_devdata *dd, struct pci_dev *dev,
 			     u32 *bar0, u32 *bar1)
@@ -187,6 +202,8 @@ static struct ipath_devdata *ipath_alloc_devdata(struct pci_dev *pdev)
 	dd->pcidev = pdev;
 	pci_set_drvdata(pdev, dd);

+	INIT_DELAYED_WORK(&dd->status_work, ipath_check_status);
+
 	list_add(&dd->ipath_list, &ipath_dev_list);

 bail_unlock:
@@ -504,6 +521,9 @@ static int __devinit ipath_init_one(struct pci_dev *pdev,
 	ipath_diag_add(dd);
 	ipath_register_ib_device(dd);

+	/* Check that card status in STATUS_TIMEOUT seconds. */
+	schedule_delayed_work(&dd->status_work, HZ * STATUS_TIMEOUT);
+
 	goto bail;

 bail_irqsetup:
@@ -631,6 +651,9 @@ static void __devexit ipath_remove_one(struct pci_dev *pdev)
 	 */
 	ipath_shutdown_device(dd);

+	cancel_delayed_work(&dd->status_work);
+	flush_scheduled_work();
+
 	if (dd->verbs_dev)
 		ipath_unregister_ib_device(dd->verbs_dev);

@@ -699,9 +722,9 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
 	u64 sendctrl, sendorig;

 	ipath_cdbg(PKT, "disarm %u PIObufs first=%u\n", cnt, first);
-	sendorig = dd->ipath_sendctrl | INFINIPATH_S_DISARM;
+	sendorig = dd->ipath_sendctrl;
 	for (i = first; i < last; i++) {
-		sendctrl = sendorig |
+		sendctrl = sendorig  | INFINIPATH_S_DISARM |
 			(i << INFINIPATH_S_DISARMPIOBUF_SHIFT);
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
 				 sendctrl);
@@ -712,12 +735,12 @@ void ipath_disarm_piobufs(struct ipath_devdata *dd, unsigned first,
 	 * while we were looping; no critical bits that would require
 	 * locking.
 	 *
-	 * Write a 0, and then the original value, reading scratch in
+	 * disable PIOAVAILUPD, then re-enable, reading scratch in
 	 * between.  This seems to avoid a chip timing race that causes
 	 * pioavail updates to memory to stop.
 	 */
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			 0);
+			 sendorig & ~IPATH_S_PIOBUFAVAILUPD);
 	sendorig = ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
 			 dd->ipath_sendctrl);
@@ -1014,14 +1037,10 @@ void ipath_kreceive(struct ipath_devdata *dd)
 		goto bail;
 	}

-	/* There is already a thread processing this queue. */
-	if (test_and_set_bit(0, &dd->ipath_rcv_pending))
-		goto bail;
-
 	l = dd->ipath_port0head;
 	hdrqtail = (u32) le64_to_cpu(*dd->ipath_hdrqtailptr);
 	if (l == hdrqtail)
-		goto done;
+		goto bail;

 reloop:
 	for (i = 0; l != hdrqtail; i++) {
@@ -1156,10 +1175,6 @@ reloop:
 	ipath_stats.sps_avgpkts_call =
 		ipath_stats.sps_port0pkts / ++totcalls;

-done:
-	clear_bit(0, &dd->ipath_rcv_pending);
-	smp_mb__after_clear_bit();
-
 bail:;
 }

@@ -1589,6 +1604,35 @@ int ipath_waitfor_mdio_cmdready(struct ipath_devdata *dd)
 	return ret;
 }

+
+/*
+ * Flush all sends that might be in the ready to send state, as well as any
+ * that are in the process of being sent.   Used whenever we need to be
+ * sure the send side is idle.  Cleans up all buffer state by canceling
+ * all pio buffers, and issuing an abort, which cleans up anything in the
+ * launch fifo.  The cancel is superfluous on some chip versions, but
+ * it's safer to always do it.
+ * PIOAvail bits are updated by the chip as if normal send had happened.
+ */
+void ipath_cancel_sends(struct ipath_devdata *dd)
+{
+	ipath_dbg("Cancelling all in-progress send buffers\n");
+	dd->ipath_lastcancel = jiffies+HZ/2; /* skip armlaunch errs a bit */
+	/*
+	 * the abort bit is auto-clearing.  We read scratch to be sure
+	 * that cancels and the abort have taken effect in the chip.
+	 */
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+		INFINIPATH_S_ABORT);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	ipath_disarm_piobufs(dd, 0,
+		(unsigned)(dd->ipath_piobcnt2k + dd->ipath_piobcnt4k));
+
+	/* and again, be sure all have hit the chip */
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+}
+
+
 static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 {
 	static const char *what[4] = {
@@ -1610,14 +1654,8 @@ static void ipath_set_ib_lstate(struct ipath_devdata *dd, int which)
 			   INFINIPATH_IBCS_LINKTRAININGSTATE_MASK]);
 	/* flush all queued sends when going to DOWN or INIT, to be sure that
 	 * they don't block MAD packets */
-	if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT) {
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-				 INFINIPATH_S_ABORT);
-		ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
-		                    (unsigned)(dd->ipath_piobcnt2k +
-				    dd->ipath_piobcnt4k) -
-				    dd->ipath_lastport_piobuf);
-	}
+	if (!linkcmd || linkcmd == INFINIPATH_IBCC_LINKCMD_INIT)
+		ipath_cancel_sends(dd);

 	ipath_write_kreg(dd, dd->ipath_kregs->kr_ibcctrl,
 			 dd->ipath_ibcctrl | which);
@@ -1839,6 +1877,87 @@ void ipath_write_kreg_port(const struct ipath_devdata *dd, ipath_kreg regno,
 	ipath_write_kreg(dd, where, value);
 }

+/*
+ * Following deal with the "obviously simple" task of overriding the state
+ * of the LEDS, which normally indicate link physical and logical status.
+ * The complications arise in dealing with different hardware mappings
+ * and the board-dependent routine being called from interrupts.
+ * and then there's the requirement to _flash_ them.
+ */
+#define LED_OVER_FREQ_SHIFT 8
+#define LED_OVER_FREQ_MASK (0xFF<<LED_OVER_FREQ_SHIFT)
+/* Below is "non-zero" to force override, but both actual LEDs are off */
+#define LED_OVER_BOTH_OFF (8)
+
+void ipath_run_led_override(unsigned long opaque)
+{
+	struct ipath_devdata *dd = (struct ipath_devdata *)opaque;
+	int timeoff;
+	int pidx;
+	u64 lstate, ltstate, val;
+
+	if (!(dd->ipath_flags & IPATH_INITTED))
+		return;
+
+	pidx = dd->ipath_led_override_phase++ & 1;
+	dd->ipath_led_override = dd->ipath_led_override_vals[pidx];
+	timeoff = dd->ipath_led_override_timeoff;
+
+	/*
+	 * below potentially restores the LED values per current status,
+	 * should also possibly setup the traffic-blink register,
+	 * but leave that to per-chip functions.
+	 */
+	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_ibcstatus);
+	ltstate = (val >> INFINIPATH_IBCS_LINKTRAININGSTATE_SHIFT) &
+		  INFINIPATH_IBCS_LINKTRAININGSTATE_MASK;
+	lstate = (val >> INFINIPATH_IBCS_LINKSTATE_SHIFT) &
+		 INFINIPATH_IBCS_LINKSTATE_MASK;
+
+	dd->ipath_f_setextled(dd, lstate, ltstate);
+	mod_timer(&dd->ipath_led_override_timer, jiffies + timeoff);
+}
+
+void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val)
+{
+	int timeoff, freq;
+
+	if (!(dd->ipath_flags & IPATH_INITTED))
+		return;
+
+	/* First check if we are blinking. If not, use 1HZ polling */
+	timeoff = HZ;
+	freq = (val & LED_OVER_FREQ_MASK) >> LED_OVER_FREQ_SHIFT;
+
+	if (freq) {
+		/* For blink, set each phase from one nybble of val */
+		dd->ipath_led_override_vals[0] = val & 0xF;
+		dd->ipath_led_override_vals[1] = (val >> 4) & 0xF;
+		timeoff = (HZ << 4)/freq;
+	} else {
+		/* Non-blink set both phases the same. */
+		dd->ipath_led_override_vals[0] = val & 0xF;
+		dd->ipath_led_override_vals[1] = val & 0xF;
+	}
+	dd->ipath_led_override_timeoff = timeoff;
+
+	/*
+	 * If the timer has not already been started, do so. Use a "quick"
+	 * timeout so the function will be called soon, to look at our request.
+	 */
+	if (atomic_inc_return(&dd->ipath_led_override_timer_active) == 1) {
+		/* Need to start timer */
+		init_timer(&dd->ipath_led_override_timer);
+		dd->ipath_led_override_timer.function =
+						 ipath_run_led_override;
+		dd->ipath_led_override_timer.data = (unsigned long) dd;
+		dd->ipath_led_override_timer.expires = jiffies + 1;
+		add_timer(&dd->ipath_led_override_timer);
+	} else {
+		atomic_dec(&dd->ipath_led_override_timer_active);
+	}
+}
+
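
ipath_set_led_override() starts its timer with an atomic_inc_return() == 1 latch: the first caller wins and arms the timer, and every later caller decrements back so the count stays pinned at one while the timer runs. The idiom on its own (kernel sketch, pre-hrtimer timer API as used above):

#include <linux/jiffies.h>
#include <linux/timer.h>
#include <asm/atomic.h>

static atomic_t timer_active = ATOMIC_INIT(0);
static struct timer_list once_timer;

static void arm_once(void (*fn)(unsigned long), unsigned long data)
{
	if (atomic_inc_return(&timer_active) == 1) {
		/* we won the race: set up and arm exactly once */
		init_timer(&once_timer);
		once_timer.function = fn;
		once_timer.data = data;
		once_timer.expires = jiffies + 1;
		add_timer(&once_timer);
	} else {
		/* already armed by someone else; undo our increment */
		atomic_dec(&timer_active);
	}
}
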
 /**
  * ipath_shutdown_device - shut down a device
  * @dd: the infinipath device
@@ -1879,17 +1998,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
 	 */
 	udelay(5);

-	/*
-	 * abort any armed or launched PIO buffers that didn't go. (self
-	 * clearing).  Will cause any packet currently being transmitted to
-	 * go out with an EBP, and may also cause a short packet error on
-	 * the receiver.
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			 INFINIPATH_S_ABORT);
-
 	ipath_set_ib_lstate(dd, INFINIPATH_IBCC_LINKINITCMD_DISABLE <<
 			    INFINIPATH_IBCC_LINKINITCMD_SHIFT);
+	ipath_cancel_sends(dd);

 	/* disable IBC */
 	dd->ipath_control &= ~INFINIPATH_C_LINKENABLE;
@@ -1902,7 +2013,6 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
 	 * Turn the LEDs off explictly for the same reason.
 	 */
 	dd->ipath_f_quiet_serdes(dd);
-	dd->ipath_f_setextled(dd, 0, 0);

 	if (dd->ipath_stats_timer_active) {
 		del_timer_sync(&dd->ipath_stats_timer);
@@ -1918,6 +2028,9 @@ void ipath_shutdown_device(struct ipath_devdata *dd)
 			 ~0ULL & ~INFINIPATH_HWE_MEMBISTFAILED);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear, -1LL);
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, -1LL);
+
+	ipath_cdbg(VERBOSE, "Flush time and errors to EEPROM\n");
+	ipath_update_eeprom_log(dd);
 }

 /**
@@ -2078,6 +2191,16 @@ int ipath_reset_device(int unit)
 		goto bail;
 	}

+	if (atomic_read(&dd->ipath_led_override_timer_active)) {
+		/* Need to stop LED timer, _then_ shut off LEDs */
+		del_timer_sync(&dd->ipath_led_override_timer);
+		atomic_set(&dd->ipath_led_override_timer_active, 0);
+	}
+
+	/* Shut off LEDs after we are sure timer is not running */
+	dd->ipath_led_override = LED_OVER_BOTH_OFF;
+	dd->ipath_f_setextled(dd, 0, 0);
+
 	dev_info(&dd->pcidev->dev, "Reset on unit %u requested\n", unit);

 	if (!dd->ipath_kregbase || !(dd->ipath_flags & IPATH_PRESENT)) {

+ 268 - 35
drivers/infiniband/hw/ipath/ipath_eeprom.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -95,39 +95,37 @@ static int i2c_gpio_set(struct ipath_devdata *dd,
 			enum i2c_type line,
 			enum i2c_state new_line_state)
 {
-	u64 read_val, write_val, mask, *gpioval;
+	u64 out_mask, dir_mask, *gpioval;
+	unsigned long flags = 0;

 	gpioval = &dd->ipath_gpio_out;
-	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
-	if (line == i2c_line_scl)
-		mask = dd->ipath_gpio_scl;
-	else
-		mask = dd->ipath_gpio_sda;

-	if (new_line_state == i2c_line_high)
+	if (line == i2c_line_scl) {
+		dir_mask = dd->ipath_gpio_scl;
+		out_mask = (1UL << dd->ipath_gpio_scl_num);
+	} else {
+		dir_mask = dd->ipath_gpio_sda;
+		out_mask = (1UL << dd->ipath_gpio_sda_num);
+	}
+
+	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+	if (new_line_state == i2c_line_high) {
 		/* tri-state the output rather than force high */
-		write_val = read_val & ~mask;
-	else
+		dd->ipath_extctrl &= ~dir_mask;
+	} else {
 		/* config line to be an output */
-		write_val = read_val | mask;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
+		dd->ipath_extctrl |= dir_mask;
+	}
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);

-	/* set high and verify */
+	/* set output as well (no real verify) */
 	if (new_line_state == i2c_line_high)
-		write_val = 0x1UL;
+		*gpioval |= out_mask;
 	else
-		write_val = 0x0UL;
+		*gpioval &= ~out_mask;

-	if (line == i2c_line_scl) {
-		write_val <<= dd->ipath_gpio_scl_num;
-		*gpioval = *gpioval & ~(1UL << dd->ipath_gpio_scl_num);
-		*gpioval |= write_val;
-	} else {
-		write_val <<= dd->ipath_gpio_sda_num;
-		*gpioval = *gpioval & ~(1UL << dd->ipath_gpio_sda_num);
-		*gpioval |= write_val;
-	}
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_out, *gpioval);
+	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);

 	return 0;
 }
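
The reworked i2c_gpio_set() illustrates a general rule for write-only hardware registers: keep a software shadow (dd->ipath_extctrl, *gpioval) and do every read-modify-write of shadow plus register under one lock, here ipath_gpio_lock, so the EEPROM code and other GPIO users cannot clobber each other's updates. Boiled down (kernel-style sketch, illustrative types):

#include <linux/io.h>
#include <linux/spinlock.h>

struct gpio_bank {
	spinlock_t lock;
	u64 shadow;		/* last value written to the register */
	void __iomem *reg;	/* write-only direction/output register */
};

static void gpio_update(struct gpio_bank *g, u64 set, u64 clear)
{
	unsigned long flags;

	spin_lock_irqsave(&g->lock, flags);
	g->shadow = (g->shadow & ~clear) | set;	/* modify the shadow... */
	writeq(g->shadow, g->reg);		/* ...and mirror it to hw */
	spin_unlock_irqrestore(&g->lock, flags);
}
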
@@ -145,8 +143,9 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
 			enum i2c_type line,
 			enum i2c_state *curr_statep)
 {
-	u64 read_val, write_val, mask;
+	u64 read_val, mask;
 	int ret;
+	unsigned long flags = 0;

 	/* check args */
 	if (curr_statep == NULL) {
@@ -154,15 +153,21 @@ static int i2c_gpio_get(struct ipath_devdata *dd,
 		goto bail;
 	}

-	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
 	/* config line to be an input */
 	if (line == i2c_line_scl)
 		mask = dd->ipath_gpio_scl;
 	else
 		mask = dd->ipath_gpio_sda;
-	write_val = read_val & ~mask;
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, write_val);
+
+	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+	dd->ipath_extctrl &= ~mask;
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, dd->ipath_extctrl);
+	/*
+	 * Below is very unlikely to reflect true input state if Output
+	 * Enable actually changed.
+	 */
 	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
 	read_val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
+	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
 
 
 	if (read_val & mask)
 	if (read_val & mask)
 		*curr_statep = i2c_line_high;
 		*curr_statep = i2c_line_high;
@@ -192,6 +197,7 @@ static void i2c_wait_for_writes(struct ipath_devdata *dd)
 
 static void scl_out(struct ipath_devdata *dd, u8 bit)
 {
+	udelay(1);
 	i2c_gpio_set(dd, i2c_line_scl, bit ? i2c_line_high : i2c_line_low);
 
 	i2c_wait_for_writes(dd);
@@ -314,12 +320,18 @@ static int eeprom_reset(struct ipath_devdata *dd)
 	int clock_cycles_left = 9;
 	u64 *gpioval = &dd->ipath_gpio_out;
 	int ret;
+	unsigned long flags;
 
-	eeprom_init = 1;
+	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
+	/* Make sure shadows are consistent */
+	dd->ipath_extctrl = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extctrl);
 	*gpioval = ipath_read_kreg64(dd, dd->ipath_kregs->kr_gpio_out);
+	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
+
 	ipath_cdbg(VERBOSE, "Resetting i2c eeprom; initial gpioout reg "
 		   "is %llx\n", (unsigned long long) *gpioval);
 
+	eeprom_init = 1;
 	/*
 	 * This is to get the i2c into a known state, by first going low,
 	 * then tristate sda (and then tristate scl as first thing
@@ -355,8 +367,8 @@ bail:
  * @len: number of bytes to receive
  */
 
-int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
-		      void *buffer, int len)
+static int ipath_eeprom_internal_read(struct ipath_devdata *dd,
+					u8 eeprom_offset, void *buffer, int len)
 {
 	/* compiler complains unless initialized */
 	u8 single_byte = 0;
@@ -406,6 +418,7 @@ bail:
 	return ret;
 }
 
+
 /**
  * ipath_eeprom_write - writes data to the eeprom via I2C
  * @dd: the infinipath device
@@ -413,8 +426,8 @@ bail:
  * @buffer: data to write
  * @len: number of bytes to write
  */
-int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
-		       const void *buffer, int len)
+int ipath_eeprom_internal_write(struct ipath_devdata *dd, u8 eeprom_offset,
+				const void *buffer, int len)
 {
 	u8 single_byte;
 	int sub_len;
@@ -488,6 +501,38 @@ bail:
 	return ret;
 }
 
+/*
+ * The public entry-points ipath_eeprom_read() and ipath_eeprom_write()
+ * are now just wrappers around the internal functions.
+ */
+int ipath_eeprom_read(struct ipath_devdata *dd, u8 eeprom_offset,
+			void *buff, int len)
+{
+	int ret;
+
+	ret = down_interruptible(&dd->ipath_eep_sem);
+	if (!ret) {
+		ret = ipath_eeprom_internal_read(dd, eeprom_offset, buff, len);
+		up(&dd->ipath_eep_sem);
+	}
+
+	return ret;
+}
+
+int ipath_eeprom_write(struct ipath_devdata *dd, u8 eeprom_offset,
+			const void *buff, int len)
+{
+	int ret;
+
+	ret = down_interruptible(&dd->ipath_eep_sem);
+	if (!ret) {
+		ret = ipath_eeprom_internal_write(dd, eeprom_offset, buff, len);
+		up(&dd->ipath_eep_sem);
+	}
+
+	return ret;
+}
+
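The wrapper pattern above — take the semaphore interruptibly, call the internal helper, release — generalizes to any public/internal split where one path already holds the lock. A hedged sketch using a POSIX semaphore in place of the kernel's ipath_eep_sem (names are illustrative, not the driver's):

    #include <semaphore.h>
    #include <stdio.h>

    static sem_t eep_sem;            /* models dd->ipath_eep_sem */

    static int eeprom_internal_read(int offset, void *buf, int len)
    {
        /* placeholder for the real I2C transfer */
        return 0;
    }

    /* Public entry point: serialize callers, as ipath_eeprom_read()
     * does with down_interruptible()/up(). */
    static int eeprom_read(int offset, void *buf, int len)
    {
        int ret = sem_wait(&eep_sem);   /* kernel: down_interruptible() */
        if (!ret) {
            ret = eeprom_internal_read(offset, buf, len);
            sem_post(&eep_sem);         /* kernel: up() */
        }
        return ret;
    }

    int main(void)
    {
        char buf[8];
        sem_init(&eep_sem, 0, 1);
        printf("read: %d\n", eeprom_read(0, buf, sizeof(buf)));
        return 0;
    }

Callers like ipath_update_eeprom_log() that need to hold the semaphore across a read-modify-write use the internal functions directly, which is exactly why the split exists.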
 static u8 flash_csum(struct ipath_flash *ifp, int adjust)
 {
 	u8 *ip = (u8 *) ifp;
@@ -515,7 +560,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
 	void *buf;
 	struct ipath_flash *ifp;
 	__be64 guid;
-	int len;
+	int len, eep_stat;
 	u8 csum, *bguid;
 	int t = dd->ipath_unit;
 	struct ipath_devdata *dd0 = ipath_lookup(0);
@@ -559,7 +604,11 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
 		goto bail;
 	}
 
-	if (ipath_eeprom_read(dd, 0, buf, len)) {
+	down(&dd->ipath_eep_sem);
+	eep_stat = ipath_eeprom_internal_read(dd, 0, buf, len);
+	up(&dd->ipath_eep_sem);
+
+	if (eep_stat) {
 		ipath_dev_err(dd, "Failed reading GUID from eeprom\n");
 		goto done;
 	}
@@ -634,8 +683,192 @@ void ipath_get_eeprom_info(struct ipath_devdata *dd)
 	ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
 	ipath_cdbg(VERBOSE, "Initted GUID to %llx from eeprom\n",
 		   (unsigned long long) be64_to_cpu(dd->ipath_guid));
 		   (unsigned long long) be64_to_cpu(dd->ipath_guid));
 
 
+	memcpy(&dd->ipath_eep_st_errs, &ifp->if_errcntp, IPATH_EEP_LOG_CNT);
+	/*
+	 * Power-on (actually "active") hours are kept as little-endian value
+	 * in EEPROM, but as seconds in a (possibly as small as 24-bit)
+	 * atomic_t while running.
+	 */
+	atomic_set(&dd->ipath_active_time, 0);
+	dd->ipath_eep_hrs = ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8);
+
 done:
 	vfree(buf);
 
 bail:;
 }
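The two if_powerhour bytes are assembled little-endian, exactly as the new code does with ifp->if_powerhour[0] | (ifp->if_powerhour[1] << 8). A standalone check of that decode:

    #include <stdint.h>
    #include <stdio.h>
    #include <assert.h>

    /* Decode the 16-bit little-endian power-on-hours field the way the
     * driver does: low byte first, high byte shifted up. */
    static unsigned decode_powerhour(const uint8_t b[2])
    {
        return b[0] | (b[1] << 8);
    }

    int main(void)
    {
        uint8_t on_flash[2] = { 0x34, 0x12 };   /* 0x1234 = 4660 hours */
        assert(decode_powerhour(on_flash) == 0x1234);
        printf("%u hours\n", decode_powerhour(on_flash));
        return 0;
    }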
+
+/**
+ * ipath_update_eeprom_log - copy active-time and error counters to eeprom
+ * @dd: the infinipath device
+ *
+ * Although the time is kept as seconds in the ipath_devdata struct, it is
+ * rounded to hours for re-write, as we have only 16 bits in EEPROM.
+ * First-cut code reads whole (expected) struct ipath_flash, modifies,
+ * re-writes. Future direction: read/write only what we need, assuming
+ * that the EEPROM had to have been "good enough" for driver init, and
+ * if not, we aren't making it worse.
+ *
+ */
+
+int ipath_update_eeprom_log(struct ipath_devdata *dd)
+{
+	void *buf;
+	struct ipath_flash *ifp;
+	int len, hi_water;
+	uint32_t new_time, new_hrs;
+	u8 csum;
+	int ret, idx;
+	unsigned long flags;
+
+	/* first, check if we actually need to do anything. */
+	ret = 0;
+	for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+		if (dd->ipath_eep_st_new_errs[idx]) {
+			ret = 1;
+			break;
+		}
+	}
+	new_time = atomic_read(&dd->ipath_active_time);
+
+	if (ret == 0 && new_time < 3600)
+		return 0;
+
+	/*
+	 * The quick-check above determined that there is something worthy
+	 * of logging, so get the current contents and take a more detailed look.
+	 */
+	len = offsetof(struct ipath_flash, if_future);
+	buf = vmalloc(len);
+	ret = 1;
+	if (!buf) {
+		ipath_dev_err(dd, "Couldn't allocate memory to read %u "
+				"bytes from eeprom for logging\n", len);
+		goto bail;
+	}
+
+	/* Grab semaphore and read current EEPROM. If we get an
+	 * error, let go, but if not, keep it until we finish write.
+	 */
+	ret = down_interruptible(&dd->ipath_eep_sem);
+	if (ret) {
+		ipath_dev_err(dd, "Unable to acquire EEPROM for logging\n");
+		goto free_bail;
+	}
+	ret = ipath_eeprom_internal_read(dd, 0, buf, len);
+	if (ret) {
+		up(&dd->ipath_eep_sem);
+		ipath_dev_err(dd, "Unable read EEPROM for logging\n");
+		goto free_bail;
+	}
+	ifp = (struct ipath_flash *)buf;
+
+	csum = flash_csum(ifp, 0);
+	if (csum != ifp->if_csum) {
+		up(&dd->ipath_eep_sem);
+		ipath_dev_err(dd, "EEPROM cks err (0x%02X, S/B 0x%02X)\n",
+				csum, ifp->if_csum);
+		ret = 1;
+		goto free_bail;
+	}
+	hi_water = 0;
+	spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+	for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+		int new_val = dd->ipath_eep_st_new_errs[idx];
+		if (new_val) {
+			/*
+			 * If we have seen any errors, add to EEPROM values
+			 * We need to saturate at 0xFF (255) and we also
+			 * would need to adjust the checksum if we were
+			 * trying to minimize EEPROM traffic
+			 * Note that we add to actual current count in EEPROM,
+			 * in case it was altered while we were running.
+			 */
+			new_val += ifp->if_errcntp[idx];
+			if (new_val > 0xFF)
+				new_val = 0xFF;
+			if (ifp->if_errcntp[idx] != new_val) {
+				ifp->if_errcntp[idx] = new_val;
+				hi_water = offsetof(struct ipath_flash,
+						if_errcntp) + idx;
+			}
+			/*
+			 * update our shadow (used to minimize EEPROM
+			 * traffic), to match what we are about to write.
+			 */
+			dd->ipath_eep_st_errs[idx] = new_val;
+			dd->ipath_eep_st_new_errs[idx] = 0;
+		}
+	}
+	/*
+	 * now update active-time. We would like to round to the nearest hour
+	 * but unless atomic_t are sure to be proper signed ints we cannot,
+	 * because we need to account for what we "transfer" to EEPROM and
+	 * if we log an hour at 31 minutes, then we would need to set
+	 * active_time to -29 to accurately count the _next_ hour.
+	 */
+	if (new_time > 3600) {
+		new_hrs = new_time / 3600;
+		atomic_sub((new_hrs * 3600), &dd->ipath_active_time);
+		new_hrs += dd->ipath_eep_hrs;
+		if (new_hrs > 0xFFFF)
+			new_hrs = 0xFFFF;
+		dd->ipath_eep_hrs = new_hrs;
+		if ((new_hrs & 0xFF) != ifp->if_powerhour[0]) {
+			ifp->if_powerhour[0] = new_hrs & 0xFF;
+			hi_water = offsetof(struct ipath_flash, if_powerhour);
+		}
+		if ((new_hrs >> 8) != ifp->if_powerhour[1]) {
+			ifp->if_powerhour[1] = new_hrs >> 8;
+			hi_water = offsetof(struct ipath_flash, if_powerhour)
+					+ 1;
+		}
+	}
+	/*
+	 * There is a tiny possibility that we could somehow fail to write
+	 * the EEPROM after updating our shadows, but problems from holding
+	 * the spinlock too long are a much bigger issue.
+	 */
+	spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+	if (hi_water) {
+		/* we made some change to the data, update cksum and write */
+		csum = flash_csum(ifp, 1);
+		ret = ipath_eeprom_internal_write(dd, 0, buf, hi_water + 1);
+	}
+	up(&dd->ipath_eep_sem);
+	if (ret)
+		ipath_dev_err(dd, "Failed updating EEPROM\n");
+
+free_bail:
+	vfree(buf);
+bail:
+	return ret;
+
+}
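Two details of ipath_update_eeprom_log() are worth isolating: error counters saturate at 0xFF rather than wrapping, and active time is transferred to EEPROM only in whole hours, with the transferred seconds subtracted from the running count so the remainder keeps accumulating. A minimal model of both behaviors:

    #include <stdint.h>
    #include <stdio.h>

    /* Saturating 8-bit add, as used for the EEPROM error counters. */
    static uint8_t sat_add8(uint8_t cur, unsigned incr)
    {
        unsigned v = cur + incr;
        return v > 0xFF ? 0xFF : v;
    }

    /* Transfer whole hours out of a seconds counter; the remainder stays
     * behind so partial hours are not lost between updates. */
    static unsigned drain_hours(unsigned *seconds)
    {
        unsigned hrs = *seconds / 3600;
        *seconds -= hrs * 3600;
        return hrs;
    }

    int main(void)
    {
        unsigned active = 7300;                 /* 2 h, 100 s left over */
        printf("cnt: %u\n", sat_add8(250, 10)); /* saturates at 255 */
        printf("hrs: %u, rem: %u\n", drain_hours(&active), active);
        return 0;
    }

The hi_water tracking in the real function is a further refinement: only the prefix of the flash structure that actually changed (up to the highest dirtied offset) is rewritten.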
+
+/**
+ * ipath_inc_eeprom_err - increment one of the four error counters
+ * that are logged to EEPROM.
+ * @dd: the infinipath device
+ * @eidx: 0..3, the counter to increment
+ * @incr: how much to add
+ *
+ * Each counter is 8-bits, and saturates at 255 (0xFF). They
+ * are copied to the EEPROM (aka flash) whenever ipath_update_eeprom_log()
+ * is called, but it can only be called in a context that allows sleep.
+ * This function can be called even at interrupt level.
+ */
+
+void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr)
+{
+	uint new_val;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+	new_val = dd->ipath_eep_st_new_errs[eidx] + incr;
+	if (new_val > 255)
+		new_val = 255;
+	dd->ipath_eep_st_new_errs[eidx] = new_val;
+	spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+	return;
+}
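ipath_inc_eeprom_err() completes the split: increments happen under a spinlock (safe at interrupt level) into the ipath_eep_st_new_errs[] staging array, and the sleepable ipath_update_eeprom_log() later drains that staging area into EEPROM. The same stage-then-flush shape, sketched with a plain mutex standing in for the irq-safe spinlock:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    #define LOG_CNT 4

    static pthread_mutex_t st_lock = PTHREAD_MUTEX_INITIALIZER;
    static uint8_t new_errs[LOG_CNT];   /* staged, cheap to update */

    /* Fast path: stage an increment; never touches slow storage. */
    static void inc_err(unsigned idx, unsigned incr)
    {
        pthread_mutex_lock(&st_lock);
        unsigned v = new_errs[idx] + incr;
        new_errs[idx] = v > 255 ? 255 : v;
        pthread_mutex_unlock(&st_lock);
    }

    /* Slow path: drain staged counts to the (slow) backing store. */
    static void flush_errs(void)
    {
        pthread_mutex_lock(&st_lock);
        for (unsigned i = 0; i < LOG_CNT; i++) {
            if (new_errs[i])
                printf("log[%u] += %u\n", i, new_errs[i]);
            new_errs[i] = 0;
        }
        pthread_mutex_unlock(&st_lock);
    }

    int main(void)
    {
        inc_err(1, 3);
        inc_err(1, 300);    /* saturates */
        flush_errs();
        return 0;
    }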

+ 148 - 57
drivers/infiniband/hw/ipath/ipath_file_ops.c

@@ -396,7 +396,8 @@ static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
 			   "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
 			   "TID %u, vaddr %lx, physaddr %llx pgp %p\n",
 			   tid, vaddr, (unsigned long long) physaddr,
 			   tid, vaddr, (unsigned long long) physaddr,
 			   pagep[i]);
 			   pagep[i]);
-		dd->ipath_f_put_tid(dd, &tidbase[tid], 1, physaddr);
+		dd->ipath_f_put_tid(dd, &tidbase[tid], RCVHQ_RCV_TYPE_EXPECTED,
+				    physaddr);
 		/*
 		/*
 		 * don't check this tid in ipath_portshadow, since we
 		 * don't check this tid in ipath_portshadow, since we
 		 * just filled it in; start with the next one.
 		 * just filled it in; start with the next one.
@@ -422,7 +423,8 @@ static int ipath_tid_update(struct ipath_portdata *pd, struct file *fp,
 			if (dd->ipath_pageshadow[porttid + tid]) {
 				ipath_cdbg(VERBOSE, "Freeing TID %u\n",
 					   tid);
-				dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
+				dd->ipath_f_put_tid(dd, &tidbase[tid],
+						    RCVHQ_RCV_TYPE_EXPECTED,
 						    dd->ipath_tidinvalid);
 				pci_unmap_page(dd->pcidev,
 					dd->ipath_physshadow[porttid + tid],
@@ -538,7 +540,8 @@ static int ipath_tid_free(struct ipath_portdata *pd, unsigned subport,
 		if (dd->ipath_pageshadow[porttid + tid]) {
 			ipath_cdbg(VERBOSE, "PID %u freeing TID %u\n",
 				   pd->port_pid, tid);
-			dd->ipath_f_put_tid(dd, &tidbase[tid], 1,
+			dd->ipath_f_put_tid(dd, &tidbase[tid],
+					    RCVHQ_RCV_TYPE_EXPECTED,
 					    dd->ipath_tidinvalid);
 			pci_unmap_page(dd->pcidev,
 				dd->ipath_physshadow[porttid + tid],
@@ -921,7 +924,8 @@ static int ipath_create_user_egr(struct ipath_portdata *pd)
 					    (u64 __iomem *)
 					    ((char __iomem *)
 					     dd->ipath_kregbase +
-					     dd->ipath_rcvegrbase), 0, pa);
+					     dd->ipath_rcvegrbase),
+					    RCVHQ_RCV_TYPE_EAGER, pa);
 			pa += egrsize;
 		}
 		cond_resched();	/* don't hog the cpu */
@@ -1337,68 +1341,133 @@ bail:
 	return ret;
 }
 
-static unsigned int ipath_poll(struct file *fp,
-			       struct poll_table_struct *pt)
+static unsigned int ipath_poll_urgent(struct ipath_portdata *pd,
+				      struct file *fp,
+				      struct poll_table_struct *pt)
 {
-	struct ipath_portdata *pd;
-	u32 head, tail;
-	int bit;
 	unsigned pollflag = 0;
 	struct ipath_devdata *dd;
 
-	pd = port_fp(fp);
-	if (!pd)
-		goto bail;
 	dd = pd->port_dd;
 
-	bit = pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT;
-	set_bit(bit, &dd->ipath_rcvctrl);
+	if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) {
+		pollflag |= POLLERR;
+		clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
+	}
 
-	/*
-	 * Before blocking, make sure that head is still == tail,
-	 * reading from the chip, so we can be sure the interrupt
-	 * enable has made it to the chip.  If not equal, disable
-	 * interrupt again and return immediately.  This avoids races,
-	 * and the overhead of the chip read doesn't matter much at
-	 * this point, since we are waiting for something anyway.
-	 */
+	if (test_bit(IPATH_PORT_WAITING_URG, &pd->int_flag)) {
+		pollflag |= POLLIN | POLLRDNORM;
+		clear_bit(IPATH_PORT_WAITING_URG, &pd->int_flag);
+	}
 
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
+	if (!pollflag) {
+		set_bit(IPATH_PORT_WAITING_URG, &pd->port_flag);
+		if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW)
+			set_bit(IPATH_PORT_WAITING_OVERFLOW,
+				&pd->port_flag);
+
+		poll_wait(fp, &pd->port_wait, pt);
+	}
+
+	return pollflag;
+}
+
+static unsigned int ipath_poll_next(struct ipath_portdata *pd,
+				    struct file *fp,
+				    struct poll_table_struct *pt)
+{
+	u32 head, tail;
+	unsigned pollflag = 0;
+	struct ipath_devdata *dd;
+
+	dd = pd->port_dd;
 
 	head = ipath_read_ureg32(dd, ur_rcvhdrhead, pd->port_port);
-	tail = ipath_read_ureg32(dd, ur_rcvhdrtail, pd->port_port);
+	tail = *(volatile u64 *)pd->port_rcvhdrtail_kvaddr;
 
-	if (tail == head) {
+	if (test_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag)) {
+		pollflag |= POLLERR;
+		clear_bit(IPATH_PORT_WAITING_OVERFLOW, &pd->int_flag);
+	}
+
+	if (tail != head ||
+	    test_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag)) {
+		pollflag |= POLLIN | POLLRDNORM;
+		clear_bit(IPATH_PORT_WAITING_RCV, &pd->int_flag);
+	}
+
+	if (!pollflag) {
 		set_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
+		if (pd->poll_type & IPATH_POLL_TYPE_OVERFLOW)
+			set_bit(IPATH_PORT_WAITING_OVERFLOW,
+				&pd->port_flag);
+
+		set_bit(pd->port_port + INFINIPATH_R_INTRAVAIL_SHIFT,
+			&dd->ipath_rcvctrl);
+
+		ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
+				 dd->ipath_rcvctrl);
+
 		if (dd->ipath_rhdrhead_intr_off) /* arm rcv interrupt */
-			(void)ipath_write_ureg(dd, ur_rcvhdrhead,
-					       dd->ipath_rhdrhead_intr_off
-					       | head, pd->port_port);
-		poll_wait(fp, &pd->port_wait, pt);
+			ipath_write_ureg(dd, ur_rcvhdrhead,
+					 dd->ipath_rhdrhead_intr_off | head,
+					 pd->port_port);
 
-		if (test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
-			/* timed out, no packets received */
-			clear_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag);
-			pd->port_rcvwait_to++;
-		}
-		else
-			pollflag = POLLIN | POLLRDNORM;
-	}
-	else {
-		/* it's already happened; don't do wait_event overhead */
-		pollflag = POLLIN | POLLRDNORM;
-		pd->port_rcvnowait++;
+		poll_wait(fp, &pd->port_wait, pt);
 	}
 
-	clear_bit(bit, &dd->ipath_rcvctrl);
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_rcvctrl,
-			 dd->ipath_rcvctrl);
+	return pollflag;
+}
+
+static unsigned int ipath_poll(struct file *fp,
+			       struct poll_table_struct *pt)
+{
+	struct ipath_portdata *pd;
+	unsigned pollflag;
+
+	pd = port_fp(fp);
+	if (!pd)
+		pollflag = 0;
+	else if (pd->poll_type & IPATH_POLL_TYPE_URGENT)
+		pollflag = ipath_poll_urgent(pd, fp, pt);
+	else
+		pollflag = ipath_poll_next(pd, fp, pt);
 
-bail:
 	return pollflag;
 }
 
+static int ipath_supports_subports(int user_swmajor, int user_swminor)
+{
+	/* no subport implementation prior to software version 1.3 */
+	return (user_swmajor > 1) || (user_swminor >= 3);
+}
+
+static int ipath_compatible_subports(int user_swmajor, int user_swminor)
+{
+	/* this code is written long-hand for clarity */
+	if (IPATH_USER_SWMAJOR != user_swmajor) {
+		/* no promise of compatibility if major mismatch */
+		return 0;
+	}
+	if (IPATH_USER_SWMAJOR == 1) {
+		switch (IPATH_USER_SWMINOR) {
+		case 0:
+		case 1:
+		case 2:
+			/* no subport implementation so cannot be compatible */
+			return 0;
+		case 3:
+			/* 3 is only compatible with itself */
+			return user_swminor == 3;
+		default:
+			/* >= 4 are compatible (or are expected to be) */
+			return user_swminor >= 4;
+		}
+	}
+	/* make no promises yet for future major versions */
+	return 0;
+}
+
 static int init_subports(struct ipath_devdata *dd,
 			 struct ipath_portdata *pd,
 			 const struct ipath_user_info *uinfo)
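The refactored poll path above first reports any already-pending condition, and only calls poll_wait() when nothing is ready, after arming the wakeup flags. A compressed model of that check-then-arm ordering (the flag names are made up; the kernel versions also write device registers and use atomic bit operations):

    #include <stdio.h>

    #define POLLIN  0x1
    #define POLLERR 0x8

    struct port {
        int urg_pending;    /* models IPATH_PORT_WAITING_URG in int_flag */
        int ovfl_pending;   /* models IPATH_PORT_WAITING_OVERFLOW */
        int armed;          /* models the port_flag wait bits */
    };

    static unsigned poll_urgent(struct port *p)
    {
        unsigned pollflag = 0;

        if (p->ovfl_pending) {      /* consume pending conditions first */
            pollflag |= POLLERR;
            p->ovfl_pending = 0;
        }
        if (p->urg_pending) {
            pollflag |= POLLIN;
            p->urg_pending = 0;
        }
        if (!pollflag)              /* nothing ready: arm and sleep */
            p->armed = 1;           /* kernel: set_bit() + poll_wait() */
        return pollflag;
    }

    int main(void)
    {
        struct port p = { .urg_pending = 1 };
        printf("first: %#x, second: %#x, armed: %d\n",
               poll_urgent(&p), poll_urgent(&p), p.armed);
        return 0;
    }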
@@ -1408,20 +1477,32 @@ static int init_subports(struct ipath_devdata *dd,
 	size_t size;
 
 	/*
-	 * If the user is requesting zero or one port,
+	 * If the user is requesting zero subports,
 	 * skip the subport allocation.
 	 */
-	if (uinfo->spu_subport_cnt <= 1)
+	if (uinfo->spu_subport_cnt <= 0)
+		goto bail;
+
+	/* Self-consistency check for ipath_compatible_subports() */
+	if (ipath_supports_subports(IPATH_USER_SWMAJOR, IPATH_USER_SWMINOR) &&
+	    !ipath_compatible_subports(IPATH_USER_SWMAJOR,
+				       IPATH_USER_SWMINOR)) {
+		dev_info(&dd->pcidev->dev,
+			 "Inconsistent ipath_compatible_subports()\n");
 		goto bail;
+	}
 
-	/* Old user binaries don't know about new subport implementation */
-	if ((uinfo->spu_userversion & 0xffff) != IPATH_USER_SWMINOR) {
+	/* Check for subport compatibility */
+	if (!ipath_compatible_subports(uinfo->spu_userversion >> 16,
+				       uinfo->spu_userversion & 0xffff)) {
 		dev_info(&dd->pcidev->dev,
-			 "Mismatched user minor version (%d) and driver "
-                         "minor version (%d) while port sharing. Ensure "
+			 "Mismatched user version (%d.%d) and driver "
+			 "version (%d.%d) while port sharing. Ensure "
                          "that driver and library are from the same "
                          "release.\n",
+			 (int) (uinfo->spu_userversion >> 16),
                          (int) (uinfo->spu_userversion & 0xffff),
+			 IPATH_USER_SWMAJOR,
 	                 IPATH_USER_SWMINOR);
 		goto bail;
 	}
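ipath_compatible_subports() encodes an asymmetric rule: majors must match exactly, minors 0-2 predate subports, 1.3 pairs only with itself, and 1.4+ are expected to interoperate. The decision table can be exercised in isolation — a sketch in which IPATH_USER_SWMAJOR/SWMINOR are pinned to 1.3 purely for the test:

    #include <stdio.h>

    #define SWMAJOR 1   /* stand-ins for IPATH_USER_SWMAJOR/SWMINOR */
    #define SWMINOR 3

    static int compatible_subports(int maj, int min)
    {
        if (maj != SWMAJOR)
            return 0;               /* no promise across majors */
        if (SWMAJOR == 1) {
            switch (SWMINOR) {
            case 0: case 1: case 2:
                return 0;           /* no subport implementation yet */
            case 3:
                return min == 3;    /* 1.3 only with 1.3 */
            default:
                return min >= 4;    /* 1.4+ interoperate */
            }
        }
        return 0;
    }

    int main(void)
    {
        printf("1.3 vs 1.3: %d\n", compatible_subports(1, 3));
        printf("1.3 vs 1.4: %d\n", compatible_subports(1, 4));
        printf("2.3 vs 1.3: %d\n", compatible_subports(2, 3));
        return 0;
    }

The "self-consistency check" in init_subports() guards exactly this table: if the driver claims subport support for its own version but the table rejects it, something in the table is wrong.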
@@ -1725,14 +1806,13 @@ static int ipath_open(struct inode *in, struct file *fp)
 	return fp->private_data ? 0 : -ENOMEM;
 }
 
-
 /* Get port early, so can set affinity prior to memory allocation */
 static int ipath_assign_port(struct file *fp,
 			      const struct ipath_user_info *uinfo)
 {
 	int ret;
 	int i_minor;
-	unsigned swminor;
+	unsigned swmajor, swminor;
 
 	/* Check to be sure we haven't already initialized this file */
 	if (port_fp(fp)) {
@@ -1741,7 +1821,8 @@ static int ipath_assign_port(struct file *fp,
 	}
 
 	/* for now, if major version is different, bail */
-	if ((uinfo->spu_userversion >> 16) != IPATH_USER_SWMAJOR) {
+	swmajor = uinfo->spu_userversion >> 16;
+	if (swmajor != IPATH_USER_SWMAJOR) {
 		ipath_dbg("User major version %d not same as driver "
 			  "major %d\n", uinfo->spu_userversion >> 16,
 			  IPATH_USER_SWMAJOR);
@@ -1756,7 +1837,8 @@ static int ipath_assign_port(struct file *fp,
 
 	mutex_lock(&ipath_mutex);
 
-	if (swminor == IPATH_USER_SWMINOR && uinfo->spu_subport_cnt &&
+	if (ipath_compatible_subports(swmajor, swminor) &&
+	    uinfo->spu_subport_cnt &&
 	    (ret = find_shared_port(fp, uinfo))) {
 		mutex_unlock(&ipath_mutex);
 		if (ret > 0)
@@ -2020,7 +2102,8 @@ static int ipath_port_info(struct ipath_portdata *pd, u16 subport,
 	info.port = pd->port_port;
 	info.subport = subport;
 	/* Don't return new fields if old library opened the port. */
-	if ((pd->userversion & 0xffff) == IPATH_USER_SWMINOR) {
+	if (ipath_supports_subports(pd->userversion >> 16,
+				    pd->userversion & 0xffff)) {
 		/* Number of user ports available for this device. */
 		info.num_ports = pd->port_dd->ipath_cfgports - 1;
 		info.num_subports = pd->port_subport_cnt;
@@ -2123,6 +2206,11 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
 		src = NULL;
 		dest = NULL;
 		break;
+	case IPATH_CMD_POLL_TYPE:
+		copy = sizeof(cmd.cmd.poll_type);
+		dest = &cmd.cmd.poll_type;
+		src = &ucmd->cmd.poll_type;
+		break;
 	default:
 		ret = -EINVAL;
 		goto bail;
@@ -2195,6 +2283,9 @@ static ssize_t ipath_write(struct file *fp, const char __user *data,
 	case IPATH_CMD_PIOAVAILUPD:
 		ret = ipath_force_pio_avail_update(pd->port_dd);
 		break;
+	case IPATH_CMD_POLL_TYPE:
+		pd->poll_type = cmd.cmd.poll_type;
+		break;
 	}
 
 	if (ret >= 0)
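The new IPATH_CMD_POLL_TYPE case follows ipath_write()'s apparent two-phase shape: a first switch picks how many bytes to copy from user space and where, and a second switch acts on the copied value. A reduced model of that table-then-action split (the struct layout is simplified for illustration):

    #include <string.h>
    #include <stdio.h>

    enum { CMD_POLL_TYPE = 1 };

    struct cmd { int type; union { unsigned poll_type; } u; };

    /* Phase 1 decides copy size/destination; phase 2 acts on the value.
     * Sizing the copy per command means only the bytes the command
     * actually uses are copied and trusted. */
    static int handle_write(const struct cmd *ucmd, unsigned *port_poll_type)
    {
        struct cmd cmd;
        size_t copy;

        switch (ucmd->type) {
        case CMD_POLL_TYPE:
            copy = sizeof(cmd.u.poll_type);
            break;
        default:
            return -1;
        }
        memcpy(&cmd.u, &ucmd->u, copy);   /* kernel: copy_from_user() */

        switch (ucmd->type) {
        case CMD_POLL_TYPE:
            *port_poll_type = cmd.u.poll_type;
            break;
        }
        return 0;
    }

    int main(void)
    {
        unsigned poll_type = 0;
        struct cmd c = { .type = CMD_POLL_TYPE, .u.poll_type = 2 };
        printf("ret=%d poll_type=%u\n",
               handle_write(&c, &poll_type), poll_type);
        return 0;
    }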

+ 7 - 2
drivers/infiniband/hw/ipath/ipath_fs.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -257,9 +257,14 @@ static ssize_t atomic_port_info_read(struct file *file, char __user *buf,
 		/* Notimpl InitType (actually, an SMA decision) */
 		/* VLHighLimit is 0 (only one VL) */
 		; /* VLArbitrationHighCap is 0 (only one VL) */
+	/*
+	 * Note: the chips support a maximum MTU of 4096, but the driver
+	 * hasn't implemented this feature yet, so set the maximum
+	 * to 2048.
+	 */
 	portinfo[10] = 	/* VLArbitrationLowCap is 0 (only one VL) */
 		/* InitTypeReply is SMA decision */
-		(5 << 16)	/* MTUCap 4096 */
+		(4 << 16)	/* MTUCap 2048 */
 		| (7 << 13)	/* VLStallCount */
 		| (0x1f << 8)	/* HOQLife */
 		| (1 << 4)

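The MTUCap change is a field edit inside a packed 32-bit PortInfo word: bits 16 and up hold MTUCap (IBTA encoding: 4 = 2048, 5 = 4096), bits 13-15 VLStallCount, bits 8-12 HOQLife. Reassembling the word shows the layout — the name of the low nibble is a guess here, since its comment is not shown in the hunk:

    #include <stdint.h>
    #include <stdio.h>

    /* Pack the PortInfo word the way the driver builds portinfo[10];
     * field widths follow the comments in the hunk above. */
    static uint32_t pack_portinfo10(unsigned mtucap, unsigned vlstall,
                                    unsigned hoqlife, unsigned low_nibble)
    {
        return (mtucap << 16)      /* MTUCap: 4 = 2048, 5 = 4096 */
             | (vlstall << 13)     /* VLStallCount */
             | (hoqlife << 8)      /* HOQLife */
             | (low_nibble << 4);  /* assumed: OperationalVLs */
    }

    int main(void)
    {
        printf("old: %#x\n", pack_portinfo10(5, 7, 0x1f, 1));
        printf("new: %#x\n", pack_portinfo10(4, 7, 0x1f, 1));
        return 0;
    }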
+ 57 - 44
drivers/infiniband/hw/ipath/ipath_iba6110.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -36,6 +36,7 @@
  * HT chip.
  */
 
+#include <linux/vmalloc.h>
 #include <linux/pci.h>
 #include <linux/delay.h>
 #include <linux/htirq.h>
@@ -439,6 +440,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 	u32 bits, ctrl;
 	int isfatal = 0;
 	char bitsmsg[64];
+	int log_idx;
 
 	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
 
@@ -467,6 +469,11 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 
 	hwerrs &= dd->ipath_hwerrmask;
 
+	/* We log some errors to EEPROM, check if we have any of those. */
+	for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+		if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+			ipath_inc_eeprom_err(dd, log_idx, 1);
+
 	/*
 	 * make sure we get this much out, unless told to be quiet,
 	 * it's a parity error we may recover from,
@@ -502,9 +509,7 @@ static void ipath_ht_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 		if (!hwerrs) {
 			ipath_dbg("Clearing freezemode on ignored or "
 				  "recovered hardware error\n");
-			ctrl &= ~INFINIPATH_C_FREEZEMODE;
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-					 ctrl);
+			ipath_clear_freeze(dd);
 		}
 	}
 
@@ -672,10 +677,16 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 	if (n)
 		snprintf(name, namelen, "%s", n);
 
+	if (dd->ipath_boardrev != 6 && dd->ipath_boardrev != 7 &&
+	    dd->ipath_boardrev != 11) {
+		ipath_dev_err(dd, "Unsupported InfiniPath board %s!\n", name);
+		ret = 1;
+		goto bail;
+	}
 	if (dd->ipath_majrev != 3 || (dd->ipath_minrev < 2 ||
-		dd->ipath_minrev > 3)) {
+		dd->ipath_minrev > 4)) {
 		/*
-		 * This version of the driver only supports Rev 3.2 and 3.3
+		 * This version of the driver only supports Rev 3.2 - 3.4
 		 */
 		ipath_dev_err(dd,
 			      "Unsupported InfiniPath hardware revision %u.%u!\n",
@@ -689,36 +700,11 @@ static int ipath_ht_boardname(struct ipath_devdata *dd, char *name,
 	 * copies
 	 */
 	dd->ipath_flags |= IPATH_32BITCOUNTERS;
+	dd->ipath_flags |= IPATH_GPIO_INTR;
 	if (dd->ipath_htspeed != 800)
 		ipath_dev_err(dd,
 			      "Incorrectly configured for HT @ %uMHz\n",
 			      dd->ipath_htspeed);
-	if (dd->ipath_boardrev == 7 || dd->ipath_boardrev == 11 ||
-	    dd->ipath_boardrev == 6)
-		dd->ipath_flags |= IPATH_GPIO_INTR;
-	else
-		dd->ipath_flags |= IPATH_POLL_RX_INTR;
-	if (dd->ipath_boardrev == 8) {	/* LS/X-1 */
-		u64 val;
-		val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_extstatus);
-		if (val & INFINIPATH_EXTS_SERDESSEL) {
-			/*
-			 * hardware disabled
-			 *
-			 * This means that the chip is hardware disabled,
-			 * and will not be able to bring up the link,
-			 * in any case.  We special case this and abort
-			 * early, to avoid later messages.  We also set
-			 * the DISABLED status bit
-			 */
-			ipath_dbg("Unit %u is hardware-disabled\n",
-				  dd->ipath_unit);
-			*dd->ipath_statusp |= IPATH_STATUS_DISABLED;
-			/* this value is handled differently */
-			ret = 2;
-			goto bail;
-		}
-	}
 	ret = 0;
 
 bail:
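The reworked check fails fast on unsupported boards before testing the silicon revision range (3.2-3.4). The same gate, extracted with the board and revision numbers taken from the hunk above:

    #include <stdio.h>

    /* Accept only boardrev 6, 7, 11 and chip rev 3.2 through 3.4,
     * mirroring the updated ipath_ht_boardname() checks. */
    static int board_supported(unsigned boardrev, unsigned majrev,
                               unsigned minrev)
    {
        if (boardrev != 6 && boardrev != 7 && boardrev != 11)
            return 0;
        if (majrev != 3 || minrev < 2 || minrev > 4)
            return 0;
        return 1;
    }

    int main(void)
    {
        printf("board 7 rev 3.3: %d\n", board_supported(7, 3, 3));
        printf("board 8 rev 3.3: %d\n", board_supported(8, 3, 3));
        printf("board 7 rev 3.5: %d\n", board_supported(7, 3, 5));
        return 0;
    }

Note how this supersedes the removed boardrev-8 special case: unsupported boards are now rejected in one place instead of being probed further.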
@@ -1058,12 +1044,24 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
 				     u64 lst, u64 ltst)
 {
 	u64 extctl;
+	unsigned long flags = 0;
 
 	/* the diags use the LED to indicate diag info, so we leave
 	 * the external LED alone when the diags are running */
 	if (ipath_diag_inuse)
 		return;
 
+	/* Allow override of LED display, e.g. to locate a system in a rack */
+	if (dd->ipath_led_override) {
+		ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
+			? INFINIPATH_IBCS_LT_STATE_LINKUP
+			: INFINIPATH_IBCS_LT_STATE_DISABLED;
+		lst = (dd->ipath_led_override & IPATH_LED_LOG)
+			? INFINIPATH_IBCS_L_STATE_ACTIVE
+			: INFINIPATH_IBCS_L_STATE_DOWN;
+	}
+
+	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
 	/*
 	 * start by setting both LED control bits to off, then turn
 	 * on the appropriate bit(s).
@@ -1092,6 +1090,7 @@ static void ipath_setup_ht_setextled(struct ipath_devdata *dd,
 	}
 	dd->ipath_extctrl = extctl;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
 }
 
 static void ipath_init_ht_variables(struct ipath_devdata *dd)
@@ -1157,6 +1156,22 @@ static void ipath_init_ht_variables(struct ipath_devdata *dd)
 
 	dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
 	dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+
+	/*
+	 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+	 * 2 is Some Misc, 3 is reserved for future.
+	 */
+	dd->ipath_eep_st_masks[0].hwerrs_to_log =
+		INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+		INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+	dd->ipath_eep_st_masks[1].hwerrs_to_log =
+		INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+		INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+	dd->ipath_eep_st_masks[2].errs_to_log =
+		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
+
 }
 
 /**
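The ipath_eep_st_masks[] table turns the EEPROM log categories into data: each slot holds the hardware-error bits that feed it, and the handler just ANDs and increments. A table-driven classifier in the same spirit — the mask values here are invented for the example:

    #include <stdint.h>
    #include <stdio.h>

    #define EEP_LOG_CNT 4

    /* Slot 0: "TXE parity", slot 1: "RXE parity", slot 2: misc.
     * Bit assignments are invented; the driver uses INFINIPATH_HWE_*. */
    static const uint64_t hwerrs_to_log[EEP_LOG_CNT] = {
        0x00000000000000ffULL,
        0x000000000000ff00ULL,
        0x0000000000ff0000ULL,
        0,                      /* reserved for future */
    };

    static unsigned counters[EEP_LOG_CNT];

    static void classify(uint64_t hwerrs)
    {
        for (int i = 0; i < EEP_LOG_CNT; i++)
            if (hwerrs & hwerrs_to_log[i])
                counters[i]++;  /* kernel: ipath_inc_eeprom_err() */
    }

    int main(void)
    {
        classify(0x0101);       /* hits slots 0 and 1 */
        for (int i = 0; i < EEP_LOG_CNT; i++)
            printf("log[%d] = %u\n", i, counters[i]);
        return 0;
    }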
@@ -1372,7 +1387,7 @@ static void ipath_ht_quiet_serdes(struct ipath_devdata *dd)
  * ipath_ht_put_tid - write a TID in chip
  * @dd: the infinipath device
  * @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: 0 for eager, 1 for expected
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
  * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
  *
  * This exists as a separate routine to allow for special locking etc.
@@ -1393,7 +1408,7 @@ static void ipath_ht_put_tid(struct ipath_devdata *dd,
 				 "40 bits, using only 40!!!\n", pa);
 				 "40 bits, using only 40!!!\n", pa);
 			pa &= INFINIPATH_RT_ADDR_MASK;
 			pa &= INFINIPATH_RT_ADDR_MASK;
 		}
 		}
-		if (type == 0)
+		if (type == RCVHQ_RCV_TYPE_EAGER)
 			pa |= dd->ipath_tidtemplate;
 			pa |= dd->ipath_tidtemplate;
 		else {
 		else {
 			/* in words (fixed, full page).  */
 			/* in words (fixed, full page).  */
@@ -1433,7 +1448,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
 				   port * dd->ipath_rcvtidcnt *
 				   sizeof(*tidbase));
 	for (i = 0; i < dd->ipath_rcvtidcnt; i++)
-		ipath_ht_put_tid(dd, &tidbase[i], 1, dd->ipath_tidinvalid);
+		ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+				 dd->ipath_tidinvalid);
 
 	tidbase = (u64 __iomem *) ((char __iomem *)(dd->ipath_kregbase) +
 				   dd->ipath_rcvegrbase +
@@ -1441,7 +1457,8 @@ static void ipath_ht_clear_tids(struct ipath_devdata *dd, unsigned port)
 				   sizeof(*tidbase));
 
 	for (i = 0; i < dd->ipath_rcvegrcnt; i++)
-		ipath_ht_put_tid(dd, &tidbase[i], 0, dd->ipath_tidinvalid);
+		ipath_ht_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
+				 dd->ipath_tidinvalid);
 }
 
 /**
@@ -1528,11 +1545,6 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
 		writel(16, piobuf);
 		piobuf += pioincr;
 	}
-	/*
-	 * self-clearing
-	 */
-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-			 INFINIPATH_S_ABORT);
 
 	ipath_get_eeprom_info(dd);
 	if (dd->ipath_boardrev == 5 && dd->ipath_serial[0] == '1' &&
@@ -1543,8 +1555,10 @@ static int ipath_ht_early_init(struct ipath_devdata *dd)
 		 * with 128, rather than 112.
 		 */
 		dd->ipath_flags |= IPATH_GPIO_INTR;
-		dd->ipath_flags &= ~IPATH_POLL_RX_INTR;
-	}
+	} else
+		ipath_dev_err(dd, "Unsupported InfiniPath serial "
+			      "number %.16s!\n", dd->ipath_serial);
+
 	return 0;
 }
 
@@ -1561,7 +1575,6 @@ static int ipath_ht_txe_recover(struct ipath_devdata *dd)
 	}
 	dev_info(&dd->pcidev->dev,
 		"Recovering from TXE PIO parity error\n");
-	ipath_disarm_senderrbufs(dd, 1);
 	return 1;
 }
 

+ 55 - 37
drivers/infiniband/hw/ipath/ipath_iba6120.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -296,13 +296,6 @@ static const struct ipath_cregs ipath_pe_cregs = {
 #define IPATH_GPIO_SCL (1ULL << \
 	(_IPATH_GPIO_SCL_NUM+INFINIPATH_EXTC_GPIOOE_SHIFT))
 
-/*
- * Rev2 silicon allows suppressing check for ArmLaunch errors.
- * this can speed up short packet sends on systems that do
- * not guaranteee write-order.
- */
-#define INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR (1ULL<<63)
-
 /* 6120 specific hardware errors... */
 static const struct ipath_hwerror_msgs ipath_6120_hwerror_msgs[] = {
 	INFINIPATH_HWE_MSG(PCIEPOISONEDTLP, "PCIe Poisoned TLP"),
@@ -347,6 +340,7 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 	u32 bits, ctrl;
 	int isfatal = 0;
 	char bitsmsg[64];
+	int log_idx;
 
 	hwerrs = ipath_read_kreg64(dd, dd->ipath_kregs->kr_hwerrstatus);
 	if (!hwerrs) {
@@ -374,6 +368,11 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 
 	hwerrs &= dd->ipath_hwerrmask;
 
+	/* We log some errors to EEPROM, check if we have any of those. */
+	for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx)
+		if (hwerrs & dd->ipath_eep_st_masks[log_idx].hwerrs_to_log)
+			ipath_inc_eeprom_err(dd, log_idx, 1);
+
 	/*
 	 * make sure we get this much out, unless told to be quiet,
 	 * or it's occurred within the last 5 seconds
@@ -431,10 +430,12 @@ static void ipath_pe_handle_hwerrors(struct ipath_devdata *dd, char *msg,
 			*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
 			dd->ipath_flags &= ~IPATH_INITTED;
 		} else {
-			ipath_dbg("Clearing freezemode on ignored hardware "
-				  "error\n");
-			ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
-			   		 dd->ipath_control);
+			static u32 freeze_cnt;
+
+			freeze_cnt++;
+			ipath_dbg("Clearing freezemode on ignored or recovered "
+				  "hardware error (%u)\n", freeze_cnt);
+			ipath_clear_freeze(dd);
 		}
 	}
 
@@ -680,17 +681,6 @@ static int ipath_pe_bringup_serdes(struct ipath_devdata *dd)
 		val |= dd->ipath_rx_pol_inv <<
 			INFINIPATH_XGXS_RX_POL_SHIFT;
 	}
-	if (dd->ipath_minrev >= 2) {
-		/* Rev 2. can tolerate multiple writes to PBC, and
-		 * allowing them can provide lower latency on some
-		 * CPUs, but this feature is off by default, only
-		 * turned on by setting D63 of XGXSconfig reg.
-		 * May want to make this conditional more
-		 * fine-grained in future. This is not exactly
-		 * related to XGXS, but where the bit ended up.
-		 */
-		val |= INFINIPATH_XGXS_SUPPRESS_ARMLAUNCH_ERR;
-	}
 	if (val != prev_val)
 		ipath_write_kreg(dd, dd->ipath_kregs->kr_xgxsconfig, val);
 
@@ -791,12 +781,24 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
 				     u64 ltst)
 {
 	u64 extctl;
+	unsigned long flags = 0;
 
 	/* the diags use the LED to indicate diag info, so we leave
 	 * the external LED alone when the diags are running */
 	if (ipath_diag_inuse)
 		return;
 
+	/* Allow override of LED display, e.g. to locate a system in a rack */
+	if (dd->ipath_led_override) {
+		ltst = (dd->ipath_led_override & IPATH_LED_PHYS)
+			? INFINIPATH_IBCS_LT_STATE_LINKUP
+			: INFINIPATH_IBCS_LT_STATE_DISABLED;
+		lst = (dd->ipath_led_override & IPATH_LED_LOG)
+			? INFINIPATH_IBCS_L_STATE_ACTIVE
+			: INFINIPATH_IBCS_L_STATE_DOWN;
+	}
+
+	spin_lock_irqsave(&dd->ipath_gpio_lock, flags);
 	extctl = dd->ipath_extctrl & ~(INFINIPATH_EXTC_LED1PRIPORT_ON |
 				       INFINIPATH_EXTC_LED2PRIPORT_ON);
 
@@ -806,6 +808,7 @@ static void ipath_setup_pe_setextled(struct ipath_devdata *dd, u64 lst,
 		extctl |= INFINIPATH_EXTC_LED1PRIPORT_ON;
 	dd->ipath_extctrl = extctl;
 	ipath_write_kreg(dd, dd->ipath_kregs->kr_extctrl, extctl);
+	spin_unlock_irqrestore(&dd->ipath_gpio_lock, flags);
 }
 
 /**
@@ -955,6 +958,27 @@ static void ipath_init_pe_variables(struct ipath_devdata *dd)
 
 	dd->ipath_i_rcvavail_mask = INFINIPATH_I_RCVAVAIL_MASK;
 	dd->ipath_i_rcvurg_mask = INFINIPATH_I_RCVURG_MASK;
+
+	/*
+	 * EEPROM error log 0 is TXE Parity errors. 1 is RXE Parity.
+	 * 2 is Some Misc, 3 is reserved for future.
+	 */
+	dd->ipath_eep_st_masks[0].hwerrs_to_log =
+		INFINIPATH_HWE_TXEMEMPARITYERR_MASK <<
+		INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT;
+
+	/* Ignore errors in PIO/PBC on systems with unordered write-combining */
+	if (ipath_unordered_wc())
+		dd->ipath_eep_st_masks[0].hwerrs_to_log &= ~TXE_PIO_PARITY;
+
+	dd->ipath_eep_st_masks[1].hwerrs_to_log =
+		INFINIPATH_HWE_RXEMEMPARITYERR_MASK <<
+		INFINIPATH_HWE_RXEMEMPARITYERR_SHIFT;
+
+	dd->ipath_eep_st_masks[2].errs_to_log =
+		INFINIPATH_E_INVALIDADDR | INFINIPATH_E_RESET;
+
+
 }
 
 /* setup the MSI stuff again after a reset.  I'd like to just call
@@ -1082,7 +1106,7 @@ bail:
  * ipath_pe_put_tid - write a TID in chip
  * @dd: the infinipath device
  * @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: 0 for eager, 1 for expected
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
  * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
  *
  * This exists as a separate routine to allow for special locking etc.
@@ -1108,7 +1132,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
 				      "BUG: Physical page address 0x%lx "
 				      "BUG: Physical page address 0x%lx "
 				      "has bits set in 31-29\n", pa);
 				      "has bits set in 31-29\n", pa);
 
 
-		if (type == 0)
+		if (type == RCVHQ_RCV_TYPE_EAGER)
 			pa |= dd->ipath_tidtemplate;
 			pa |= dd->ipath_tidtemplate;
 		else /* for now, always full 4KB page */
 		else /* for now, always full 4KB page */
 			pa |= 2 << 29;
 			pa |= 2 << 29;
@@ -1132,7 +1156,7 @@ static void ipath_pe_put_tid(struct ipath_devdata *dd, u64 __iomem *tidptr,
  * ipath_pe_put_tid_2 - write a TID in chip, Revision 2 or higher
  * @dd: the infinipath device
  * @tidptr: pointer to the expected TID (in chip) to update
- * @tidtype: 0 for eager, 1 for expected
+ * @tidtype: RCVHQ_RCV_TYPE_EAGER (1) for eager, RCVHQ_RCV_TYPE_EXPECTED (0) for expected
  * @pa: physical address of in memory buffer; ipath_tidinvalid if freeing
  *
  * This exists as a separate routine to allow for selection of the
@@ -1157,7 +1181,7 @@ static void ipath_pe_put_tid_2(struct ipath_devdata *dd, u64 __iomem *tidptr,
 				      "BUG: Physical page address 0x%lx "
 				      "BUG: Physical page address 0x%lx "
 				      "has bits set in 31-29\n", pa);
 				      "has bits set in 31-29\n", pa);
 
 
-		if (type == 0)
+		if (type == RCVHQ_RCV_TYPE_EAGER)
 			pa |= dd->ipath_tidtemplate;
 			pa |= dd->ipath_tidtemplate;
 		else /* for now, always full 4KB page */
 		else /* for now, always full 4KB page */
 			pa |= 2 << 29;
 			pa |= 2 << 29;
@@ -1196,7 +1220,8 @@ static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
 		 port * dd->ipath_rcvtidcnt * sizeof(*tidbase));
 
 	for (i = 0; i < dd->ipath_rcvtidcnt; i++)
-		ipath_pe_put_tid(dd, &tidbase[i], 0, tidinv);
+		ipath_pe_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EXPECTED,
+				 tidinv);
 
 	tidbase = (u64 __iomem *)
 		((char __iomem *)(dd->ipath_kregbase) +
@@ -1204,7 +1229,8 @@ static void ipath_pe_clear_tids(struct ipath_devdata *dd, unsigned port)
 		 port * dd->ipath_rcvegrcnt * sizeof(*tidbase));
 
 	for (i = 0; i < dd->ipath_rcvegrcnt; i++)
-		ipath_pe_put_tid(dd, &tidbase[i], 1, tidinv);
+		ipath_pe_put_tid(dd, &tidbase[i], RCVHQ_RCV_TYPE_EAGER,
+				 tidinv);
 }
 
 /**
@@ -1311,13 +1337,6 @@ static int ipath_pe_get_base_info(struct ipath_portdata *pd, void *kbase)
 
 	dd = pd->port_dd;
 
-	if (dd != NULL && dd->ipath_minrev >= 2) {
-		ipath_cdbg(PROC, "IBA6120 Rev2, allow multiple PBC write\n");
-		kinfo->spi_runtime_flags |= IPATH_RUNTIME_PBC_REWRITE;
-		ipath_cdbg(PROC, "IBA6120 Rev2, allow loose DMA alignment\n");
-		kinfo->spi_runtime_flags |= IPATH_RUNTIME_LOOSE_DMA_ALIGN;
-	}
-
 done:
 	kinfo->spi_runtime_flags |= IPATH_RUNTIME_PCIE;
 	return 0;
@@ -1354,7 +1373,6 @@ static int ipath_pe_txe_recover(struct ipath_devdata *dd)
 		dev_info(&dd->pcidev->dev,
 			"Recovering from TXE PIO parity error\n");
 	}
-	ipath_disarm_senderrbufs(dd, 1);
 	return 1;
 }
 

+ 21 - 5
drivers/infiniband/hw/ipath/ipath_init_chip.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -133,7 +133,8 @@ static int create_port0_egr(struct ipath_devdata *dd)
 				   dd->ipath_ibmaxlen, PCI_DMA_FROMDEVICE);
 		dd->ipath_f_put_tid(dd, e + (u64 __iomem *)
 				    ((char __iomem *) dd->ipath_kregbase +
-				     dd->ipath_rcvegrbase), 0,
+				     dd->ipath_rcvegrbase),
+				    RCVHQ_RCV_TYPE_EAGER,
 				    dd->ipath_port0_skbinfo[e].phys);
 	}
 
@@ -310,7 +311,12 @@ static int init_chip_first(struct ipath_devdata *dd,
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiosize);
 	dd->ipath_piosize2k = val & ~0U;
 	dd->ipath_piosize4k = val >> 32;
-	dd->ipath_ibmtu = 4096;	/* default to largest legal MTU */
+	/*
+	 * Note: the chips support a maximum MTU of 4096, but the driver
+	 * hasn't implemented this feature yet, so set the initial value
+	 * to 2048.
+	 */
+	dd->ipath_ibmtu = 2048;
 	val = ipath_read_kreg64(dd, dd->ipath_kregs->kr_sendpiobufcnt);
 	dd->ipath_piobcnt2k = val & ~0U;
 	dd->ipath_piobcnt4k = val >> 32;
@@ -340,6 +346,10 @@ static int init_chip_first(struct ipath_devdata *dd,
 
 	spin_lock_init(&dd->ipath_tid_lock);
 
+	spin_lock_init(&dd->ipath_gpio_lock);
+	spin_lock_init(&dd->ipath_eep_st_lock);
+	sema_init(&dd->ipath_eep_sem, 1);
+
 done:
 	*pdp = pd;
 	return ret;
@@ -646,7 +656,7 @@ static int init_housekeeping(struct ipath_devdata *dd,
 	ret = dd->ipath_f_get_boardname(dd, boardn, sizeof boardn);
 
 	snprintf(dd->ipath_boardversion, sizeof(dd->ipath_boardversion),
-		 "Driver %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
+		 "ChipABI %u.%u, %s, InfiniPath%u %u.%u, PCI %u, "
 		 "SW Compat %u\n",
 		 IPATH_CHIP_VERS_MAJ, IPATH_CHIP_VERS_MIN, boardn,
 		 (unsigned)(dd->ipath_revision >> INFINIPATH_R_ARCH_SHIFT) &
@@ -727,7 +737,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 	uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
 	if (ipath_kpiobufs == 0) {
 		/* not set by user (this is default) */
-		if (piobufs >= (uports * IPATH_MIN_USER_PORT_BUFCNT) + 32)
+		if (piobufs > 144)
 			kpiobufs = 32;
 		else
 			kpiobufs = 16;
@@ -767,6 +777,12 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
 		   piobufs, dd->ipath_pbufsport, uports);
 
 	dd->ipath_f_early_init(dd);
+	/*
+	 * cancel any possible active sends from early driver load.
+	 * Follows early_init because some chips have to initialize
+	 * PIO buffers in early_init to avoid false parity errors.
+	 */
+	ipath_cancel_sends(dd);
 
 	/* early_init sets rcvhdrentsize and rcvhdrsize, so this must be
 	 * done after early_init */

+ 124 - 17
drivers/infiniband/hw/ipath/ipath_intr.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -93,7 +93,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
 
 	if (sbuf[0] || sbuf[1] || (piobcnt > 128 && (sbuf[2] || sbuf[3]))) {
 		int i;
-		if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG)) {
+		if (ipath_debug & (__IPATH_PKTDBG|__IPATH_DBG) &&
+			dd->ipath_lastcancel > jiffies) {
 			__IPATH_DBG_WHICH(__IPATH_PKTDBG|__IPATH_DBG,
 					  "SendbufErrs %lx %lx", sbuf[0],
 					  sbuf[1]);
@@ -108,7 +109,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
 					ipath_clrpiobuf(dd, i);
 				ipath_disarm_piobufs(dd, i, 1);
 			}
-		dd->ipath_lastcancel = jiffies+3; /* no armlaunch for a bit */
+		/* ignore armlaunch errs for a bit */
+		dd->ipath_lastcancel = jiffies+3;
 	}
 }
 
@@ -130,6 +132,17 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
 	 INFINIPATH_E_SMINPKTLEN | INFINIPATH_E_SPKTLEN | \
 	 INFINIPATH_E_INVALIDADDR)
 
+/*
+ * this is similar to E_SUM_ERRS, but can't ignore armlaunch, don't ignore
+ * errors not related to freeze and cancelling buffers.  Can't ignore
+ * armlaunch because could get more while still cleaning up, and need
+ * to cancel those as they happen.
+ */
+#define E_SPKT_ERRS_IGNORE \
+	 (INFINIPATH_E_SDROPPEDDATAPKT | INFINIPATH_E_SDROPPEDSMPPKT | \
+	 INFINIPATH_E_SMAXPKTLEN | INFINIPATH_E_SMINPKTLEN | \
+	 INFINIPATH_E_SPKTLEN)
+
 /*
  * these are errors that can occur when the link changes state while
  * a packet is being sent or received.  This doesn't cover things
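E_SPKT_ERRS_IGNORE is effectively E_SUM_ERRS minus armlaunch and the bits unrelated to freeze/cancel: masks like these let later code clear a whole class of "expected during cancel" errors in a single register write. Composing and subtracting such masks, with invented bit names standing in for the INFINIPATH_E_* flags:

    #include <stdint.h>
    #include <stdio.h>

    #define E_SDROPPEDDATAPKT (1ULL << 0)
    #define E_SDROPPEDSMPPKT  (1ULL << 1)
    #define E_SMAXPKTLEN      (1ULL << 2)
    #define E_SMINPKTLEN      (1ULL << 3)
    #define E_SPKTLEN         (1ULL << 4)
    #define E_ARMLAUNCH       (1ULL << 5)
    #define E_INVALIDADDR     (1ULL << 6)

    #define E_SUM_ERRS (E_SDROPPEDDATAPKT | E_SDROPPEDSMPPKT | \
                        E_SMAXPKTLEN | E_SMINPKTLEN | E_SPKTLEN | \
                        E_ARMLAUNCH | E_INVALIDADDR)

    /* Like E_SPKT_ERRS_IGNORE: the "safe to clear after cancel" subset,
     * deliberately excluding armlaunch and address errors. */
    #define E_SPKT_IGNORE (E_SUM_ERRS & ~(E_ARMLAUNCH | E_INVALIDADDR))

    int main(void)
    {
        uint64_t errs = E_SPKTLEN | E_ARMLAUNCH;

        printf("clearable: %#llx\n",
               (unsigned long long)(errs & E_SPKT_IGNORE));
        printf("must handle: %#llx\n",
               (unsigned long long)(errs & ~E_SPKT_IGNORE));
        return 0;
    }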
@@ -290,12 +303,7 @@ static void handle_e_ibstatuschanged(struct ipath_devdata *dd,
 		 * Flush all queued sends when link went to DOWN or INIT,
 		 * to be sure that they don't block SMA and other MAD packets
 		 */
-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
-				 INFINIPATH_S_ABORT);
-		ipath_disarm_piobufs(dd, dd->ipath_lastport_piobuf,
-							(unsigned)(dd->ipath_piobcnt2k +
-					dd->ipath_piobcnt4k) -
-					dd->ipath_lastport_piobuf);
+		ipath_cancel_sends(dd);
 	}
 	else if (lstate == IPATH_IBSTATE_INIT || lstate == IPATH_IBSTATE_ARM ||
 	    lstate == IPATH_IBSTATE_ACTIVE) {
@@ -505,6 +513,7 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 	int i, iserr = 0;
 	int chkerrpkts = 0, noprint = 0;
 	unsigned supp_msgs;
+	int log_idx;
 
 	supp_msgs = handle_frequent_errors(dd, errs, msg, &noprint);
 
@@ -518,6 +527,13 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 	if (errs & INFINIPATH_E_HARDWARE) {
 		/* reuse same msg buf */
 		dd->ipath_f_handle_hwerrors(dd, msg, sizeof msg);
+	} else {
+		u64 mask;
+		for (log_idx = 0; log_idx < IPATH_EEP_LOG_CNT; ++log_idx) {
+			mask = dd->ipath_eep_st_masks[log_idx].errs_to_log;
+			if (errs & mask)
+				ipath_inc_eeprom_err(dd, log_idx, 1);
+		}
 	}
 
 	if (!noprint && (errs & ~dd->ipath_e_bitsextant))
@@ -675,6 +691,17 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 					chkerrpkts = 1;
 				dd->ipath_lastrcvhdrqtails[i] = tl;
 				pd->port_hdrqfull++;
+				if (test_bit(IPATH_PORT_WAITING_OVERFLOW,
+					     &pd->port_flag)) {
+					clear_bit(
+					  IPATH_PORT_WAITING_OVERFLOW,
+					  &pd->port_flag);
+					set_bit(
+					  IPATH_PORT_WAITING_OVERFLOW,
+					  &pd->int_flag);
+					wake_up_interruptible(
+					  &pd->port_wait);
+				}
 			}
 		}
 	}
@@ -744,6 +771,72 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
 	return chkerrpkts;
 }
 
+
+/*
+ * try to cleanup as much as possible for anything that might have gone
+ * wrong while in freeze mode, such as pio buffers being written by user
+ * processes (causing armlaunch), send errors due to going into freeze mode,
+ * etc., and try to avoid causing extra interrupts while doing so.
+ * Forcibly update the in-memory pioavail register copies after cleanup
+ * because the chip won't do it for anything changing while in freeze mode
+ * (we don't want to wait for the next pio buffer state change).
+ * Make sure that we don't lose any important interrupts by using the chip
+ * feature that says that writing 0 to a bit in *clear that is set in
+ * *status will cause an interrupt to be generated again (if allowed by
+ * the *mask value).
+ */
+void ipath_clear_freeze(struct ipath_devdata *dd)
+{
+	int i, im;
+	__le64 val;
+
+	/* disable error interrupts, to avoid confusion */
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
+
+	/*
+	 * clear all sends, because they may have been
+	 * completed by usercode while in freeze mode, and
+	 * therefore would not be sent, and eventually
+	 * might cause the process to run out of bufs
+	 */
+	ipath_cancel_sends(dd);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+			 dd->ipath_control);
+
+	/* ensure pio avail updates continue */
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+		 dd->ipath_sendctrl & ~IPATH_S_PIOBUFAVAILUPD);
+	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+		 dd->ipath_sendctrl);
+
+	/*
+	 * We just enabled pioavailupdate, so dma copy is almost certainly
+	 * not yet right, so read the registers directly.  Similar to init
+	 */
+	for (i = 0; i < dd->ipath_pioavregs; i++) {
+		/* deal with 6110 chip bug */
+		im = i > 3 ? ((i&1) ? i-1 : i+1) : i;
+		val = ipath_read_kreg64(dd, 0x1000+(im*sizeof(u64)));
+		dd->ipath_pioavailregs_dma[i] = dd->ipath_pioavailshadow[i]
+			= le64_to_cpu(val);
+	}
+
+	/*
+	 * force new interrupt if any hwerr, error or interrupt bits are
+	 * still set, and clear "safe" send packet errors related to freeze
+	 * and cancelling sends.  Re-enable error interrupts before possible
+	 * force of re-interrupt on pending interrupts.
+	 */
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_hwerrclear, 0ULL);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_errorclear,
+		E_SPKT_ERRS_IGNORE);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask,
+		~dd->ipath_maskederrs);
+	ipath_write_kreg(dd, dd->ipath_kregs->kr_intclear, 0ULL);
+}
+
+
 /* this is separate to allow for better optimization of ipath_intr() */
 
 static void ipath_bad_intr(struct ipath_devdata *dd, u32 * unexpectp)
@@ -872,14 +965,25 @@ static void handle_urcv(struct ipath_devdata *dd, u32 istat)
 		   dd->ipath_i_rcvurg_mask);
 	for (i = 1; i < dd->ipath_cfgports; i++) {
 		struct ipath_portdata *pd = dd->ipath_pd[i];
-		if (portr & (1 << i) && pd && pd->port_cnt &&
-			test_bit(IPATH_PORT_WAITING_RCV, &pd->port_flag)) {
-			clear_bit(IPATH_PORT_WAITING_RCV,
-				  &pd->port_flag);
-			clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
-				  &dd->ipath_rcvctrl);
-			wake_up_interruptible(&pd->port_wait);
-			rcvdint = 1;
+		if (portr & (1 << i) && pd && pd->port_cnt) {
+			if (test_bit(IPATH_PORT_WAITING_RCV,
+				     &pd->port_flag)) {
+				clear_bit(IPATH_PORT_WAITING_RCV,
+					  &pd->port_flag);
+				set_bit(IPATH_PORT_WAITING_RCV,
+					&pd->int_flag);
+				clear_bit(i + INFINIPATH_R_INTRAVAIL_SHIFT,
+					  &dd->ipath_rcvctrl);
+				wake_up_interruptible(&pd->port_wait);
+				rcvdint = 1;
+			} else if (test_bit(IPATH_PORT_WAITING_URG,
+					    &pd->port_flag)) {
+				clear_bit(IPATH_PORT_WAITING_URG,
+					  &pd->port_flag);
+				set_bit(IPATH_PORT_WAITING_URG,
+					&pd->int_flag);
+				wake_up_interruptible(&pd->port_wait);
+			}
 		}
 	}
 	if (rcvdint) {
@@ -905,6 +1009,9 @@ irqreturn_t ipath_intr(int irq, void *data)
 
 	ipath_stats.sps_ints++;
 
+	if (dd->ipath_int_counter != (u32) -1)
+		dd->ipath_int_counter++;
+
 	if (!(dd->ipath_flags & IPATH_PRESENT)) {
 		/*
 		 * This return value is not great, but we do not want the

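The ipath_intr() hunk above counts interrupts with a guard against wraparound: once ipath_int_counter reaches (u32) -1 it stops incrementing, so a long-running device can never report a misleading zero. A minimal user-space sketch of the saturating-counter idiom (the demo harness is illustrative, not driver code):

	#include <stdio.h>

	/* increment, but stick at the ceiling instead of wrapping back to 0 */
	static void sat_inc(unsigned int *ctr)
	{
		if (*ctr != (unsigned int) -1)
			(*ctr)++;
	}

	int main(void)
	{
		unsigned int c = (unsigned int) -2;

		sat_inc(&c);		/* hits the maximum */
		sat_inc(&c);		/* saturates: stays there */
		printf("%x\n", c);	/* prints ffffffff */
		return 0;
	}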
+ 78 - 7
drivers/infiniband/hw/ipath/ipath_kernel.h

@@ -1,7 +1,7 @@
 #ifndef _IPATH_KERNEL_H
 #define _IPATH_KERNEL_H
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -57,6 +57,24 @@
 extern struct infinipath_stats ipath_stats;
 
 #define IPATH_CHIP_SWVERSION IPATH_CHIP_VERS_MAJ
+/*
+ * First-cut criterion for "device is active" is
+ * two thousand dwords combined Tx, Rx traffic per
+ * 5-second interval. SMA packets are 64 dwords,
+ * and occur "a few per second", presumably each way.
+ */
+#define IPATH_TRAFFIC_ACTIVE_THRESHOLD (2000)
+/*
+ * Struct used to indicate which errors are logged in each of the
+ * error-counters that are logged to EEPROM. A counter is incremented
+ * _once_ (saturating at 255) for each event with any bits set in
+ * the error or hwerror register masks below.
+ */
+#define IPATH_EEP_LOG_CNT (4)
+struct ipath_eep_log_mask {
+	u64 errs_to_log;
+	u64 hwerrs_to_log;
+};
 
 struct ipath_portdata {
 	void **port_rcvegrbuf;
@@ -109,6 +127,8 @@ struct ipath_portdata {
 	u32 port_tidcursor;
 	/* next expected TID to check */
 	unsigned long port_flag;
+	/* what happened */
+	unsigned long int_flag;
 	/* WAIT_RCV that timed out, no interrupt */
 	u32 port_rcvwait_to;
 	/* WAIT_PIO that timed out, no interrupt */
@@ -137,6 +157,8 @@ struct ipath_portdata {
 	u32 userversion;
 	/* Bitmask of active slaves */
 	u32 active_slaves;
+	/* Type of packets or conditions we want to poll for */
+	u16 poll_type;
 };
 
 struct sk_buff;
@@ -275,6 +297,8 @@ struct ipath_devdata {
 	u32 ipath_lastport_piobuf;
 	/* is a stats timer active */
 	u32 ipath_stats_timer_active;
+	/* number of interrupts for this device -- saturates... */
+	u32 ipath_int_counter;
 	/* dwords sent read from counter */
 	u32 ipath_lastsword;
 	/* dwords received read from counter */
@@ -369,9 +393,6 @@ struct ipath_devdata {
 	struct class_device *diag_class_dev;
 	/* timer used to prevent stats overflow, error throttling, etc. */
 	struct timer_list ipath_stats_timer;
-	/* check for stale messages in rcv queue */
-	/* only allow one intr at a time. */
-	unsigned long ipath_rcv_pending;
 	void *ipath_dummy_hdrq;	/* used after port close */
 	dma_addr_t ipath_dummy_hdrq_phys;
 
@@ -399,6 +420,8 @@ struct ipath_devdata {
 	u64 ipath_gpio_out;
 	/* shadow the gpio mask register */
 	u64 ipath_gpio_mask;
+	/* shadow the gpio output enable, etc... */
+	u64 ipath_extctrl;
 	/* kr_revision shadow */
 	u64 ipath_revision;
 	/*
@@ -473,8 +496,6 @@ struct ipath_devdata {
 	u32 ipath_cregbase;
 	/* shadow the control register contents */
 	u32 ipath_control;
-	/* shadow the gpio output contents */
-	u32 ipath_extctrl;
 	/* PCI revision register (HTC rev on FPGA) */
 	u32 ipath_pcirev;
 
@@ -552,6 +573,9 @@ struct ipath_devdata {
 	u32 ipath_overrun_thresh_errs;
 	u32 ipath_lli_errs;
 
+	/* status check work */
+	struct delayed_work status_work;
+
 	/*
 	 * Not all devices managed by a driver instance are the same
 	 * type, so these fields must be per-device.
@@ -575,6 +599,37 @@ struct ipath_devdata {
 	u16 ipath_gpio_scl_num;
 	u64 ipath_gpio_sda;
 	u64 ipath_gpio_scl;
+
+	/* lock for doing RMW of shadows/regs for ExtCtrl and GPIO */
+	spinlock_t ipath_gpio_lock;
+
+	/* used to override LED behavior */
+	u8 ipath_led_override;  /* Substituted for normal value, if non-zero */
+	u16 ipath_led_override_timeoff; /* delta to next timer event */
+	u8 ipath_led_override_vals[2]; /* Alternates per blink-frame */
+	u8 ipath_led_override_phase; /* Just counts, LSB picks from vals[] */
+	atomic_t ipath_led_override_timer_active;
+	/* Used to flash LEDs in override mode */
+	struct timer_list ipath_led_override_timer;
+
+	/* Support (including locks) for EEPROM logging of errors and time */
+	/* control access to actual counters, timer */
+	spinlock_t ipath_eep_st_lock;
+	/* control high-level access to EEPROM */
+	struct semaphore ipath_eep_sem;
+	/* Below inc'd by ipath_snap_cntrs(), locked by ipath_eep_st_lock */
+	uint64_t ipath_traffic_wds;
+	/* active time is kept in seconds, but logged in hours */
+	atomic_t ipath_active_time;
+	/* Below are nominal shadow of EEPROM, new since last EEPROM update */
+	uint8_t ipath_eep_st_errs[IPATH_EEP_LOG_CNT];
+	uint8_t ipath_eep_st_new_errs[IPATH_EEP_LOG_CNT];
+	uint16_t ipath_eep_hrs;
+	/*
+	 * masks for which bits of errs, hwerrs that cause
+	 * each of the counters to increment.
+	 */
+	struct ipath_eep_log_mask ipath_eep_st_masks[IPATH_EEP_LOG_CNT];
 };
 
 /* Private data for file operations */
@@ -592,6 +647,7 @@ int ipath_enable_wc(struct ipath_devdata *dd);
 void ipath_disable_wc(struct ipath_devdata *dd);
 int ipath_count_units(int *npresentp, int *nupp, u32 *maxportsp);
 void ipath_shutdown_device(struct ipath_devdata *);
+void ipath_clear_freeze(struct ipath_devdata *);
 
 struct file_operations;
 int ipath_cdev_init(int minor, char *name, const struct file_operations *fops,
@@ -627,6 +683,7 @@ int ipath_unordered_wc(void);
 
 void ipath_disarm_piobufs(struct ipath_devdata *, unsigned first,
 			  unsigned cnt);
+void ipath_cancel_sends(struct ipath_devdata *);
 
 int ipath_create_rcvhdrq(struct ipath_devdata *, struct ipath_portdata *);
 void ipath_free_pddata(struct ipath_devdata *, struct ipath_portdata *);
@@ -685,7 +742,6 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 		 * are 64bit */
 #define IPATH_32BITCOUNTERS 0x20000
 		/* can miss port0 rx interrupts */
-#define IPATH_POLL_RX_INTR  0x40000
 #define IPATH_DISABLED      0x80000 /* administratively disabled */
 		/* Use GPIO interrupts for new counters */
 #define IPATH_GPIO_ERRINTRS 0x100000
@@ -704,6 +760,10 @@ int ipath_set_rx_pol_inv(struct ipath_devdata *dd, u8 new_pol_inv);
 #define IPATH_PORT_WAITING_PIO   3
 		/* master has not finished initializing */
 #define IPATH_PORT_MASTER_UNINIT 4
+		/* waiting for an urgent packet to arrive */
+#define IPATH_PORT_WAITING_URG 5
+		/* waiting for a header overflow */
+#define IPATH_PORT_WAITING_OVERFLOW 6
 
 /* free up any allocated data at closes */
 void ipath_free_data(struct ipath_portdata *dd);
@@ -713,9 +773,20 @@ u32 __iomem *ipath_getpiobuf(struct ipath_devdata *, u32 *);
 void ipath_init_iba6120_funcs(struct ipath_devdata *);
 void ipath_init_iba6110_funcs(struct ipath_devdata *);
 void ipath_get_eeprom_info(struct ipath_devdata *);
+int ipath_update_eeprom_log(struct ipath_devdata *dd);
+void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
 u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
 void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
 
+/*
+ * Set LED override, only the two LSBs have "public" meaning, but
+ * any non-zero value substitutes them for the Link and LinkTrain
+ * LED states.
+ */
+#define IPATH_LED_PHYS 1 /* Physical (linktraining) GREEN LED */
+#define IPATH_LED_LOG 2  /* Logical (link) YELLOW LED */
+void ipath_set_led_override(struct ipath_devdata *dd, unsigned int val);
+
 /*
  * number of words used for protocol header if not set by ipath_userinit();
  */

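The new ipath_eep_log_mask table is consumed the way its comment describes: each logged counter has a mask of error bits, and any event matching the mask bumps that counter once, saturating at 255. A self-contained sketch of that accounting (constants mirror the header; the mask values and harness are hypothetical):

	#include <stdio.h>

	#define EEP_LOG_CNT 4	/* mirrors IPATH_EEP_LOG_CNT */

	struct eep_log_mask { unsigned long long errs_to_log; };

	/* one saturating increment per event for every counter whose mask hits */
	static void count_event(unsigned char counters[EEP_LOG_CNT],
				const struct eep_log_mask masks[EEP_LOG_CNT],
				unsigned long long errs)
	{
		int i;

		for (i = 0; i < EEP_LOG_CNT; i++)
			if ((errs & masks[i].errs_to_log) && counters[i] < 255)
				counters[i]++;	/* saturates at 255, as documented */
	}

	int main(void)
	{
		struct eep_log_mask masks[EEP_LOG_CNT] = {
			{ 0x1 }, { 0x6 }, { 0x8 }, { 0xf0 }
		};
		unsigned char counters[EEP_LOG_CNT] = { 0 };

		count_event(counters, masks, 0x5);	/* hits masks 0 and 1 */
		printf("%d %d %d %d\n", counters[0], counters[1],
		       counters[2], counters[3]);	/* prints: 1 1 0 0 */
		return 0;
	}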
+ 1 - 1
drivers/infiniband/hw/ipath/ipath_keys.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two

+ 1 - 1
drivers/infiniband/hw/ipath/ipath_layer.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two

+ 1 - 1
drivers/infiniband/hw/ipath/ipath_layer.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two

+ 8 - 3
drivers/infiniband/hw/ipath/ipath_mad.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -103,7 +103,7 @@ static int recv_subn_get_nodeinfo(struct ib_smp *smp,
 	/* This is already in network order */
 	nip->sys_guid = to_idev(ibdev)->sys_image_guid;
 	nip->node_guid = dd->ipath_guid;
-	nip->port_guid = nip->sys_guid;
+	nip->port_guid = dd->ipath_guid;
 	nip->partition_cap = cpu_to_be16(ipath_get_npkeys(dd));
 	nip->device_id = cpu_to_be16(dd->ipath_deviceid);
 	majrev = dd->ipath_majrev;
@@ -292,7 +292,12 @@ static int recv_subn_get_portinfo(struct ib_smp *smp,
 	/* pip->vl_arb_high_cap; // only one VL */
 	/* pip->vl_arb_low_cap; // only one VL */
 	/* InitTypeReply = 0 */
-	pip->inittypereply_mtucap = IB_MTU_4096;
+	/*
+	 * Note: the chips support a maximum MTU of 4096, but the driver
+	 * hasn't implemented this feature yet, so set the maximum value
+	 * to 2048.
+	 */
+	pip->inittypereply_mtucap = IB_MTU_2048;
 	// HCAs ignore VLStallCount and HOQLife
 	/* pip->vlstallcnt_hoqlife; */
 	pip->operationalvl_pei_peo_fpi_fpo = 0x10;	/* OVLs = 1 */

+ 1 - 1
drivers/infiniband/hw/ipath/ipath_mmap.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU

+ 1 - 1
drivers/infiniband/hw/ipath/ipath_mr.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two

+ 8 - 11
drivers/infiniband/hw/ipath/ipath_qp.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -336,7 +336,7 @@ static void ipath_reset_qp(struct ipath_qp *qp)
 	qp->qkey = 0;
 	qp->qp_access_flags = 0;
 	qp->s_busy = 0;
-	qp->s_flags &= ~IPATH_S_SIGNAL_REQ_WR;
+	qp->s_flags &= IPATH_S_SIGNAL_REQ_WR;
 	qp->s_hdrwords = 0;
 	qp->s_psn = 0;
 	qp->r_psn = 0;
@@ -507,16 +507,13 @@ int ipath_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		    attr->port_num > ibqp->device->phys_port_cnt)
 			goto inval;
 
+	/*
+	 * Note: the chips support a maximum MTU of 4096, but the driver
+	 * hasn't implemented this feature yet, so don't allow Path MTU
+	 * values greater than 2048.
+	 */
 	if (attr_mask & IB_QP_PATH_MTU)
-		if (attr->path_mtu > IB_MTU_4096)
-			goto inval;
-
-	if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
-		if (attr->max_dest_rd_atomic > 1)
-			goto inval;
-
-	if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
-		if (attr->max_rd_atomic > 1)
+		if (attr->path_mtu > IB_MTU_2048)
 			goto inval;
 
 	if (attr_mask & IB_QP_PATH_MIG_STATE)

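The one-character ipath_reset_qp() change above is a behavior fix, not a cleanup: `s_flags &= ~IPATH_S_SIGNAL_REQ_WR` wiped the "signal requested work completions" policy bit on every reset, while `s_flags &= IPATH_S_SIGNAL_REQ_WR` keeps that bit and clears everything else. A tiny demonstration of the two masks (flag values hypothetical, not the driver's):

	#include <stdio.h>

	#define S_SIGNAL_REQ_WR	0x1	/* policy bit that must survive reset */
	#define S_BUSY		0x2	/* transient state that must not */

	int main(void)
	{
		unsigned old = S_SIGNAL_REQ_WR | S_BUSY;
		unsigned fixed = old;

		old &= ~S_SIGNAL_REQ_WR;	/* old code: clears exactly the bit to keep */
		fixed &= S_SIGNAL_REQ_WR;	/* new code: keeps it, drops the rest */
		printf("old=%#x fixed=%#x\n", old, fixed);	/* old=0x2 fixed=0x1 */
		return 0;
	}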
+ 82 - 34
drivers/infiniband/hw/ipath/ipath_rc.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -125,8 +125,10 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
 			if (len > pmtu) {
 				len = pmtu;
 				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
-			} else
+			} else {
 				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+				e->sent = 1;
+			}
 			ohdr->u.aeth = ipath_compute_aeth(qp);
 			hwords++;
 			qp->s_ack_rdma_psn = e->psn;
@@ -143,6 +145,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
 				cpu_to_be32(e->atomic_data);
 			hwords += sizeof(ohdr->u.at) / sizeof(u32);
 			bth2 = e->psn;
+			e->sent = 1;
 		}
 		bth0 = qp->s_ack_state << 24;
 		break;
@@ -158,6 +161,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
 			ohdr->u.aeth = ipath_compute_aeth(qp);
 			hwords++;
 			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+			qp->s_ack_queue[qp->s_tail_ack_queue].sent = 1;
 		}
 		bth0 = qp->s_ack_state << 24;
 		bth2 = qp->s_ack_rdma_psn++ & IPATH_PSN_MASK;
@@ -188,7 +192,7 @@ static int ipath_make_rc_ack(struct ipath_qp *qp,
 	}
 	qp->s_hdrwords = hwords;
 	qp->s_cur_size = len;
-	*bth0p = bth0;
+	*bth0p = bth0 | (1 << 22); /* Set M bit */
 	*bth2p = bth2;
 	return 1;
 
@@ -240,7 +244,7 @@ int ipath_make_rc_req(struct ipath_qp *qp,
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	hwords = 5;
-	bth0 = 0;
+	bth0 = 1 << 22; /* Set M bit */
 
 	/* Send a request. */
 	wqe = get_swqe_ptr(qp, qp->s_cur);
@@ -604,7 +608,7 @@ static void send_rc_ack(struct ipath_qp *qp)
 	}
 	/* read pkey_index w/o lock (its atomic) */
 	bth0 = ipath_get_pkey(dev->dd, qp->s_pkey_index) |
-		OP(ACKNOWLEDGE) << 24;
+		(OP(ACKNOWLEDGE) << 24) | (1 << 22);
 	if (qp->r_nak_state)
 		ohdr->u.aeth = cpu_to_be32((qp->r_msn & IPATH_MSN_MASK) |
 					    (qp->r_nak_state <<
@@ -806,13 +810,15 @@ static inline void update_last_psn(struct ipath_qp *qp, u32 psn)
  * Called at interrupt level with the QP s_lock held and interrupts disabled.
  * Returns 1 if OK, 0 if current operation should be aborted (NAK).
  */
-static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
+static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode,
+		     u64 val)
 {
 	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
 	struct ib_wc wc;
 	struct ipath_swqe *wqe;
 	int ret = 0;
 	u32 ack_psn;
+	int diff;
 
 	/*
 	 * Remove the QP from the timeout queue (or RNR timeout queue).
@@ -840,7 +846,19 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 	 * The MSN might be for a later WQE than the PSN indicates so
 	 * only complete WQEs that the PSN finishes.
 	 */
-	while (ipath_cmp24(ack_psn, wqe->lpsn) >= 0) {
+	while ((diff = ipath_cmp24(ack_psn, wqe->lpsn)) >= 0) {
+		/*
+		 * RDMA_READ_RESPONSE_ONLY is a special case since
+		 * we want to generate completion events for everything
+		 * before the RDMA read, copy the data, then generate
+		 * the completion for the read.
+		 */
+		if (wqe->wr.opcode == IB_WR_RDMA_READ &&
+		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
+		    diff == 0) {
+			ret = 1;
+			goto bail;
+		}
 		/*
 		 * If this request is a RDMA read or atomic, and the ACK is
 		 * for a later operation, this ACK NAKs the RDMA read or
@@ -851,12 +869,10 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 		 * is sent but before the response is received.
 		 */
 		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
-		     (opcode != OP(RDMA_READ_RESPONSE_LAST) ||
-		      ipath_cmp24(ack_psn, wqe->lpsn) != 0)) ||
+		     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
 		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
 		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
-		     (opcode != OP(ATOMIC_ACKNOWLEDGE) ||
-		      ipath_cmp24(wqe->psn, psn) != 0))) {
+		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
 			/*
 			 * The last valid PSN seen is the previous
 			 * request's.
@@ -870,6 +886,9 @@ static int do_rc_ack(struct ipath_qp *qp, u32 aeth, u32 psn, int opcode)
 			 */
 			goto bail;
 		}
+		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+			*(u64 *) wqe->sg_list[0].vaddr = val;
 		if (qp->s_num_rd_atomic &&
 		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
 		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
@@ -1079,6 +1098,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 	int diff;
 	u32 pad;
 	u32 aeth;
+	u64 val;
 
 	spin_lock_irqsave(&qp->s_lock, flags);
 
@@ -1118,8 +1138,6 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 			data += sizeof(__be32);
 		}
 		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
-			u64 val;
-
 			if (!header_in_data) {
 				__be32 *p = ohdr->u.at.atomic_ack_eth;
 
@@ -1127,12 +1145,13 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 					be32_to_cpu(p[1]);
 			} else
 				val = be64_to_cpu(((__be64 *) data)[0]);
-			*(u64 *) wqe->sg_list[0].vaddr = val;
-		}
-		if (!do_rc_ack(qp, aeth, psn, opcode) ||
+		} else
+			val = 0;
+		if (!do_rc_ack(qp, aeth, psn, opcode, val) ||
 		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
 			goto ack_done;
 		hdrsize += 4;
+		wqe = get_swqe_ptr(qp, qp->s_last);
 		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
 			goto ack_op_err;
 		/*
@@ -1176,13 +1195,12 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 		goto bail;
 
 	case OP(RDMA_READ_RESPONSE_ONLY):
-		if (unlikely(ipath_cmp24(psn, qp->s_last_psn + 1))) {
-			dev->n_rdma_seq++;
-			ipath_restart_rc(qp, qp->s_last_psn + 1, &wc);
+		if (!header_in_data)
+			aeth = be32_to_cpu(ohdr->u.aeth);
+		else
+			aeth = be32_to_cpu(((__be32 *) data)[0]);
+		if (!do_rc_ack(qp, aeth, psn, opcode, 0))
 			goto ack_done;
-		}
-		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
-			goto ack_op_err;
 		/* Get the number of bytes the message was padded by. */
 		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
 		/*
@@ -1197,6 +1215,7 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 		 * have to be careful to copy the data to the right
 		 * location.
 		 */
+		wqe = get_swqe_ptr(qp, qp->s_last);
 		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
 						  wqe, psn, pmtu);
 		goto read_last;
@@ -1230,7 +1249,8 @@ static inline void ipath_rc_rcv_resp(struct ipath_ibdev *dev,
 			data += sizeof(__be32);
 		}
 		ipath_copy_sge(&qp->s_rdma_read_sge, data, tlen);
-		(void) do_rc_ack(qp, aeth, psn, OP(RDMA_READ_RESPONSE_LAST));
+		(void) do_rc_ack(qp, aeth, psn,
+				 OP(RDMA_READ_RESPONSE_LAST), 0);
 		goto ack_done;
 	}
 
@@ -1344,8 +1364,11 @@ static inline int ipath_rc_rcv_error(struct ipath_ibdev *dev,
 			e = NULL;
 			break;
 		}
-		if (ipath_cmp24(psn, e->psn) >= 0)
+		if (ipath_cmp24(psn, e->psn) >= 0) {
+			if (prev == qp->s_tail_ack_queue)
+				old_req = 0;
 			break;
+		}
 	}
 	switch (opcode) {
 	case OP(RDMA_READ_REQUEST): {
@@ -1460,6 +1483,22 @@ static void ipath_rc_error(struct ipath_qp *qp, enum ib_wc_status err)
 	spin_unlock_irqrestore(&qp->s_lock, flags);
 }
 
+static inline void ipath_update_ack_queue(struct ipath_qp *qp, unsigned n)
+{
+	unsigned long flags;
+	unsigned next;
+
+	next = n + 1;
+	if (next > IPATH_MAX_RDMA_ATOMIC)
+		next = 0;
+	spin_lock_irqsave(&qp->s_lock, flags);
+	if (n == qp->s_tail_ack_queue) {
+		qp->s_tail_ack_queue = next;
+		qp->s_ack_state = OP(ACKNOWLEDGE);
+	}
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
 /**
  * ipath_rc_rcv - process an incoming RC packet
  * @dev: the device this packet came in on
@@ -1672,6 +1711,9 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 	case OP(RDMA_WRITE_FIRST):
 	case OP(RDMA_WRITE_ONLY):
 	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+		if (unlikely(!(qp->qp_access_flags &
+			       IB_ACCESS_REMOTE_WRITE)))
+			goto nack_inv;
 		/* consume RWQE */
 		/* RETH comes after BTH */
 		if (!header_in_data)
@@ -1701,9 +1743,6 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 			qp->r_sge.sge.length = 0;
 			qp->r_sge.sge.sge_length = 0;
 		}
-		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_WRITE)))
-			goto nack_acc;
 		if (opcode == OP(RDMA_WRITE_FIRST))
 			goto send_middle;
 		else if (opcode == OP(RDMA_WRITE_ONLY))
@@ -1717,13 +1756,17 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 		u32 len;
 		u8 next;
 
-		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
-			goto nack_acc;
+		if (unlikely(!(qp->qp_access_flags &
+			       IB_ACCESS_REMOTE_READ)))
+			goto nack_inv;
 		next = qp->r_head_ack_queue + 1;
 		if (next > IPATH_MAX_RDMA_ATOMIC)
 			next = 0;
-		if (unlikely(next == qp->s_tail_ack_queue))
-			goto nack_inv;
+		if (unlikely(next == qp->s_tail_ack_queue)) {
+			if (!qp->s_ack_queue[next].sent)
+				goto nack_inv;
+			ipath_update_ack_queue(qp, next);
+		}
 		e = &qp->s_ack_queue[qp->r_head_ack_queue];
 		/* RETH comes after BTH */
 		if (!header_in_data)
@@ -1758,6 +1801,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 			e->rdma_sge.sge.sge_length = 0;
 		}
 		e->opcode = opcode;
+		e->sent = 0;
 		e->psn = psn;
 		/*
 		 * We need to increment the MSN here instead of when we
@@ -1789,12 +1833,15 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 
 		if (unlikely(!(qp->qp_access_flags &
 			       IB_ACCESS_REMOTE_ATOMIC)))
-			goto nack_acc;
+			goto nack_inv;
 		next = qp->r_head_ack_queue + 1;
 		if (next > IPATH_MAX_RDMA_ATOMIC)
 			next = 0;
-		if (unlikely(next == qp->s_tail_ack_queue))
-			goto nack_inv;
+		if (unlikely(next == qp->s_tail_ack_queue)) {
+			if (!qp->s_ack_queue[next].sent)
+				goto nack_inv;
+			ipath_update_ack_queue(qp, next);
+		}
 		if (!header_in_data)
 			ateth = &ohdr->u.atomic_eth;
 		else
@@ -1819,6 +1866,7 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
 				      be64_to_cpu(ateth->compare_data),
 				      sdata);
 		e->opcode = opcode;
+		e->sent = 0;
 		e->psn = psn & IPATH_PSN_MASK;
 		qp->r_msn++;
 		qp->r_psn++;

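The new ipath_update_ack_queue() helper advances s_tail_ack_queue past an entry whose response was already sent, so an incoming RDMA read or atomic can recycle the slot instead of being NAKed. The ring holds IPATH_MAX_RDMA_ATOMIC + 1 entries and the index wraps by comparison rather than a modulus; a compact sketch of that arithmetic (demo harness hypothetical):

	#include <stdio.h>

	#define MAX_RDMA_ATOMIC 4	/* ring has MAX_RDMA_ATOMIC + 1 slots */

	/* advance a ring index, wrapping after the last slot */
	static unsigned ring_next(unsigned n)
	{
		return (n + 1 > MAX_RDMA_ATOMIC) ? 0 : n + 1;
	}

	int main(void)
	{
		unsigned i, idx = 0;

		for (i = 0; i < 7; i++) {	/* walks 0 1 2 3 4 0 1 */
			printf("%u ", idx);
			idx = ring_next(idx);
		}
		printf("\n");
		return 0;
	}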
+ 1 - 1
drivers/infiniband/hw/ipath/ipath_registers.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two

+ 26 - 10
drivers/infiniband/hw/ipath/ipath_ruc.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -194,6 +194,8 @@ int ipath_get_rwqe(struct ipath_qp *qp, int wr_id_only)
 			ret = 0;
 			goto bail;
 		}
+		/* Make sure entry is read after head index is read. */
+		smp_rmb();
 		wqe = get_rwqe_ptr(rq, tail);
 		if (++tail >= rq->size)
 			tail = 0;
@@ -267,7 +269,7 @@ again:
 	spin_lock_irqsave(&sqp->s_lock, flags);
 
 	if (!(ib_ipath_state_ops[sqp->state] & IPATH_PROCESS_SEND_OK) ||
-	    qp->s_rnr_timeout) {
+	    sqp->s_rnr_timeout) {
 		spin_unlock_irqrestore(&sqp->s_lock, flags);
 		goto done;
 	}
@@ -319,12 +321,22 @@ again:
 		break;
 
 	case IB_WR_RDMA_WRITE_WITH_IMM:
+		if (unlikely(!(qp->qp_access_flags &
+			       IB_ACCESS_REMOTE_WRITE))) {
+			wc.status = IB_WC_REM_INV_REQ_ERR;
+			goto err;
+		}
 		wc.wc_flags = IB_WC_WITH_IMM;
 		wc.imm_data = wqe->wr.imm_data;
 		if (!ipath_get_rwqe(qp, 1))
 			goto rnr_nak;
 		/* FALLTHROUGH */
 	case IB_WR_RDMA_WRITE:
+		if (unlikely(!(qp->qp_access_flags &
+			       IB_ACCESS_REMOTE_WRITE))) {
+			wc.status = IB_WC_REM_INV_REQ_ERR;
+			goto err;
+		}
 		if (wqe->length == 0)
 			break;
 		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, wqe->length,
@@ -354,8 +366,10 @@ again:
 
 	case IB_WR_RDMA_READ:
 		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_READ)))
-			goto acc_err;
+			       IB_ACCESS_REMOTE_READ))) {
+			wc.status = IB_WC_REM_INV_REQ_ERR;
+			goto err;
+		}
 		if (unlikely(!ipath_rkey_ok(qp, &sqp->s_sge, wqe->length,
 					    wqe->wr.wr.rdma.remote_addr,
 					    wqe->wr.wr.rdma.rkey,
@@ -369,8 +383,10 @@ again:
 	case IB_WR_ATOMIC_CMP_AND_SWP:
 	case IB_WR_ATOMIC_FETCH_AND_ADD:
 		if (unlikely(!(qp->qp_access_flags &
-			       IB_ACCESS_REMOTE_ATOMIC)))
-			goto acc_err;
+			       IB_ACCESS_REMOTE_ATOMIC))) {
+			wc.status = IB_WC_REM_INV_REQ_ERR;
+			goto err;
+		}
 		if (unlikely(!ipath_rkey_ok(qp, &qp->r_sge, sizeof(u64),
 					    wqe->wr.wr.atomic.remote_addr,
 					    wqe->wr.wr.atomic.rkey,
@@ -396,6 +412,8 @@ again:
 
 		if (len > sge->length)
 			len = sge->length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
 		BUG_ON(len == 0);
 		ipath_copy_sge(&qp->r_sge, sge->vaddr, len);
 		sge->vaddr += len;
@@ -503,11 +521,9 @@ void ipath_no_bufs_available(struct ipath_qp *qp, struct ipath_ibdev *dev)
 	 * could be called.  If we are still in the tasklet function,
 	 * tasklet_hi_schedule() will not call us until the next time
 	 * tasklet_hi_schedule() is called.
-	 * We clear the tasklet flag now since we are committing to return
-	 * from the tasklet function.
+	 * We leave the busy flag set so that another post send doesn't
+	 * try to put the same QP on the piowait list again.
 	 */
-	clear_bit(IPATH_S_BUSY, &qp->s_busy);
-	tasklet_unlock(&qp->s_task);
 	want_buffer(dev->dd);
 	dev->n_piowait++;
 }

+ 3 - 1
drivers/infiniband/hw/ipath/ipath_srq.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -80,6 +80,8 @@ int ipath_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
 		wqe->num_sge = wr->num_sge;
 		for (i = 0; i < wr->num_sge; i++)
 			wqe->sg_list[i] = wr->sg_list[i];
+		/* Make sure queue entry is written before the head index. */
+		smp_wmb();
 		wq->head = next;
 		spin_unlock_irqrestore(&srq->rq.lock, flags);
 	}

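The smp_wmb() added in ipath_post_srq_receive() is the producer half of a barrier pair: the queue entry must be globally visible before the head index that publishes it, and the matching smp_rmb() on the consumer side (added in ipath_get_rwqe() above and in ipath_ud_loopback() below) orders the index read before the entry read. A user-space analogue of the same pattern using C11 acquire/release semantics (names and harness hypothetical):

	#include <stdatomic.h>
	#include <stdio.h>

	struct ring {
		int entries[64];
		atomic_uint head;	/* publishes how many entries are valid */
	};

	/* producer: fill the slot, then publish it (role of smp_wmb() + head store) */
	static void publish(struct ring *r, unsigned int slot, int v)
	{
		r->entries[slot] = v;
		atomic_store_explicit(&r->head, slot + 1, memory_order_release);
	}

	/* consumer: read the index first, then the entry it covers (role of smp_rmb()) */
	static int consume(struct ring *r, unsigned int slot)
	{
		while (atomic_load_explicit(&r->head, memory_order_acquire) <= slot)
			;	/* not yet published */
		return r->entries[slot];
	}

	int main(void)
	{
		static struct ring r;	/* zero-initialized: head == 0 */

		publish(&r, 0, 42);
		printf("%d\n", consume(&r, 0));	/* prints 42 */
		return 0;
	}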
+ 22 - 3
drivers/infiniband/hw/ipath/ipath_stats.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -55,6 +55,7 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
 	u64 val64;
 	unsigned long t0, t1;
 	u64 ret;
+	unsigned long flags;
 
 	t0 = jiffies;
 	/* If fast increment counters are only 32 bits, snapshot them,
@@ -91,12 +92,18 @@ u64 ipath_snap_cntr(struct ipath_devdata *dd, ipath_creg creg)
 	if (creg == dd->ipath_cregs->cr_wordsendcnt) {
 		if (val != dd->ipath_lastsword) {
 			dd->ipath_sword += val - dd->ipath_lastsword;
+			spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+			dd->ipath_traffic_wds += val - dd->ipath_lastsword;
+			spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
 			dd->ipath_lastsword = val;
 		}
 		val64 = dd->ipath_sword;
 	} else if (creg == dd->ipath_cregs->cr_wordrcvcnt) {
 		if (val != dd->ipath_lastrword) {
 			dd->ipath_rword += val - dd->ipath_lastrword;
+			spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+			dd->ipath_traffic_wds += val - dd->ipath_lastrword;
+			spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
 			dd->ipath_lastrword = val;
 		}
 		val64 = dd->ipath_rword;
@@ -200,6 +207,7 @@ void ipath_get_faststats(unsigned long opaque)
 	struct ipath_devdata *dd = (struct ipath_devdata *) opaque;
 	u32 val;
 	static unsigned cnt;
+	unsigned long flags;
 
 	/*
 	 * don't access the chip while running diags, or memory diags can
@@ -210,9 +218,20 @@ void ipath_get_faststats(unsigned long opaque)
 		/* but re-arm the timer, for diags case; won't hurt other */
 		goto done;
 
+	/*
+	 * We now try to maintain an "active timer", based on traffic
+	 * exceeding a threshold, so we need to check the word-counts
+	 * even if they are 64-bit.
+	 */
+	ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
+	ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
+	spin_lock_irqsave(&dd->ipath_eep_st_lock, flags);
+	if (dd->ipath_traffic_wds >= IPATH_TRAFFIC_ACTIVE_THRESHOLD)
+		atomic_add(5, &dd->ipath_active_time); /* S/B #define */
+	dd->ipath_traffic_wds = 0;
+	spin_unlock_irqrestore(&dd->ipath_eep_st_lock, flags);
+
 	if (dd->ipath_flags & IPATH_32BITCOUNTERS) {
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
-		ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
 		ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
 	}

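ipath_get_faststats() now sums send and receive dwords over each 5-second tick and credits "active time" only when the window's traffic reaches IPATH_TRAFFIC_ACTIVE_THRESHOLD. A small stand-alone model of that accounting (threshold from the patch; the sample data and harness are invented):

	#include <stdio.h>

	#define TRAFFIC_ACTIVE_THRESHOLD 2000	/* dwords per 5 s, as in the patch */

	int main(void)
	{
		/* dwords moved in successive 5-second windows (made-up sample) */
		unsigned windows[] = { 0, 150, 5000, 2000, 64, 9000 };
		unsigned i, active_seconds = 0;

		for (i = 0; i < sizeof(windows) / sizeof(windows[0]); i++)
			if (windows[i] >= TRAFFIC_ACTIVE_THRESHOLD)
				active_seconds += 5;	/* credit the whole window */

		printf("active: %u s\n", active_seconds);	/* prints 15 */
		return 0;
	}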
+ 42 - 1
drivers/infiniband/hw/ipath/ipath_sysfs.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -596,6 +596,43 @@ bail:
 	return ret;
 }
 
+static ssize_t store_led_override(struct device *dev,
+			  struct device_attribute *attr,
+			  const char *buf,
+			  size_t count)
+{
+	struct ipath_devdata *dd = dev_get_drvdata(dev);
+	int ret;
+	u16 val;
+
+	ret = ipath_parse_ushort(buf, &val);
+	if (ret > 0)
+		ipath_set_led_override(dd, val);
+	else
+		ipath_dev_err(dd, "attempt to set invalid LED override\n");
+	return ret;
+}
+
+static ssize_t show_logged_errs(struct device *dev,
+				struct device_attribute *attr,
+				char *buf)
+{
+	struct ipath_devdata *dd = dev_get_drvdata(dev);
+	int idx, count;
+
+	/* force consistency with actual EEPROM */
+	if (ipath_update_eeprom_log(dd) != 0)
+		return -ENXIO;
+
+	count = 0;
+	for (idx = 0; idx < IPATH_EEP_LOG_CNT; ++idx) {
+		count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
+			dd->ipath_eep_st_errs[idx],
+			idx == (IPATH_EEP_LOG_CNT - 1) ? '\n' : ' ');
+	}
+
+	return count;
+}
 
 static DRIVER_ATTR(num_units, S_IRUGO, show_num_units, NULL);
 static DRIVER_ATTR(version, S_IRUGO, show_version, NULL);
@@ -625,6 +662,8 @@ static DEVICE_ATTR(status_str, S_IRUGO, show_status_str, NULL);
 static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
 static DEVICE_ATTR(unit, S_IRUGO, show_unit, NULL);
 static DEVICE_ATTR(rx_pol_inv, S_IWUSR, NULL, store_rx_pol_inv);
+static DEVICE_ATTR(led_override, S_IWUSR, NULL, store_led_override);
+static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
 
 static struct attribute *dev_attributes[] = {
 	&dev_attr_guid.attr,
@@ -641,6 +680,8 @@ static struct attribute *dev_attributes[] = {
 	&dev_attr_unit.attr,
 	&dev_attr_enabled.attr,
 	&dev_attr_rx_pol_inv.attr,
+	&dev_attr_led_override.attr,
+	&dev_attr_logged_errors.attr,
 	NULL
 };
 

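show_logged_errs() above builds its output with scnprintf(), which returns the number of bytes actually stored, so the running count can be added back into the offset without ever overrunning the PAGE_SIZE sysfs buffer. A user-space sketch of the same bounded-append loop (plain snprintf reports the would-be length, so the clamp is explicit here; names hypothetical):

	#include <stdio.h>

	#define LOG_CNT 4

	/* append "e0 e1 e2 e3\n" into buf without overrunning size */
	static int format_errs(char *buf, int size, const unsigned char *errs)
	{
		int idx, count = 0;

		for (idx = 0; idx < LOG_CNT && count < size; idx++)
			count += snprintf(buf + count, size - count, "%d%c",
					  errs[idx],
					  idx == LOG_CNT - 1 ? '\n' : ' ');
		return count < size ? count : size - 1;	/* clamp overlong output */
	}

	int main(void)
	{
		unsigned char errs[LOG_CNT] = { 0, 3, 255, 1 };
		char buf[64];

		format_errs(buf, sizeof(buf), errs);
		fputs(buf, stdout);	/* prints: 0 3 255 1 */
		return 0;
	}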
+ 5 - 4
drivers/infiniband/hw/ipath/ipath_uc.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -58,7 +58,6 @@ static void complete_last_send(struct ipath_qp *qp, struct ipath_swqe *wqe,
 		wc->port_num = 0;
 		ipath_cq_enter(to_icq(qp->ibqp.send_cq), wc, 0);
 	}
-	wqe = get_swqe_ptr(qp, qp->s_last);
 }
 
 /**
@@ -87,7 +86,7 @@ int ipath_make_uc_req(struct ipath_qp *qp,
 
 	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
 	hwords = 5;
-	bth0 = 0;
+	bth0 = 1 << 22; /* Set M bit */
 
 	/* Get the next send request. */
 	wqe = get_swqe_ptr(qp, qp->s_last);
@@ -97,8 +96,10 @@ int ipath_make_uc_req(struct ipath_qp *qp,
 		 * Signal the completion of the last send
 		 * (if there is one).
 		 */
-		if (qp->s_last != qp->s_tail)
+		if (qp->s_last != qp->s_tail) {
 			complete_last_send(qp, wqe, &wc);
+			wqe = get_swqe_ptr(qp, qp->s_last);
+		}
 
 		/* Check if send work queue is empty. */
 		if (qp->s_tail == qp->s_head)

+ 5 - 1
drivers/infiniband/hw/ipath/ipath_ud.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -176,6 +176,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 			dev->n_pkt_drops++;
 			goto bail_sge;
 		}
+		/* Make sure entry is read after head index is read. */
+		smp_rmb();
 		wqe = get_rwqe_ptr(rq, tail);
 		if (++tail >= rq->size)
 			tail = 0;
@@ -231,6 +233,8 @@ static void ipath_ud_loopback(struct ipath_qp *sqp,
 
 		if (len > length)
 			len = length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
 		BUG_ON(len == 0);
 		ipath_copy_sge(&rsge, sge->vaddr, len);
 		sge->vaddr += len;

+ 1 - 1
drivers/infiniband/hw/ipath/ipath_user_pages.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two

+ 18 - 11
drivers/infiniband/hw/ipath/ipath_verbs.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -164,9 +164,11 @@ void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
 	while (length) {
 		u32 len = sge->length;
 
-		BUG_ON(len == 0);
 		if (len > length)
 			len = length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
+		BUG_ON(len == 0);
 		memcpy(sge->vaddr, data, len);
 		sge->vaddr += len;
 		sge->length -= len;
@@ -202,9 +204,11 @@ void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
 	while (length) {
 		u32 len = sge->length;
 
-		BUG_ON(len == 0);
 		if (len > length)
 			len = length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
+		BUG_ON(len == 0);
 		sge->vaddr += len;
 		sge->length -= len;
 		sge->sge_length -= len;
@@ -323,6 +327,8 @@ static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		wqe->num_sge = wr->num_sge;
 		for (i = 0; i < wr->num_sge; i++)
 			wqe->sg_list[i] = wr->sg_list[i];
+		/* Make sure queue entry is written before the head index. */
+		smp_wmb();
 		wq->head = next;
 		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
 	}
@@ -948,6 +954,7 @@ int ipath_ib_piobufavail(struct ipath_ibdev *dev)
 		qp = list_entry(dev->piowait.next, struct ipath_qp,
 				piowait);
 		list_del_init(&qp->piowait);
+		clear_bit(IPATH_S_BUSY, &qp->s_busy);
 		tasklet_hi_schedule(&qp->s_task);
 	}
 	spin_unlock_irqrestore(&dev->pending_lock, flags);
@@ -981,6 +988,8 @@ static int ipath_query_device(struct ib_device *ibdev,
 	props->max_ah = ib_ipath_max_ahs;
 	props->max_cqe = ib_ipath_max_cqes;
 	props->max_mr = dev->lk_table.max;
+	props->max_fmr = dev->lk_table.max;
+	props->max_map_per_fmr = 32767;
 	props->max_pd = ib_ipath_max_pds;
 	props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
 	props->max_qp_init_rd_atom = 255;
@@ -1051,7 +1060,12 @@ static int ipath_query_port(struct ib_device *ibdev,
 	props->max_vl_num = 1;		/* VLCap = VL0 */
 	props->init_type_reply = 0;
 
-	props->max_mtu = IB_MTU_4096;
+	/*
+	 * Note: the chips support a maximum MTU of 4096, but the driver
+	 * hasn't implemented this feature yet, so set the maximum value
+	 * to 2048.
+	 */
+	props->max_mtu = IB_MTU_2048;
 	switch (dev->dd->ipath_ibmtu) {
 	case 4096:
 		mtu = IB_MTU_4096;
@@ -1361,13 +1375,6 @@ static void __verbs_timer(unsigned long arg)
 {
 	struct ipath_devdata *dd = (struct ipath_devdata *) arg;
 
-	/*
-	 * If port 0 receive packet interrupts are not available, or
-	 * can be missed, poll the receive queue
-	 */
-	if (dd->ipath_flags & IPATH_POLL_RX_INTR)
-		ipath_kreceive(dd);
-
 	/* Handle verbs layer timeouts. */
 	ipath_ib_timer(dd->verbs_dev);
 

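Both SGE walkers above now clamp each step to sge->sge_length as well as sge->length before the BUG_ON, so a short final segment of a registered region can no longer drive memcpy() past its end. The clamp in isolation (field names follow the driver; the struct and harness are illustrative):

	#include <stdio.h>

	struct sge { unsigned length; unsigned sge_length; };

	/* pick the safe chunk size for one copy step */
	static unsigned clamp_len(unsigned want, const struct sge *sge)
	{
		unsigned len = want;

		if (len > sge->length)
			len = sge->length;	/* current segment */
		if (len > sge->sge_length)
			len = sge->sge_length;	/* registered range remaining */
		return len;
	}

	int main(void)
	{
		struct sge s = { .length = 4096, .sge_length = 100 };

		printf("%u\n", clamp_len(1500, &s));	/* prints 100 */
		return 0;
	}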
+ 2 - 1
drivers/infiniband/hw/ipath/ipath_verbs.h

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -321,6 +321,7 @@ struct ipath_sge_state {
  */
 struct ipath_ack_entry {
 	u8 opcode;
+	u8 sent;
 	u32 psn;
 	union {
 		struct ipath_sge_state rdma_sge;

+ 1 - 1
drivers/infiniband/hw/ipath/ipath_verbs_mcast.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two

+ 1 - 1
drivers/infiniband/hw/ipath/ipath_wc_ppc64.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  *
  * This software is available to you under a choice of one of two
  * licenses.  You may choose to be licensed under the terms of the GNU

+ 23 - 6
drivers/infiniband/hw/ipath/ipath_wc_x86_64.c

@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006 QLogic, Inc. All rights reserved.
+ * Copyright (c) 2006, 2007 QLogic Corporation. All rights reserved.
  * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
  *
  * This software is available to you under a choice of one of two
@@ -63,12 +63,29 @@ int ipath_enable_wc(struct ipath_devdata *dd)
 	 * of 2 address matching the length (which has to be a power of 2).
 	 * For rev1, that means the base address, for rev2, it will be just
 	 * the PIO buffers themselves.
+	 * For chips with two sets of buffers, the calculations are
+	 * somewhat more complicated; we need to sum, and the piobufbase
+	 * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
+	 * The buffers are still packed, so a single range covers both.
 	 */
-	pioaddr = addr + dd->ipath_piobufbase;
-	piolen = (dd->ipath_piobcnt2k +
-		  dd->ipath_piobcnt4k) *
-		ALIGN(dd->ipath_piobcnt2k +
-		      dd->ipath_piobcnt4k, dd->ipath_palign);
+	if (dd->ipath_piobcnt2k && dd->ipath_piobcnt4k) { /* 2 sizes */
+		unsigned long pio2kbase, pio4kbase;
+		pio2kbase = dd->ipath_piobufbase & 0xffffffffUL;
+		pio4kbase = (dd->ipath_piobufbase >> 32) & 0xffffffffUL;
+		if (pio2kbase < pio4kbase) { /* all, for now */
+			pioaddr = addr + pio2kbase;
+			piolen = pio4kbase - pio2kbase +
+				dd->ipath_piobcnt4k * dd->ipath_4kalign;
+		} else {
+			pioaddr = addr + pio4kbase;
+			piolen = pio2kbase - pio4kbase +
+				dd->ipath_piobcnt2k * dd->ipath_palign;
+		}
+	} else {  /* single buffer size (2K, currently) */
+		pioaddr = addr + dd->ipath_piobufbase;
+		piolen = dd->ipath_piobcnt2k * dd->ipath_palign +
+			dd->ipath_piobcnt4k * dd->ipath_4kalign;
+	}
 
 	for (bits = 0; !(piolen & (1ULL << bits)); bits++)
 		/* do nothing */ ;

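As the new comment above notes, chips with both 2K and 4K PIO buffer sets pack the two offsets into one 64-bit piobufbase register. A standalone recap of the decoding, with an invented register value (userspace C, for illustration only):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t piobufbase = (0x30000ULL << 32) | 0x10000ULL; /* invented */
	uint64_t pio2kbase = piobufbase & 0xffffffffULL;        /* low half */
	uint64_t pio4kbase = (piobufbase >> 32) & 0xffffffffULL;/* high half */

	/* The buffers are packed, so one range starting at the lower
	 * offset covers both sets for the write-combining mapping. */
	printf("2K set at 0x%llx, 4K set at 0x%llx\n",
	       (unsigned long long) pio2kbase,
	       (unsigned long long) pio4kbase);
	return 0;
}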
+ 0 - 1
drivers/infiniband/hw/mlx4/Kconfig

@@ -1,6 +1,5 @@
 config MLX4_INFINIBAND
 	tristate "Mellanox ConnectX HCA support"
-	depends on INFINIBAND
 	select MLX4_CORE
 	---help---
 	  This driver provides low-level InfiniBand support for

+ 5 - 1
drivers/infiniband/hw/mlx4/main.c

@@ -169,7 +169,7 @@ static int mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
 	props->phys_state	= out_mad->data[33] >> 4;
 	props->port_cap_flags	= be32_to_cpup((__be32 *) (out_mad->data + 20));
 	props->gid_tbl_len	= to_mdev(ibdev)->dev->caps.gid_table_len[port];
-	props->max_msg_sz	= 0x80000000;
+	props->max_msg_sz	= to_mdev(ibdev)->dev->caps.max_msg_sz;
 	props->pkey_tbl_len	= to_mdev(ibdev)->dev->caps.pkey_table_len[port];
 	props->bad_pkey_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 46));
 	props->qkey_viol_cntr	= be16_to_cpup((__be16 *) (out_mad->data + 48));
@@ -523,11 +523,13 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
 		(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
 		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
 		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
 		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)	|
 		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)	|
 		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
+		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ);
 
 	ibdev->ib_dev.query_device	= mlx4_ib_query_device;
@@ -546,10 +548,12 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	ibdev->ib_dev.destroy_ah	= mlx4_ib_destroy_ah;
 	ibdev->ib_dev.create_srq	= mlx4_ib_create_srq;
 	ibdev->ib_dev.modify_srq	= mlx4_ib_modify_srq;
+	ibdev->ib_dev.query_srq		= mlx4_ib_query_srq;
 	ibdev->ib_dev.destroy_srq	= mlx4_ib_destroy_srq;
 	ibdev->ib_dev.post_srq_recv	= mlx4_ib_post_srq_recv;
 	ibdev->ib_dev.create_qp		= mlx4_ib_create_qp;
 	ibdev->ib_dev.modify_qp		= mlx4_ib_modify_qp;
+	ibdev->ib_dev.query_qp		= mlx4_ib_query_qp;
 	ibdev->ib_dev.destroy_qp	= mlx4_ib_destroy_qp;
 	ibdev->ib_dev.post_send		= mlx4_ib_post_send;
 	ibdev->ib_dev.post_recv		= mlx4_ib_post_recv;

+ 4 - 0
drivers/infiniband/hw/mlx4/mlx4_ib.h

@@ -35,6 +35,7 @@
 
 #include <linux/compiler.h>
 #include <linux/list.h>
+#include <linux/mutex.h>
 
 #include <rdma/ib_verbs.h>
 #include <rdma/ib_umem.h>
@@ -255,6 +256,7 @@ struct ib_srq *mlx4_ib_create_srq(struct ib_pd *pd,
 				  struct ib_udata *udata);
 int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 		       enum ib_srq_attr_mask attr_mask, struct ib_udata *udata);
+int mlx4_ib_query_srq(struct ib_srq *srq, struct ib_srq_attr *srq_attr);
 int mlx4_ib_destroy_srq(struct ib_srq *srq);
 void mlx4_ib_free_srq_wqe(struct mlx4_ib_srq *srq, int wqe_index);
 int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
@@ -266,6 +268,8 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd,
 int mlx4_ib_destroy_qp(struct ib_qp *qp);
 int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		      int attr_mask, struct ib_udata *udata);
+int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+		     struct ib_qp_init_attr *qp_init_attr);
 int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 		      struct ib_send_wr **bad_wr);
 int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,

+ 137 - 0
drivers/infiniband/hw/mlx4/qp.c

@@ -1455,3 +1455,140 @@ out:
 
 	return err;
 }
+
+static inline enum ib_qp_state to_ib_qp_state(enum mlx4_qp_state mlx4_state)
+{
+	switch (mlx4_state) {
+	case MLX4_QP_STATE_RST:      return IB_QPS_RESET;
+	case MLX4_QP_STATE_INIT:     return IB_QPS_INIT;
+	case MLX4_QP_STATE_RTR:      return IB_QPS_RTR;
+	case MLX4_QP_STATE_RTS:      return IB_QPS_RTS;
+	case MLX4_QP_STATE_SQ_DRAINING:
+	case MLX4_QP_STATE_SQD:      return IB_QPS_SQD;
+	case MLX4_QP_STATE_SQER:     return IB_QPS_SQE;
+	case MLX4_QP_STATE_ERR:      return IB_QPS_ERR;
+	default:		     return -1;
+	}
+}
+
+static inline enum ib_mig_state to_ib_mig_state(int mlx4_mig_state)
+{
+	switch (mlx4_mig_state) {
+	case MLX4_QP_PM_ARMED:		return IB_MIG_ARMED;
+	case MLX4_QP_PM_REARM:		return IB_MIG_REARM;
+	case MLX4_QP_PM_MIGRATED:	return IB_MIG_MIGRATED;
+	default: return -1;
+	}
+}
+
+static int to_ib_qp_access_flags(int mlx4_flags)
+{
+	int ib_flags = 0;
+
+	if (mlx4_flags & MLX4_QP_BIT_RRE)
+		ib_flags |= IB_ACCESS_REMOTE_READ;
+	if (mlx4_flags & MLX4_QP_BIT_RWE)
+		ib_flags |= IB_ACCESS_REMOTE_WRITE;
+	if (mlx4_flags & MLX4_QP_BIT_RAE)
+		ib_flags |= IB_ACCESS_REMOTE_ATOMIC;
+
+	return ib_flags;
+}
+
+static void to_ib_ah_attr(struct mlx4_dev *dev, struct ib_ah_attr *ib_ah_attr,
+				struct mlx4_qp_path *path)
+{
+	memset(ib_ah_attr, 0, sizeof *path);
+	ib_ah_attr->port_num	  = path->sched_queue & 0x40 ? 2 : 1;
+
+	if (ib_ah_attr->port_num == 0 || ib_ah_attr->port_num > dev->caps.num_ports)
+		return;
+
+	ib_ah_attr->dlid	  = be16_to_cpu(path->rlid);
+	ib_ah_attr->sl		  = (path->sched_queue >> 2) & 0xf;
+	ib_ah_attr->src_path_bits = path->grh_mylmc & 0x7f;
+	ib_ah_attr->static_rate   = path->static_rate ? path->static_rate - 5 : 0;
+	ib_ah_attr->ah_flags      = (path->grh_mylmc & (1 << 7)) ? IB_AH_GRH : 0;
+	if (ib_ah_attr->ah_flags) {
+		ib_ah_attr->grh.sgid_index = path->mgid_index;
+		ib_ah_attr->grh.hop_limit  = path->hop_limit;
+		ib_ah_attr->grh.traffic_class =
+			(be32_to_cpu(path->tclass_flowlabel) >> 20) & 0xff;
+		ib_ah_attr->grh.flow_label =
+			be32_to_cpu(path->tclass_flowlabel) & 0xffffff;
+		memcpy(ib_ah_attr->grh.dgid.raw,
+			path->rgid, sizeof ib_ah_attr->grh.dgid.raw);
+	}
+}
+
+int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+		     struct ib_qp_init_attr *qp_init_attr)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
+	struct mlx4_ib_qp *qp = to_mqp(ibqp);
+	struct mlx4_qp_context context;
+	int mlx4_state;
+	int err;
+
+	if (qp->state == IB_QPS_RESET) {
+		qp_attr->qp_state = IB_QPS_RESET;
+		goto done;
+	}
+
+	err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
+	if (err)
+		return -EINVAL;
+
+	mlx4_state = be32_to_cpu(context.flags) >> 28;
+
+	qp_attr->qp_state	     = to_ib_qp_state(mlx4_state);
+	qp_attr->path_mtu	     = context.mtu_msgmax >> 5;
+	qp_attr->path_mig_state	     =
+		to_ib_mig_state((be32_to_cpu(context.flags) >> 11) & 0x3);
+	qp_attr->qkey		     = be32_to_cpu(context.qkey);
+	qp_attr->rq_psn		     = be32_to_cpu(context.rnr_nextrecvpsn) & 0xffffff;
+	qp_attr->sq_psn		     = be32_to_cpu(context.next_send_psn) & 0xffffff;
+	qp_attr->dest_qp_num	     = be32_to_cpu(context.remote_qpn) & 0xffffff;
+	qp_attr->qp_access_flags     =
+		to_ib_qp_access_flags(be32_to_cpu(context.params2));
+
+	if (qp->ibqp.qp_type == IB_QPT_RC || qp->ibqp.qp_type == IB_QPT_UC) {
+		to_ib_ah_attr(dev->dev, &qp_attr->ah_attr, &context.pri_path);
+		to_ib_ah_attr(dev->dev, &qp_attr->alt_ah_attr, &context.alt_path);
+		qp_attr->alt_pkey_index = context.alt_path.pkey_index & 0x7f;
+		qp_attr->alt_port_num	= qp_attr->alt_ah_attr.port_num;
+	}
+
+	qp_attr->pkey_index = context.pri_path.pkey_index & 0x7f;
+	qp_attr->port_num   = context.pri_path.sched_queue & 0x40 ? 2 : 1;
+
+	/* qp_attr->en_sqd_async_notify is only applicable in modify qp */
+	qp_attr->sq_draining = mlx4_state == MLX4_QP_STATE_SQ_DRAINING;
+
+	qp_attr->max_rd_atomic = 1 << ((be32_to_cpu(context.params1) >> 21) & 0x7);
+
+	qp_attr->max_dest_rd_atomic =
+		1 << ((be32_to_cpu(context.params2) >> 21) & 0x7);
+	qp_attr->min_rnr_timer	    =
+		(be32_to_cpu(context.rnr_nextrecvpsn) >> 24) & 0x1f;
+	qp_attr->timeout	    = context.pri_path.ackto >> 3;
+	qp_attr->retry_cnt	    = (be32_to_cpu(context.params1) >> 16) & 0x7;
+	qp_attr->rnr_retry	    = (be32_to_cpu(context.params1) >> 13) & 0x7;
+	qp_attr->alt_timeout	    = context.alt_path.ackto >> 3;
+
+done:
+	qp_attr->cur_qp_state	     = qp_attr->qp_state;
+	if (!ibqp->uobject) {
+		qp_attr->cap.max_send_wr     = qp->sq.wqe_cnt;
+		qp_attr->cap.max_recv_wr     = qp->rq.wqe_cnt;
+		qp_attr->cap.max_send_sge    = qp->sq.max_gs;
+		qp_attr->cap.max_recv_sge    = qp->rq.max_gs;
+		qp_attr->cap.max_inline_data = (1 << qp->sq.wqe_shift) -
+			send_wqe_overhead(qp->ibqp.qp_type) -
+			sizeof (struct mlx4_wqe_inline_seg);
+		qp_init_attr->cap	     = qp_attr->cap;
+	}
+
+	return 0;
+}
+

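With query_qp now filled in on the mlx4 device, kernel consumers reach it through the core verb ib_query_qp(). A hedged caller sketch (the wrapper function and QP are assumptions; only the ib_query_qp() signature comes from the verbs API):

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

/* Sketch: read back the current state of a QP via the core verbs. */
static int log_qp_state(struct ib_qp *qp)
{
	struct ib_qp_attr attr;
	struct ib_qp_init_attr init_attr;
	int err;

	err = ib_query_qp(qp, &attr, IB_QP_STATE, &init_attr);
	if (err)
		return err;

	printk(KERN_INFO "QP 0x%x is in state %d\n",
	       qp->qp_num, attr.qp_state);
	return 0;
}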
+ 18 - 0
drivers/infiniband/hw/mlx4/srq.c

@@ -240,6 +240,24 @@ int mlx4_ib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 	return 0;
 }
 
+int mlx4_ib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
+{
+	struct mlx4_ib_dev *dev = to_mdev(ibsrq->device);
+	struct mlx4_ib_srq *srq = to_msrq(ibsrq);
+	int ret;
+	int limit_watermark;
+
+	ret = mlx4_srq_query(dev->dev, &srq->msrq, &limit_watermark);
+	if (ret)
+		return ret;
+
+	srq_attr->srq_limit = be16_to_cpu(limit_watermark);
+	srq_attr->max_wr    = srq->msrq.max - 1;
+	srq_attr->max_sge   = srq->msrq.max_gs;
+
+	return 0;
+}
+
 int mlx4_ib_destroy_srq(struct ib_srq *srq)
 {
 	struct mlx4_ib_dev *dev = to_mdev(srq->device);

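Likewise, the SRQ side is reached through ib_query_srq(); the attributes filled in above (srq_limit, max_wr, max_sge) come back in ib_srq_attr. A hedged caller sketch under the same assumptions:

#include <linux/kernel.h>
#include <rdma/ib_verbs.h>

/* Sketch: read back an SRQ's limit and sizing via the core verbs. */
static int log_srq_attrs(struct ib_srq *srq)
{
	struct ib_srq_attr attr;
	int err;

	err = ib_query_srq(srq, &attr);
	if (err)
		return err;

	printk(KERN_INFO "SRQ limit %u, max_wr %u, max_sge %u\n",
	       attr.srq_limit, attr.max_wr, attr.max_sge);
	return 0;
}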
+ 1 - 1
drivers/infiniband/hw/mthca/Kconfig

@@ -1,6 +1,6 @@
 config INFINIBAND_MTHCA
 	tristate "Mellanox HCA support"
-	depends on PCI && INFINIBAND
+	depends on PCI
 	---help---
 	  This is a low-level driver for Mellanox InfiniHost host
 	  channel adapters (HCAs), including the MT23108 PCI-X HCA

+ 1 - 1
drivers/infiniband/hw/mthca/mthca_allocator.c

@@ -255,7 +255,7 @@ int mthca_buf_alloc(struct mthca_dev *dev, int size, int max_direct,
 			dma_list[i] = t;
 			pci_unmap_addr_set(&buf->page_list[i], mapping, t);
 
-			memset(buf->page_list[i].buf, 0, PAGE_SIZE);
+			clear_page(buf->page_list[i].buf);
 		}
 	}
 

+ 1 - 1
drivers/infiniband/hw/mthca/mthca_eq.c

@@ -522,7 +522,7 @@ static int mthca_create_eq(struct mthca_dev *dev,
 		dma_list[i] = t;
 		pci_unmap_addr_set(&eq->page_list[i], mapping, t);
 
-		memset(eq->page_list[i].buf, 0, PAGE_SIZE);
+		clear_page(eq->page_list[i].buf);
 	}
 
 	for (i = 0; i < eq->nent; ++i)

+ 1 - 1
drivers/infiniband/ulp/ipoib/Kconfig

@@ -1,6 +1,6 @@
 config INFINIBAND_IPOIB
 	tristate "IP-over-InfiniBand"
-	depends on INFINIBAND && NETDEVICES && INET && (IPV6 || IPV6=n)
+	depends on NETDEVICES && INET && (IPV6 || IPV6=n)
 	---help---
 	  Support for the IP-over-InfiniBand protocol (IPoIB). This
 	  transports IP packets over InfiniBand so you can use your IB

+ 1 - 3
drivers/infiniband/ulp/ipoib/ipoib_cm.c

@@ -281,7 +281,6 @@ static int ipoib_cm_send_rep(struct net_device *dev, struct ib_cm_id *cm_id,
 	rep.private_data_len = sizeof data;
 	rep.flow_control = 0;
 	rep.rnr_retry_count = req->rnr_retry_count;
-	rep.target_ack_delay = 20; /* FIXME */
 	rep.srq = 1;
 	rep.qp_num = qp->qp_num;
 	rep.starting_psn = psn;
@@ -1148,7 +1147,6 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
 {
 	struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
 						   cm.skb_task);
-	struct net_device *dev = priv->dev;
 	struct sk_buff *skb;
 
 	unsigned mtu = priv->mcast_mtu;
@@ -1162,7 +1160,7 @@ static void ipoib_cm_skb_reap(struct work_struct *work)
 			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		else if (skb->protocol == htons(ETH_P_IPV6))
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, priv->dev);
 #endif
 		dev_kfree_skb_any(skb);
 		spin_lock_irq(&priv->tx_lock);

+ 17 - 16
drivers/infiniband/ulp/ipoib/ipoib_ib.c

@@ -196,6 +196,13 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 		return;
 	}
 
+	/*
+	 * Drop packets that this interface sent, ie multicast packets
+	 * that the HCA has replicated.
+	 */
+	if (wc->slid == priv->local_lid && wc->src_qp == priv->qp->qp_num)
+		goto repost;
+
 	/*
 	 * If we can't allocate a new RX buffer, dump
 	 * this packet and reuse the old buffer.
@@ -213,24 +220,18 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
 	skb_put(skb, wc->byte_len);
 	skb_pull(skb, IB_GRH_BYTES);
 
-	if (wc->slid != priv->local_lid ||
-	    wc->src_qp != priv->qp->qp_num) {
-		skb->protocol = ((struct ipoib_header *) skb->data)->proto;
-		skb_reset_mac_header(skb);
-		skb_pull(skb, IPOIB_ENCAP_LEN);
+	skb->protocol = ((struct ipoib_header *) skb->data)->proto;
+	skb_reset_mac_header(skb);
+	skb_pull(skb, IPOIB_ENCAP_LEN);
 
-		dev->last_rx = jiffies;
-		++priv->stats.rx_packets;
-		priv->stats.rx_bytes += skb->len;
+	dev->last_rx = jiffies;
+	++priv->stats.rx_packets;
+	priv->stats.rx_bytes += skb->len;
 
-		skb->dev = dev;
-		/* XXX get correct PACKET_ type here */
-		skb->pkt_type = PACKET_HOST;
-		netif_receive_skb(skb);
-	} else {
-		ipoib_dbg_data(priv, "dropping loopback packet\n");
-		dev_kfree_skb_any(skb);
-	}
+	skb->dev = dev;
+	/* XXX get correct PACKET_ type here */
+	skb->pkt_type = PACKET_HOST;
+	netif_receive_skb(skb);
 
 repost:
 	if (unlikely(ipoib_ib_post_receive(dev, wr_id)))

+ 1 - 1
drivers/infiniband/ulp/iser/Kconfig

@@ -1,6 +1,6 @@
 config INFINIBAND_ISER
 	tristate "iSCSI Extensions for RDMA (iSER)"
-	depends on INFINIBAND && SCSI && INET
+	depends on SCSI && INET
 	select SCSI_ISCSI_ATTRS
 	---help---
 	  Support for the iSCSI Extensions for RDMA (iSER) Protocol

+ 1 - 1
drivers/infiniband/ulp/srp/Kconfig

@@ -1,6 +1,6 @@
 config INFINIBAND_SRP
 	tristate "InfiniBand SCSI RDMA Protocol"
-	depends on INFINIBAND && SCSI
+	depends on SCSI
 	---help---
 	  Support for the SCSI RDMA Protocol over InfiniBand.  This
 	  allows you to access storage devices that speak SRP over

+ 1 - 1
drivers/net/cxgb3/version.h

@@ -39,6 +39,6 @@
 
 /* Firmware version */
 #define FW_VERSION_MAJOR 4
-#define FW_VERSION_MINOR 1
+#define FW_VERSION_MINOR 3
 #define FW_VERSION_MICRO 0
 #endif				/* __CHELSIO_VERSION_H */

+ 3 - 0
drivers/net/mlx4/fw.c

@@ -138,6 +138,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 #define QUERY_DEV_CAP_ACK_DELAY_OFFSET		0x35
 #define QUERY_DEV_CAP_MTU_WIDTH_OFFSET		0x36
 #define QUERY_DEV_CAP_VL_PORT_OFFSET		0x37
+#define QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET		0x38
 #define QUERY_DEV_CAP_MAX_GID_OFFSET		0x3b
 #define QUERY_DEV_CAP_RATE_SUPPORT_OFFSET	0x3c
 #define QUERY_DEV_CAP_MAX_PKEY_OFFSET		0x3f
@@ -220,6 +221,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev_cap->local_ca_ack_delay = field & 0x1f;
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_VL_PORT_OFFSET);
 	dev_cap->num_ports = field & 0xf;
+	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MSG_SZ_OFFSET);
+	dev_cap->max_msg_sz = 1 << (field & 0x1f);
 	MLX4_GET(stat_rate, outbox, QUERY_DEV_CAP_RATE_SUPPORT_OFFSET);
 	dev_cap->stat_rate_support = stat_rate;
 	MLX4_GET(dev_cap->flags, outbox, QUERY_DEV_CAP_FLAGS_OFFSET);

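The MAX_MSG_SZ field read above is a log2 exponent packed into the low five bits of the capability byte; 1 << (field & 0x1f) expands it to bytes. A minimal standalone decode with an invented field value:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t field = 0x1e;	/* invented: low 5 bits = 30 */
	uint32_t max_msg_sz = 1U << (field & 0x1f);

	printf("max message size: %u bytes\n", max_msg_sz); /* 1 GiB */
	return 0;
}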
+ 1 - 0
drivers/net/mlx4/fw.h

@@ -60,6 +60,7 @@ struct mlx4_dev_cap {
 	int max_rdma_global;
 	int local_ca_ack_delay;
 	int num_ports;
+	u32 max_msg_sz;
 	int max_mtu[MLX4_MAX_PORTS + 1];
 	int max_port_width[MLX4_MAX_PORTS + 1];
 	int max_vl[MLX4_MAX_PORTS + 1];

+ 1 - 0
drivers/net/mlx4/main.c

@@ -154,6 +154,7 @@ static int __devinit mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev
 	dev->caps.reserved_uars	     = dev_cap->reserved_uars;
 	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
 	dev->caps.mtt_entry_sz	     = MLX4_MTT_ENTRY_PER_SEG * dev_cap->mtt_entry_sz;
+	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
 	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
 	dev->caps.flags		     = dev_cap->flags;
 	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;

+ 1 - 0
drivers/net/mlx4/mlx4.h

@@ -37,6 +37,7 @@
 #ifndef MLX4_H
 #define MLX4_H
 
+#include <linux/mutex.h>
 #include <linux/radix-tree.h>
 
 #include <linux/mlx4/device.h>

+ 21 - 0
drivers/net/mlx4/qp.c

@@ -277,3 +277,24 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev)
 	mlx4_CONF_SPECIAL_QP(dev, 0);
 	mlx4_bitmap_cleanup(&mlx4_priv(dev)->qp_table.bitmap);
 }
+
+int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
+		  struct mlx4_qp_context *context)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	err = mlx4_cmd_box(dev, 0, mailbox->dma, qp->qpn, 0,
+			   MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A);
+	if (!err)
+		memcpy(context, mailbox->buf + 8, sizeof *context);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_qp_query);
+

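mlx4_qp_query() above follows the driver's standard firmware-command shape: allocate a DMA-able mailbox, issue the command with mlx4_cmd_box(), copy the result out on success, free the mailbox. A generic sketch of that shape; MY_QUERY_CMD and the output copy are hypothetical stand-ins, while the mailbox helpers are the real ones used in the hunk above.

/* Generic mlx4 firmware-query shape (hypothetical command). */
static int mlx4_generic_query(struct mlx4_dev *dev, u32 obj_num,
			      void *out, size_t out_size)
{
	struct mlx4_cmd_mailbox *mailbox;
	int err;

	mailbox = mlx4_alloc_cmd_mailbox(dev);	/* DMA-coherent buffer */
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	err = mlx4_cmd_box(dev, 0, mailbox->dma, obj_num, 0,
			   MY_QUERY_CMD, MLX4_CMD_TIME_CLASS_A);
	if (!err)
		memcpy(out, mailbox->buf, out_size);	/* firmware result */

	mlx4_free_cmd_mailbox(dev, mailbox);
	return err;
}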
+ 30 - 0
drivers/net/mlx4/srq.c

@@ -102,6 +102,13 @@ static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark)
 			MLX4_CMD_TIME_CLASS_B);
 }
 
+static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
+			  int srq_num)
+{
+	return mlx4_cmd_box(dev, 0, mailbox->dma, srq_num, 0, MLX4_CMD_QUERY_SRQ,
+			    MLX4_CMD_TIME_CLASS_A);
+}
+
 int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
 		   u64 db_rec, struct mlx4_srq *srq)
 {
@@ -205,6 +212,29 @@ int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark
 }
 EXPORT_SYMBOL_GPL(mlx4_srq_arm);
 
+int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_srq_context *srq_context;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	srq_context = mailbox->buf;
+
+	err = mlx4_QUERY_SRQ(dev, mailbox, srq->srqn);
+	if (err)
+		goto err_out;
+	*limit_watermark = srq_context->limit_watermark;
+
+err_out:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_srq_query);
+
 int __devinit mlx4_init_srq_table(struct mlx4_dev *dev)
 {
 	struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table;

+ 2 - 0
include/linux/mlx4/device.h

@@ -172,6 +172,7 @@ struct mlx4_caps {
 	int			num_pds;
 	int			reserved_pds;
 	int			mtt_entry_sz;
+	u32			max_msg_sz;
 	u32			page_size_cap;
 	u32			flags;
 	u16			stat_rate_support;
@@ -322,6 +323,7 @@ int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, struct mlx4_mtt *mtt,
 		   u64 db_rec, struct mlx4_srq *srq);
 void mlx4_srq_free(struct mlx4_dev *dev, struct mlx4_srq *srq);
 int mlx4_srq_arm(struct mlx4_dev *dev, struct mlx4_srq *srq, int limit_watermark);
+int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_watermark);
 
 int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
 int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);

+ 3 - 0
include/linux/mlx4/qp.h

@@ -282,6 +282,9 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		   struct mlx4_qp_context *context, enum mlx4_qp_optpar optpar,
 		   int sqd_event, struct mlx4_qp *qp);
 
+int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
+		  struct mlx4_qp_context *context);
+
 static inline struct mlx4_qp *__mlx4_qp_lookup(struct mlx4_dev *dev, u32 qpn)
 {
 	return radix_tree_lookup(&dev->qp_table_tree, qpn & (dev->caps.num_qps - 1));

+ 0 - 1
include/rdma/ib_cm.h

@@ -385,7 +385,6 @@ struct ib_cm_rep_param {
 	u8		private_data_len;
 	u8		responder_resources;
 	u8		initiator_depth;
-	u8		target_ack_delay;
 	u8		failover_accepted;
 	u8		flow_control;
 	u8		rnr_retry_count;

+ 3 - 0
include/rdma/ib_mad.h

@@ -111,6 +111,9 @@
 #define IB_QP1_QKEY	0x80010000
 #define IB_QP_SET_QKEY	0x80000000
 
+#define IB_DEFAULT_PKEY_PARTIAL 0x7FFF
+#define IB_DEFAULT_PKEY_FULL	0xFFFF
+
 enum {
 	IB_MGMT_MAD_HDR = 24,
 	IB_MGMT_MAD_DATA = 232,
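The two default P_Keys added above differ only in the top bit: in InfiniBand, a P_Key's high bit marks full (1) versus limited (0) partition membership, and the low 15 bits name the partition itself. A small standalone illustration:

#include <stdint.h>
#include <stdio.h>

#define IB_DEFAULT_PKEY_PARTIAL 0x7FFF
#define IB_DEFAULT_PKEY_FULL	0xFFFF

int main(void)
{
	uint16_t pkeys[] = { IB_DEFAULT_PKEY_FULL, IB_DEFAULT_PKEY_PARTIAL };
	int i;

	for (i = 0; i < 2; i++)
		printf("pkey 0x%04x: partition 0x%04x, %s membership\n",
		       pkeys[i], pkeys[i] & 0x7FFF,
		       (pkeys[i] & 0x8000) ? "full" : "limited");
	return 0;
}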