@@ -53,6 +53,27 @@
 #include "qp.h"
 #include "vt.h"
 
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; rvt_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
+const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
+        [IB_QPS_RESET] = 0,
+        [IB_QPS_INIT] = RVT_POST_RECV_OK,
+        [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
+        [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+            RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
+            RVT_PROCESS_NEXT_SEND_OK,
+        [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+            RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
+        [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
+            RVT_POST_SEND_OK | RVT_FLUSH_SEND,
+        [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
+            RVT_POST_SEND_OK | RVT_FLUSH_SEND,
+};
+EXPORT_SYMBOL(ib_rvt_state_ops);
+
 static void get_map_page(struct rvt_qpn_table *qpt,
                          struct rvt_qpn_map *map,
                          gfp_t gfp)
@@ -586,7 +607,7 @@ struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
 
         /*
          * Return the address of the RWQ as the offset to mmap.
-         * See hfi1_mmap() for details.
+         * See rvt_mmap() for details.
          */
         if (udata && udata->outlen >= sizeof(__u64)) {
                 if (!qp->r_rq.wq) {
@@ -749,6 +770,129 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
         return -EOPNOTSUPP;
 }
 
+/**
+ * rvt_post_one_wr - post one RC, UC, or UD send work request
+ * @qp: the QP to post on
+ * @wr: the work request to send
+ */
+static int rvt_post_one_wr(struct rvt_qp *qp, struct ib_send_wr *wr)
+{
+        struct rvt_swqe *wqe;
+        u32 next;
+        int i;
+        int j;
+        int acc;
+        struct rvt_lkey_table *rkt;
+        struct rvt_pd *pd;
+        struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
+
+        /* IB spec says that num_sge == 0 is OK. */
+        if (unlikely(wr->num_sge > qp->s_max_sge))
+                return -EINVAL;
+
+        /*
+         * Don't allow RDMA reads or atomic operations on UC QPs, and
+         * reject undefined operations.
+         * Make sure the buffer is large enough to hold the result for atomics.
+         */
+        if (qp->ibqp.qp_type == IB_QPT_UC) {
+                if ((unsigned)wr->opcode >= IB_WR_RDMA_READ)
+                        return -EINVAL;
+        } else if (qp->ibqp.qp_type != IB_QPT_RC) {
+                /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
+                if (wr->opcode != IB_WR_SEND &&
+                    wr->opcode != IB_WR_SEND_WITH_IMM)
+                        return -EINVAL;
+                /* Check UD destination address PD */
+                if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
+                        return -EINVAL;
+        } else if ((unsigned)wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD) {
+                return -EINVAL;
+        } else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
+                   (wr->num_sge == 0 ||
+                    wr->sg_list[0].length < sizeof(u64) ||
+                    wr->sg_list[0].addr & (sizeof(u64) - 1))) {
+                return -EINVAL;
+        } else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic) {
+                return -EINVAL;
+        }
+
+        next = qp->s_head + 1;
+        if (next >= qp->s_size)
+                next = 0;
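+        /*
+         * The ring deliberately keeps one slot empty; if advancing
+         * s_head would land on s_last, the send queue is full.
+         */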
+        if (next == qp->s_last)
+                return -ENOMEM;
+
+        rkt = &rdi->lkey_table;
+        pd = ibpd_to_rvtpd(qp->ibqp.pd);
+        wqe = rvt_get_swqe_ptr(qp, qp->s_head);
+
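+        /* stash the WR variant that matches the QP type and opcode */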
+        if (qp->ibqp.qp_type != IB_QPT_UC &&
+            qp->ibqp.qp_type != IB_QPT_RC)
+                memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
+        else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
+                 wr->opcode == IB_WR_RDMA_WRITE ||
+                 wr->opcode == IB_WR_RDMA_READ)
+                memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
+        else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+                 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+                memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
+        else
+                memcpy(&wqe->wr, wr, sizeof(wqe->wr));
+
+        wqe->length = 0;
+        j = 0;
+        if (wr->num_sge) {
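+                /*
+                 * RDMA reads and atomics return data into local memory,
+                 * so the lkey must grant IB_ACCESS_LOCAL_WRITE for them.
+                 */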
+                acc = wr->opcode >= IB_WR_RDMA_READ ?
+                        IB_ACCESS_LOCAL_WRITE : 0;
+                for (i = 0; i < wr->num_sge; i++) {
+                        u32 length = wr->sg_list[i].length;
+                        int ok;
+
+                        if (length == 0)
+                                continue;
+                        ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
+                                         &wr->sg_list[i], acc);
+                        if (!ok)
+                                goto bail_inval_free;
+                        wqe->length += length;
+                        j++;
+                }
+                wqe->wr.num_sge = j;
+        }
+        if (qp->ibqp.qp_type == IB_QPT_UC ||
+            qp->ibqp.qp_type == IB_QPT_RC) {
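+                /* the length is capped at 2^31 bytes, the IBA maximum message size */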
+                if (wqe->length > 0x80000000U)
+                        goto bail_inval_free;
+        } else {
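+                /* SMI/GSI/UD: hold a reference on the AH while the WQE is outstanding */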
+                atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
+        }
+        wqe->ssn = qp->s_ssn++;
+        qp->s_head = next;
+
+        return 0;
+
+bail_inval_free:
+        /* release mr holds */
+        while (j) {
+                struct rvt_sge *sge = &wqe->sg_list[--j];
+
+                rvt_put_mr(sge->mr);
+        }
+        return -EINVAL;
+}
+
 /**
  * rvt_post_send - post a send on a QP
  * @ibqp: the QP to post the send on
@@ -760,20 +904,48 @@ int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                   struct ib_send_wr **bad_wr)
 {
+        struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
+        struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
+        unsigned long flags = 0;
+        int call_send;
+        unsigned nreq = 0;
+        int err = 0;
+
+        spin_lock_irqsave(&qp->s_lock, flags);
+
         /*
-         * VT-DRIVER-API: do_send()
-         * Driver needs to have a do_send() call which is a single entry point
-         * to take an already formed packet and throw it out on the wire. Once
-         * the packet is sent the driver needs to make an upcall to rvt so the
-         * completion queue can be notified and/or any other outstanding
-         * work/book keeping can be finished.
-         *
-         * Note that there should also be a way for rvt to protect itself
-         * against hangs in the driver layer. If a send doesn't actually
-         * complete in a timely manor rvt needs to return an error event.
+         * Ensure the QP state allows posting sends. If not, bail out early;
+         * there is no need to redo this check every time we post a send.
          */
+        if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
+                spin_unlock_irqrestore(&qp->s_lock, flags);
+                return -EINVAL;
+        }
 
-        return -EOPNOTSUPP;
+        /*
+         * If the send queue is empty and there is only a single WR, just go
+         * ahead and kick the send engine into gear. Otherwise we will always
+         * just schedule the send to happen later.
+         */
+        call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next;
+
+        for (; wr; wr = wr->next) {
+                err = rvt_post_one_wr(qp, wr);
+                if (unlikely(err)) {
+                        *bad_wr = wr;
+                        goto bail;
+                }
+                nreq++;
+        }
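+        /* even on error, WQEs queued before the bad WR are still submitted */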
+bail:
+        if (nreq && !call_send)
+                rdi->driver_f.schedule_send(qp);
+        spin_unlock_irqrestore(&qp->s_lock, flags);
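+        /* for a lone WR on an idle QP, process it inline, outside s_lock */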
+        if (nreq && call_send)
+                rdi->driver_f.do_send(qp);
+        return err;
 }
 
 /**