소스 검색

RDMA/ocrdma: Do proper cleanup even if FW is in error state

If any mailbox command reports a timeout, save that state in the driver
to prevent issuing any more commands to the HW. Perform proper cleanup
even if the FW is in an error state.

Signed-off-by: Mitesh Ahuja <mitesh.ahuja@emulex.Com>
Signed-off-by: Selvin Xavier <selvin.xavier@emulex.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Mitesh Ahuja 11 년 전
부모
커밋
6dab02648c
3개의 변경된 파일, 19개의 추가 그리고 2개의 삭제
  1. 1 0
      drivers/infiniband/hw/ocrdma/ocrdma.h
  2. 7 1
      drivers/infiniband/hw/ocrdma/ocrdma_hw.c
  3. 11 1
      drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

+ 1 - 0
drivers/infiniband/hw/ocrdma/ocrdma.h

@@ -137,6 +137,7 @@ struct mqe_ctx {
 	u16 cqe_status;
 	u16 ext_status;
 	bool cmd_done;
+	bool fw_error_state;
 };
 
 struct ocrdma_hw_mr {

+ 7 - 1
drivers/infiniband/hw/ocrdma/ocrdma_hw.c

@@ -966,8 +966,12 @@ static int ocrdma_wait_mqe_cmpl(struct ocrdma_dev *dev)
 				    msecs_to_jiffies(30000));
 	if (status)
 		return 0;
-	else
+	else {
+		dev->mqe_ctx.fw_error_state = true;
+		pr_err("%s(%d) mailbox timeout: fw not responding\n",
+		       __func__, dev->id);
 		return -1;
+	}
 }
 
 /* issue a mailbox command on the MQ */
@@ -979,6 +983,8 @@ static int ocrdma_mbx_cmd(struct ocrdma_dev *dev, struct ocrdma_mqe *mqe)
 	struct ocrdma_mbx_rsp *rsp = NULL;
 
 	mutex_lock(&dev->mqe_ctx.lock);
+	if (dev->mqe_ctx.fw_error_state)
+		goto mbx_err;
 	ocrdma_post_mqe(dev, mqe);
 	status = ocrdma_wait_mqe_cmpl(dev);
 	if (status)

+ 11 - 1
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

@@ -329,7 +329,10 @@ static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
 	struct ocrdma_pd *pd = uctx->cntxt_pd;
 	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
 
-	BUG_ON(uctx->pd_in_use);
+	if (uctx->pd_in_use) {
+		pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
+		       __func__, dev->id, pd->id);
+	}
 	uctx->cntxt_pd = NULL;
 	status = _ocrdma_dealloc_pd(dev, pd);
 	return status;
@@ -844,6 +847,13 @@ int ocrdma_dereg_mr(struct ib_mr *ib_mr)
 	if (mr->umem)
 		ib_umem_release(mr->umem);
 	kfree(mr);
+
+	/* Don't stop cleanup, in case FW is unresponsive */
+	if (dev->mqe_ctx.fw_error_state) {
+		status = 0;
+		pr_err("%s(%d) fw not responding.\n",
+		       __func__, dev->id);
+	}
 	return status;
 }