|
@@ -34,33 +34,29 @@
|
|
#include <linux/kthread.h>
|
|
#include <linux/kthread.h>
|
|
#include <linux/types.h>
|
|
#include <linux/types.h>
|
|
#include <linux/debugfs.h>
|
|
#include <linux/debugfs.h>
|
|
|
|
+#include <linux/blk-mq.h>
|
|
|
|
|
|
#include <asm/uaccess.h>
|
|
#include <asm/uaccess.h>
|
|
#include <asm/types.h>
|
|
#include <asm/types.h>
|
|
|
|
|
|
#include <linux/nbd.h>
|
|
#include <linux/nbd.h>
|
|
|
|
|
|
|
|
+#define NBD_TIMEDOUT 0
|
|
|
|
+#define NBD_DISCONNECT_REQUESTED 1
|
|
|
|
+
|
|
struct nbd_device {
|
|
struct nbd_device {
|
|
u32 flags;
|
|
u32 flags;
|
|
|
|
+ unsigned long runtime_flags;
|
|
struct socket * sock; /* If == NULL, device is not ready, yet */
|
|
struct socket * sock; /* If == NULL, device is not ready, yet */
|
|
int magic;
|
|
int magic;
|
|
|
|
|
|
- spinlock_t queue_lock;
|
|
|
|
- struct list_head queue_head; /* Requests waiting result */
|
|
|
|
- struct request *active_req;
|
|
|
|
- wait_queue_head_t active_wq;
|
|
|
|
- struct list_head waiting_queue; /* Requests to be sent */
|
|
|
|
- wait_queue_head_t waiting_wq;
|
|
|
|
|
|
+ struct blk_mq_tag_set tag_set;
|
|
|
|
|
|
struct mutex tx_lock;
|
|
struct mutex tx_lock;
|
|
struct gendisk *disk;
|
|
struct gendisk *disk;
|
|
int blksize;
|
|
int blksize;
|
|
loff_t bytesize;
|
|
loff_t bytesize;
|
|
- int xmit_timeout;
|
|
|
|
- bool timedout;
|
|
|
|
- bool disconnect; /* a disconnect has been requested by user */
|
|
|
|
|
|
|
|
- struct timer_list timeout_timer;
|
|
|
|
/* protects initialization and shutdown of the socket */
|
|
/* protects initialization and shutdown of the socket */
|
|
spinlock_t sock_lock;
|
|
spinlock_t sock_lock;
|
|
struct task_struct *task_recv;
|
|
struct task_struct *task_recv;
|
|
@@ -71,6 +67,11 @@ struct nbd_device {
|
|
#endif
|
|
#endif
|
|
};
|
|
};
|
|
|
|
|
|
|
|
+struct nbd_cmd {
|
|
|
|
+ struct nbd_device *nbd;
|
|
|
|
+ struct list_head list;
|
|
|
|
+};
|
|
|
|
+
|
|
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
|
#if IS_ENABLED(CONFIG_DEBUG_FS)
|
|
static struct dentry *nbd_dbg_dir;
|
|
static struct dentry *nbd_dbg_dir;
|
|
#endif
|
|
#endif
|
|
@@ -83,18 +84,6 @@ static unsigned int nbds_max = 16;
|
|
static struct nbd_device *nbd_dev;
|
|
static struct nbd_device *nbd_dev;
|
|
static int max_part;
|
|
static int max_part;
|
|
|
|
|
|
-/*
|
|
|
|
- * Use just one lock (or at most 1 per NIC). Two arguments for this:
|
|
|
|
- * 1. Each NIC is essentially a synchronization point for all servers
|
|
|
|
- * accessed through that NIC so there's no need to have more locks
|
|
|
|
- * than NICs anyway.
|
|
|
|
- * 2. More locks lead to more "Dirty cache line bouncing" which will slow
|
|
|
|
- * down each lock to the point where they're actually slower than just
|
|
|
|
- * a single lock.
|
|
|
|
- * Thanks go to Jens Axboe and Al Viro for their LKML emails explaining this!
|
|
|
|
- */
|
|
|
|
-static DEFINE_SPINLOCK(nbd_lock);
|
|
|
|
-
|
|
|
|
static inline struct device *nbd_to_dev(struct nbd_device *nbd)
|
|
static inline struct device *nbd_to_dev(struct nbd_device *nbd)
|
|
{
|
|
{
|
|
return disk_to_dev(nbd->disk);
|
|
return disk_to_dev(nbd->disk);
|
|
@@ -153,18 +142,16 @@ static int nbd_size_set(struct nbd_device *nbd, struct block_device *bdev,
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
-static void nbd_end_request(struct nbd_device *nbd, struct request *req)
|
|
|
|
|
|
+static void nbd_end_request(struct nbd_cmd *cmd)
|
|
{
|
|
{
|
|
|
|
+ struct nbd_device *nbd = cmd->nbd;
|
|
|
|
+ struct request *req = blk_mq_rq_from_pdu(cmd);
|
|
int error = req->errors ? -EIO : 0;
|
|
int error = req->errors ? -EIO : 0;
|
|
- struct request_queue *q = req->q;
|
|
|
|
- unsigned long flags;
|
|
|
|
|
|
|
|
- dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", req,
|
|
|
|
|
|
+ dev_dbg(nbd_to_dev(nbd), "request %p: %s\n", cmd,
|
|
error ? "failed" : "done");
|
|
error ? "failed" : "done");
|
|
|
|
|
|
- spin_lock_irqsave(q->queue_lock, flags);
|
|
|
|
- __blk_end_request_all(req, error);
|
|
|
|
- spin_unlock_irqrestore(q->queue_lock, flags);
|
|
|
|
|
|
+ blk_mq_complete_request(req, error);
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
@@ -172,40 +159,49 @@ static void nbd_end_request(struct nbd_device *nbd, struct request *req)
|
|
*/
|
|
*/
|
|
static void sock_shutdown(struct nbd_device *nbd)
|
|
static void sock_shutdown(struct nbd_device *nbd)
|
|
{
|
|
{
|
|
- spin_lock_irq(&nbd->sock_lock);
|
|
|
|
|
|
+ struct socket *sock;
|
|
|
|
+
|
|
|
|
+ spin_lock(&nbd->sock_lock);
|
|
|
|
|
|
if (!nbd->sock) {
|
|
if (!nbd->sock) {
|
|
spin_unlock_irq(&nbd->sock_lock);
|
|
spin_unlock_irq(&nbd->sock_lock);
|
|
return;
|
|
return;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+ sock = nbd->sock;
|
|
dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
|
|
dev_warn(disk_to_dev(nbd->disk), "shutting down socket\n");
|
|
- kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
|
|
|
|
- sockfd_put(nbd->sock);
|
|
|
|
nbd->sock = NULL;
|
|
nbd->sock = NULL;
|
|
- spin_unlock_irq(&nbd->sock_lock);
|
|
|
|
|
|
+ spin_unlock(&nbd->sock_lock);
|
|
|
|
|
|
- del_timer(&nbd->timeout_timer);
|
|
|
|
|
|
+ kernel_sock_shutdown(sock, SHUT_RDWR);
|
|
|
|
+ sockfd_put(sock);
|
|
}
|
|
}
|
|
|
|
|
|
-static void nbd_xmit_timeout(unsigned long arg)
|
|
|
|
|
|
+static enum blk_eh_timer_return nbd_xmit_timeout(struct request *req,
|
|
|
|
+ bool reserved)
|
|
{
|
|
{
|
|
- struct nbd_device *nbd = (struct nbd_device *)arg;
|
|
|
|
- unsigned long flags;
|
|
|
|
-
|
|
|
|
- if (list_empty(&nbd->queue_head))
|
|
|
|
- return;
|
|
|
|
|
|
+ struct nbd_cmd *cmd = blk_mq_rq_to_pdu(req);
|
|
|
|
+ struct nbd_device *nbd = cmd->nbd;
|
|
|
|
+ struct socket *sock = NULL;
|
|
|
|
|
|
- spin_lock_irqsave(&nbd->sock_lock, flags);
|
|
|
|
|
|
+ spin_lock(&nbd->sock_lock);
|
|
|
|
|
|
- nbd->timedout = true;
|
|
|
|
|
|
+ set_bit(NBD_TIMEDOUT, &nbd->runtime_flags);
|
|
|
|
|
|
- if (nbd->sock)
|
|
|
|
- kernel_sock_shutdown(nbd->sock, SHUT_RDWR);
|
|
|
|
|
|
+ if (nbd->sock) {
|
|
|
|
+ sock = nbd->sock;
|
|
|
|
+ get_file(sock->file);
|
|
|
|
+ }
|
|
|
|
|
|
- spin_unlock_irqrestore(&nbd->sock_lock, flags);
|
|
|
|
|
|
+ spin_unlock(&nbd->sock_lock);
|
|
|
|
+ if (sock) {
|
|
|
|
+ kernel_sock_shutdown(sock, SHUT_RDWR);
|
|
|
|
+ sockfd_put(sock);
|
|
|
|
+ }
|
|
|
|
|
|
|
|
+ req->errors++;
|
|
dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
|
|
dev_err(nbd_to_dev(nbd), "Connection timed out, shutting down connection\n");
|
|
|
|
+ return BLK_EH_HANDLED;
|
|
}
|
|
}
|
|
|
|
|
|
/*
|
|
/*
|
|
@@ -255,9 +251,6 @@ static int sock_xmit(struct nbd_device *nbd, int send, void *buf, int size,
|
|
|
|
|
|
tsk_restore_flags(current, pflags, PF_MEMALLOC);
|
|
tsk_restore_flags(current, pflags, PF_MEMALLOC);
|
|
|
|
|
|
- if (!send && nbd->xmit_timeout)
|
|
|
|
- mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
|
|
|
|
-
|
|
|
|
return result;
|
|
return result;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -273,8 +266,9 @@ static inline int sock_send_bvec(struct nbd_device *nbd, struct bio_vec *bvec,
|
|
}
|
|
}
|
|
|
|
|
|
/* always call with the tx_lock held */
|
|
/* always call with the tx_lock held */
|
|
-static int nbd_send_req(struct nbd_device *nbd, struct request *req)
|
|
|
|
|
|
+static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd)
|
|
{
|
|
{
|
|
|
|
+ struct request *req = blk_mq_rq_from_pdu(cmd);
|
|
int result, flags;
|
|
int result, flags;
|
|
struct nbd_request request;
|
|
struct nbd_request request;
|
|
unsigned long size = blk_rq_bytes(req);
|
|
unsigned long size = blk_rq_bytes(req);
|
|
@@ -298,10 +292,10 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
|
|
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
|
|
request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
|
|
request.len = htonl(size);
|
|
request.len = htonl(size);
|
|
}
|
|
}
|
|
- memcpy(request.handle, &req, sizeof(req));
|
|
|
|
|
|
+ memcpy(request.handle, &req->tag, sizeof(req->tag));
|
|
|
|
|
|
dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
|
|
dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
|
|
- req, nbdcmd_to_ascii(type),
|
|
|
|
|
|
+ cmd, nbdcmd_to_ascii(type),
|
|
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
|
|
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
|
|
result = sock_xmit(nbd, 1, &request, sizeof(request),
|
|
result = sock_xmit(nbd, 1, &request, sizeof(request),
|
|
(type == NBD_CMD_WRITE) ? MSG_MORE : 0);
|
|
(type == NBD_CMD_WRITE) ? MSG_MORE : 0);
|
|
@@ -323,7 +317,7 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
|
|
if (!rq_iter_last(bvec, iter))
|
|
if (!rq_iter_last(bvec, iter))
|
|
flags = MSG_MORE;
|
|
flags = MSG_MORE;
|
|
dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
|
|
dev_dbg(nbd_to_dev(nbd), "request %p: sending %d bytes data\n",
|
|
- req, bvec.bv_len);
|
|
|
|
|
|
+ cmd, bvec.bv_len);
|
|
result = sock_send_bvec(nbd, &bvec, flags);
|
|
result = sock_send_bvec(nbd, &bvec, flags);
|
|
if (result <= 0) {
|
|
if (result <= 0) {
|
|
dev_err(disk_to_dev(nbd->disk),
|
|
dev_err(disk_to_dev(nbd->disk),
|
|
@@ -336,29 +330,6 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
-static struct request *nbd_find_request(struct nbd_device *nbd,
|
|
|
|
- struct request *xreq)
|
|
|
|
-{
|
|
|
|
- struct request *req, *tmp;
|
|
|
|
- int err;
|
|
|
|
-
|
|
|
|
- err = wait_event_interruptible(nbd->active_wq, nbd->active_req != xreq);
|
|
|
|
- if (unlikely(err))
|
|
|
|
- return ERR_PTR(err);
|
|
|
|
-
|
|
|
|
- spin_lock(&nbd->queue_lock);
|
|
|
|
- list_for_each_entry_safe(req, tmp, &nbd->queue_head, queuelist) {
|
|
|
|
- if (req != xreq)
|
|
|
|
- continue;
|
|
|
|
- list_del_init(&req->queuelist);
|
|
|
|
- spin_unlock(&nbd->queue_lock);
|
|
|
|
- return req;
|
|
|
|
- }
|
|
|
|
- spin_unlock(&nbd->queue_lock);
|
|
|
|
-
|
|
|
|
- return ERR_PTR(-ENOENT);
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
|
|
static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
|
|
{
|
|
{
|
|
int result;
|
|
int result;
|
|
@@ -370,11 +341,14 @@ static inline int sock_recv_bvec(struct nbd_device *nbd, struct bio_vec *bvec)
|
|
}
|
|
}
|
|
|
|
|
|
/* NULL returned = something went wrong, inform userspace */
|
|
/* NULL returned = something went wrong, inform userspace */
|
|
-static struct request *nbd_read_stat(struct nbd_device *nbd)
|
|
|
|
|
|
+static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd)
|
|
{
|
|
{
|
|
int result;
|
|
int result;
|
|
struct nbd_reply reply;
|
|
struct nbd_reply reply;
|
|
- struct request *req;
|
|
|
|
|
|
+ struct nbd_cmd *cmd;
|
|
|
|
+ struct request *req = NULL;
|
|
|
|
+ u16 hwq;
|
|
|
|
+ int tag;
|
|
|
|
|
|
reply.magic = 0;
|
|
reply.magic = 0;
|
|
result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL);
|
|
result = sock_xmit(nbd, 0, &reply, sizeof(reply), MSG_WAITALL);
|
|
@@ -390,25 +364,27 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
|
|
return ERR_PTR(-EPROTO);
|
|
return ERR_PTR(-EPROTO);
|
|
}
|
|
}
|
|
|
|
|
|
- req = nbd_find_request(nbd, *(struct request **)reply.handle);
|
|
|
|
- if (IS_ERR(req)) {
|
|
|
|
- result = PTR_ERR(req);
|
|
|
|
- if (result != -ENOENT)
|
|
|
|
- return ERR_PTR(result);
|
|
|
|
|
|
+ memcpy(&tag, reply.handle, sizeof(int));
|
|
|
|
|
|
- dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%p)\n",
|
|
|
|
- reply.handle);
|
|
|
|
- return ERR_PTR(-EBADR);
|
|
|
|
|
|
+ hwq = blk_mq_unique_tag_to_hwq(tag);
|
|
|
|
+ if (hwq < nbd->tag_set.nr_hw_queues)
|
|
|
|
+ req = blk_mq_tag_to_rq(nbd->tag_set.tags[hwq],
|
|
|
|
+ blk_mq_unique_tag_to_tag(tag));
|
|
|
|
+ if (!req || !blk_mq_request_started(req)) {
|
|
|
|
+ dev_err(disk_to_dev(nbd->disk), "Unexpected reply (%d) %p\n",
|
|
|
|
+ tag, req);
|
|
|
|
+ return ERR_PTR(-ENOENT);
|
|
}
|
|
}
|
|
|
|
+ cmd = blk_mq_rq_to_pdu(req);
|
|
|
|
|
|
if (ntohl(reply.error)) {
|
|
if (ntohl(reply.error)) {
|
|
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
|
|
dev_err(disk_to_dev(nbd->disk), "Other side returned error (%d)\n",
|
|
ntohl(reply.error));
|
|
ntohl(reply.error));
|
|
req->errors++;
|
|
req->errors++;
|
|
- return req;
|
|
|
|
|
|
+ return cmd;
|
|
}
|
|
}
|
|
|
|
|
|
- dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
|
|
|
|
|
|
+ dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", cmd);
|
|
if (rq_data_dir(req) != WRITE) {
|
|
if (rq_data_dir(req) != WRITE) {
|
|
struct req_iterator iter;
|
|
struct req_iterator iter;
|
|
struct bio_vec bvec;
|
|
struct bio_vec bvec;
|
|
@@ -419,13 +395,13 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
|
|
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
|
|
dev_err(disk_to_dev(nbd->disk), "Receive data failed (result %d)\n",
|
|
result);
|
|
result);
|
|
req->errors++;
|
|
req->errors++;
|
|
- return req;
|
|
|
|
|
|
+ return cmd;
|
|
}
|
|
}
|
|
dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
|
|
dev_dbg(nbd_to_dev(nbd), "request %p: got %d bytes data\n",
|
|
- req, bvec.bv_len);
|
|
|
|
|
|
+ cmd, bvec.bv_len);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
- return req;
|
|
|
|
|
|
+ return cmd;
|
|
}
|
|
}
|
|
|
|
|
|
static ssize_t pid_show(struct device *dev,
|
|
static ssize_t pid_show(struct device *dev,
|
|
@@ -444,7 +420,7 @@ static struct device_attribute pid_attr = {
|
|
|
|
|
|
static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
|
|
static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
|
|
{
|
|
{
|
|
- struct request *req;
|
|
|
|
|
|
+ struct nbd_cmd *cmd;
|
|
int ret;
|
|
int ret;
|
|
|
|
|
|
BUG_ON(nbd->magic != NBD_MAGIC);
|
|
BUG_ON(nbd->magic != NBD_MAGIC);
|
|
@@ -460,13 +436,13 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
|
|
nbd_size_update(nbd, bdev);
|
|
nbd_size_update(nbd, bdev);
|
|
|
|
|
|
while (1) {
|
|
while (1) {
|
|
- req = nbd_read_stat(nbd);
|
|
|
|
- if (IS_ERR(req)) {
|
|
|
|
- ret = PTR_ERR(req);
|
|
|
|
|
|
+ cmd = nbd_read_stat(nbd);
|
|
|
|
+ if (IS_ERR(cmd)) {
|
|
|
|
+ ret = PTR_ERR(cmd);
|
|
break;
|
|
break;
|
|
}
|
|
}
|
|
|
|
|
|
- nbd_end_request(nbd, req);
|
|
|
|
|
|
+ nbd_end_request(cmd);
|
|
}
|
|
}
|
|
|
|
|
|
nbd_size_clear(nbd, bdev);
|
|
nbd_size_clear(nbd, bdev);
|
|
@@ -475,44 +451,37 @@ static int nbd_thread_recv(struct nbd_device *nbd, struct block_device *bdev)
|
|
return ret;
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
|
|
-static void nbd_clear_que(struct nbd_device *nbd)
|
|
|
|
|
|
+static void nbd_clear_req(struct request *req, void *data, bool reserved)
|
|
{
|
|
{
|
|
- struct request *req;
|
|
|
|
|
|
+ struct nbd_cmd *cmd;
|
|
|
|
+
|
|
|
|
+ if (!blk_mq_request_started(req))
|
|
|
|
+ return;
|
|
|
|
+ cmd = blk_mq_rq_to_pdu(req);
|
|
|
|
+ req->errors++;
|
|
|
|
+ nbd_end_request(cmd);
|
|
|
|
+}
|
|
|
|
|
|
|
|
+static void nbd_clear_que(struct nbd_device *nbd)
|
|
|
|
+{
|
|
BUG_ON(nbd->magic != NBD_MAGIC);
|
|
BUG_ON(nbd->magic != NBD_MAGIC);
|
|
|
|
|
|
/*
|
|
/*
|
|
* Because we have set nbd->sock to NULL under the tx_lock, all
|
|
* Because we have set nbd->sock to NULL under the tx_lock, all
|
|
- * modifications to the list must have completed by now. For
|
|
|
|
- * the same reason, the active_req must be NULL.
|
|
|
|
- *
|
|
|
|
- * As a consequence, we don't need to take the spin lock while
|
|
|
|
- * purging the list here.
|
|
|
|
|
|
+ * modifications to the list must have completed by now.
|
|
*/
|
|
*/
|
|
BUG_ON(nbd->sock);
|
|
BUG_ON(nbd->sock);
|
|
- BUG_ON(nbd->active_req);
|
|
|
|
|
|
|
|
- while (!list_empty(&nbd->queue_head)) {
|
|
|
|
- req = list_entry(nbd->queue_head.next, struct request,
|
|
|
|
- queuelist);
|
|
|
|
- list_del_init(&req->queuelist);
|
|
|
|
- req->errors++;
|
|
|
|
- nbd_end_request(nbd, req);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- while (!list_empty(&nbd->waiting_queue)) {
|
|
|
|
- req = list_entry(nbd->waiting_queue.next, struct request,
|
|
|
|
- queuelist);
|
|
|
|
- list_del_init(&req->queuelist);
|
|
|
|
- req->errors++;
|
|
|
|
- nbd_end_request(nbd, req);
|
|
|
|
- }
|
|
|
|
|
|
+ blk_mq_tagset_busy_iter(&nbd->tag_set, nbd_clear_req, NULL);
|
|
dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
|
|
dev_dbg(disk_to_dev(nbd->disk), "queue cleared\n");
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
-static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
|
|
|
|
|
|
+static void nbd_handle_cmd(struct nbd_cmd *cmd)
|
|
{
|
|
{
|
|
|
|
+ struct request *req = blk_mq_rq_from_pdu(cmd);
|
|
|
|
+ struct nbd_device *nbd = cmd->nbd;
|
|
|
|
+
|
|
if (req->cmd_type != REQ_TYPE_FS)
|
|
if (req->cmd_type != REQ_TYPE_FS)
|
|
goto error_out;
|
|
goto error_out;
|
|
|
|
|
|
@@ -526,6 +495,7 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
|
|
req->errors = 0;
|
|
req->errors = 0;
|
|
|
|
|
|
mutex_lock(&nbd->tx_lock);
|
|
mutex_lock(&nbd->tx_lock);
|
|
|
|
+ nbd->task_send = current;
|
|
if (unlikely(!nbd->sock)) {
|
|
if (unlikely(!nbd->sock)) {
|
|
mutex_unlock(&nbd->tx_lock);
|
|
mutex_unlock(&nbd->tx_lock);
|
|
dev_err(disk_to_dev(nbd->disk),
|
|
dev_err(disk_to_dev(nbd->disk),
|
|
@@ -533,106 +503,30 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
|
|
goto error_out;
|
|
goto error_out;
|
|
}
|
|
}
|
|
|
|
|
|
- nbd->active_req = req;
|
|
|
|
-
|
|
|
|
- if (nbd->xmit_timeout && list_empty_careful(&nbd->queue_head))
|
|
|
|
- mod_timer(&nbd->timeout_timer, jiffies + nbd->xmit_timeout);
|
|
|
|
-
|
|
|
|
- if (nbd_send_req(nbd, req) != 0) {
|
|
|
|
|
|
+ if (nbd_send_cmd(nbd, cmd) != 0) {
|
|
dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
|
|
dev_err(disk_to_dev(nbd->disk), "Request send failed\n");
|
|
req->errors++;
|
|
req->errors++;
|
|
- nbd_end_request(nbd, req);
|
|
|
|
- } else {
|
|
|
|
- spin_lock(&nbd->queue_lock);
|
|
|
|
- list_add_tail(&req->queuelist, &nbd->queue_head);
|
|
|
|
- spin_unlock(&nbd->queue_lock);
|
|
|
|
|
|
+ nbd_end_request(cmd);
|
|
}
|
|
}
|
|
|
|
|
|
- nbd->active_req = NULL;
|
|
|
|
|
|
+ nbd->task_send = NULL;
|
|
mutex_unlock(&nbd->tx_lock);
|
|
mutex_unlock(&nbd->tx_lock);
|
|
- wake_up_all(&nbd->active_wq);
|
|
|
|
|
|
|
|
return;
|
|
return;
|
|
|
|
|
|
error_out:
|
|
error_out:
|
|
req->errors++;
|
|
req->errors++;
|
|
- nbd_end_request(nbd, req);
|
|
|
|
|
|
+ nbd_end_request(cmd);
|
|
}
|
|
}
|
|
|
|
|
|
-static int nbd_thread_send(void *data)
|
|
|
|
|
|
+static int nbd_queue_rq(struct blk_mq_hw_ctx *hctx,
|
|
|
|
+ const struct blk_mq_queue_data *bd)
|
|
{
|
|
{
|
|
- struct nbd_device *nbd = data;
|
|
|
|
- struct request *req;
|
|
|
|
-
|
|
|
|
- nbd->task_send = current;
|
|
|
|
-
|
|
|
|
- set_user_nice(current, MIN_NICE);
|
|
|
|
- while (!kthread_should_stop() || !list_empty(&nbd->waiting_queue)) {
|
|
|
|
- /* wait for something to do */
|
|
|
|
- wait_event_interruptible(nbd->waiting_wq,
|
|
|
|
- kthread_should_stop() ||
|
|
|
|
- !list_empty(&nbd->waiting_queue));
|
|
|
|
-
|
|
|
|
- /* extract request */
|
|
|
|
- if (list_empty(&nbd->waiting_queue))
|
|
|
|
- continue;
|
|
|
|
-
|
|
|
|
- spin_lock_irq(&nbd->queue_lock);
|
|
|
|
- req = list_entry(nbd->waiting_queue.next, struct request,
|
|
|
|
- queuelist);
|
|
|
|
- list_del_init(&req->queuelist);
|
|
|
|
- spin_unlock_irq(&nbd->queue_lock);
|
|
|
|
-
|
|
|
|
- /* handle request */
|
|
|
|
- nbd_handle_req(nbd, req);
|
|
|
|
- }
|
|
|
|
|
|
+ struct nbd_cmd *cmd = blk_mq_rq_to_pdu(bd->rq);
|
|
|
|
|
|
- nbd->task_send = NULL;
|
|
|
|
-
|
|
|
|
- return 0;
|
|
|
|
-}
|
|
|
|
-
|
|
|
|
-/*
|
|
|
|
- * We always wait for result of write, for now. It would be nice to make it optional
|
|
|
|
- * in future
|
|
|
|
- * if ((rq_data_dir(req) == WRITE) && (nbd->flags & NBD_WRITE_NOCHK))
|
|
|
|
- * { printk( "Warning: Ignoring result!\n"); nbd_end_request( req ); }
|
|
|
|
- */
|
|
|
|
-
|
|
|
|
-static void nbd_request_handler(struct request_queue *q)
|
|
|
|
- __releases(q->queue_lock) __acquires(q->queue_lock)
|
|
|
|
-{
|
|
|
|
- struct request *req;
|
|
|
|
-
|
|
|
|
- while ((req = blk_fetch_request(q)) != NULL) {
|
|
|
|
- struct nbd_device *nbd;
|
|
|
|
-
|
|
|
|
- spin_unlock_irq(q->queue_lock);
|
|
|
|
-
|
|
|
|
- nbd = req->rq_disk->private_data;
|
|
|
|
-
|
|
|
|
- BUG_ON(nbd->magic != NBD_MAGIC);
|
|
|
|
-
|
|
|
|
- dev_dbg(nbd_to_dev(nbd), "request %p: dequeued (flags=%x)\n",
|
|
|
|
- req, req->cmd_type);
|
|
|
|
-
|
|
|
|
- if (unlikely(!nbd->sock)) {
|
|
|
|
- dev_err_ratelimited(disk_to_dev(nbd->disk),
|
|
|
|
- "Attempted send on closed socket\n");
|
|
|
|
- req->errors++;
|
|
|
|
- nbd_end_request(nbd, req);
|
|
|
|
- spin_lock_irq(q->queue_lock);
|
|
|
|
- continue;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- spin_lock_irq(&nbd->queue_lock);
|
|
|
|
- list_add_tail(&req->queuelist, &nbd->waiting_queue);
|
|
|
|
- spin_unlock_irq(&nbd->queue_lock);
|
|
|
|
-
|
|
|
|
- wake_up(&nbd->waiting_wq);
|
|
|
|
-
|
|
|
|
- spin_lock_irq(q->queue_lock);
|
|
|
|
- }
|
|
|
|
|
|
+ blk_mq_start_request(bd->rq);
|
|
|
|
+ nbd_handle_cmd(cmd);
|
|
|
|
+ return BLK_MQ_RQ_QUEUE_OK;
|
|
}
|
|
}
|
|
|
|
|
|
static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
|
|
static int nbd_set_socket(struct nbd_device *nbd, struct socket *sock)
|
|
@@ -657,15 +551,13 @@ out:
|
|
/* Reset all properties of an NBD device */
|
|
/* Reset all properties of an NBD device */
|
|
static void nbd_reset(struct nbd_device *nbd)
|
|
static void nbd_reset(struct nbd_device *nbd)
|
|
{
|
|
{
|
|
- nbd->disconnect = false;
|
|
|
|
- nbd->timedout = false;
|
|
|
|
|
|
+ nbd->runtime_flags = 0;
|
|
nbd->blksize = 1024;
|
|
nbd->blksize = 1024;
|
|
nbd->bytesize = 0;
|
|
nbd->bytesize = 0;
|
|
set_capacity(nbd->disk, 0);
|
|
set_capacity(nbd->disk, 0);
|
|
nbd->flags = 0;
|
|
nbd->flags = 0;
|
|
- nbd->xmit_timeout = 0;
|
|
|
|
|
|
+ nbd->tag_set.timeout = 0;
|
|
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
|
|
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, nbd->disk->queue);
|
|
- del_timer_sync(&nbd->timeout_timer);
|
|
|
|
}
|
|
}
|
|
|
|
|
|
static void nbd_bdev_reset(struct block_device *bdev)
|
|
static void nbd_bdev_reset(struct block_device *bdev)
|
|
@@ -700,33 +592,37 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
|
{
|
|
{
|
|
switch (cmd) {
|
|
switch (cmd) {
|
|
case NBD_DISCONNECT: {
|
|
case NBD_DISCONNECT: {
|
|
- struct request sreq;
|
|
|
|
|
|
+ struct request *sreq;
|
|
|
|
|
|
dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
|
|
dev_info(disk_to_dev(nbd->disk), "NBD_DISCONNECT\n");
|
|
if (!nbd->sock)
|
|
if (!nbd->sock)
|
|
return -EINVAL;
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
+ sreq = blk_mq_alloc_request(bdev_get_queue(bdev), WRITE, 0);
|
|
|
|
+ if (!sreq)
|
|
|
|
+ return -ENOMEM;
|
|
|
|
+
|
|
mutex_unlock(&nbd->tx_lock);
|
|
mutex_unlock(&nbd->tx_lock);
|
|
fsync_bdev(bdev);
|
|
fsync_bdev(bdev);
|
|
mutex_lock(&nbd->tx_lock);
|
|
mutex_lock(&nbd->tx_lock);
|
|
- blk_rq_init(NULL, &sreq);
|
|
|
|
- sreq.cmd_type = REQ_TYPE_DRV_PRIV;
|
|
|
|
|
|
+ sreq->cmd_type = REQ_TYPE_DRV_PRIV;
|
|
|
|
|
|
/* Check again after getting mutex back. */
|
|
/* Check again after getting mutex back. */
|
|
- if (!nbd->sock)
|
|
|
|
|
|
+ if (!nbd->sock) {
|
|
|
|
+ blk_mq_free_request(sreq);
|
|
return -EINVAL;
|
|
return -EINVAL;
|
|
|
|
+ }
|
|
|
|
|
|
- nbd->disconnect = true;
|
|
|
|
|
|
+ set_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags);
|
|
|
|
|
|
- nbd_send_req(nbd, &sreq);
|
|
|
|
|
|
+ nbd_send_cmd(nbd, blk_mq_rq_to_pdu(sreq));
|
|
|
|
+ blk_mq_free_request(sreq);
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
|
|
case NBD_CLEAR_SOCK:
|
|
case NBD_CLEAR_SOCK:
|
|
sock_shutdown(nbd);
|
|
sock_shutdown(nbd);
|
|
nbd_clear_que(nbd);
|
|
nbd_clear_que(nbd);
|
|
- BUG_ON(!list_empty(&nbd->queue_head));
|
|
|
|
- BUG_ON(!list_empty(&nbd->waiting_queue));
|
|
|
|
kill_bdev(bdev);
|
|
kill_bdev(bdev);
|
|
return 0;
|
|
return 0;
|
|
|
|
|
|
@@ -758,13 +654,7 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
|
return nbd_size_set(nbd, bdev, nbd->blksize, arg);
|
|
return nbd_size_set(nbd, bdev, nbd->blksize, arg);
|
|
|
|
|
|
case NBD_SET_TIMEOUT:
|
|
case NBD_SET_TIMEOUT:
|
|
- nbd->xmit_timeout = arg * HZ;
|
|
|
|
- if (arg)
|
|
|
|
- mod_timer(&nbd->timeout_timer,
|
|
|
|
- jiffies + nbd->xmit_timeout);
|
|
|
|
- else
|
|
|
|
- del_timer_sync(&nbd->timeout_timer);
|
|
|
|
-
|
|
|
|
|
|
+ nbd->tag_set.timeout = arg * HZ;
|
|
return 0;
|
|
return 0;
|
|
|
|
|
|
case NBD_SET_FLAGS:
|
|
case NBD_SET_FLAGS:
|
|
@@ -772,7 +662,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
|
return 0;
|
|
return 0;
|
|
|
|
|
|
case NBD_DO_IT: {
|
|
case NBD_DO_IT: {
|
|
- struct task_struct *thread;
|
|
|
|
int error;
|
|
int error;
|
|
|
|
|
|
if (nbd->task_recv)
|
|
if (nbd->task_recv)
|
|
@@ -786,18 +675,9 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
|
|
|
|
|
nbd_parse_flags(nbd, bdev);
|
|
nbd_parse_flags(nbd, bdev);
|
|
|
|
|
|
- thread = kthread_run(nbd_thread_send, nbd, "%s",
|
|
|
|
- nbd_name(nbd));
|
|
|
|
- if (IS_ERR(thread)) {
|
|
|
|
- mutex_lock(&nbd->tx_lock);
|
|
|
|
- nbd->task_recv = NULL;
|
|
|
|
- return PTR_ERR(thread);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
nbd_dev_dbg_init(nbd);
|
|
nbd_dev_dbg_init(nbd);
|
|
error = nbd_thread_recv(nbd, bdev);
|
|
error = nbd_thread_recv(nbd, bdev);
|
|
nbd_dev_dbg_close(nbd);
|
|
nbd_dev_dbg_close(nbd);
|
|
- kthread_stop(thread);
|
|
|
|
|
|
|
|
mutex_lock(&nbd->tx_lock);
|
|
mutex_lock(&nbd->tx_lock);
|
|
nbd->task_recv = NULL;
|
|
nbd->task_recv = NULL;
|
|
@@ -807,9 +687,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
|
kill_bdev(bdev);
|
|
kill_bdev(bdev);
|
|
nbd_bdev_reset(bdev);
|
|
nbd_bdev_reset(bdev);
|
|
|
|
|
|
- if (nbd->disconnect) /* user requested, ignore socket errors */
|
|
|
|
|
|
+ /* user requested, ignore socket errors */
|
|
|
|
+ if (test_bit(NBD_DISCONNECT_REQUESTED, &nbd->runtime_flags))
|
|
error = 0;
|
|
error = 0;
|
|
- if (nbd->timedout)
|
|
|
|
|
|
+ if (test_bit(NBD_TIMEDOUT, &nbd->runtime_flags))
|
|
error = -ETIMEDOUT;
|
|
error = -ETIMEDOUT;
|
|
|
|
|
|
nbd_reset(nbd);
|
|
nbd_reset(nbd);
|
|
@@ -825,10 +706,10 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
|
|
return 0;
|
|
return 0;
|
|
|
|
|
|
case NBD_PRINT_DEBUG:
|
|
case NBD_PRINT_DEBUG:
|
|
- dev_info(disk_to_dev(nbd->disk),
|
|
|
|
- "next = %p, prev = %p, head = %p\n",
|
|
|
|
- nbd->queue_head.next, nbd->queue_head.prev,
|
|
|
|
- &nbd->queue_head);
|
|
|
|
|
|
+ /*
|
|
|
|
+ * For compatibility only, we no longer keep a list of
|
|
|
|
+ * outstanding requests.
|
|
|
|
+ */
|
|
return 0;
|
|
return 0;
|
|
}
|
|
}
|
|
return -ENOTTY;
|
|
return -ENOTTY;
|
|
@@ -935,7 +816,7 @@ static int nbd_dev_dbg_init(struct nbd_device *nbd)
|
|
|
|
|
|
debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
|
|
debugfs_create_file("tasks", 0444, dir, nbd, &nbd_dbg_tasks_ops);
|
|
debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
|
|
debugfs_create_u64("size_bytes", 0444, dir, &nbd->bytesize);
|
|
- debugfs_create_u32("timeout", 0444, dir, &nbd->xmit_timeout);
|
|
|
|
|
|
+ debugfs_create_u32("timeout", 0444, dir, &nbd->tag_set.timeout);
|
|
debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
|
|
debugfs_create_u32("blocksize", 0444, dir, &nbd->blksize);
|
|
debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
|
|
debugfs_create_file("flags", 0444, dir, nbd, &nbd_dbg_flags_ops);
|
|
|
|
|
|
@@ -987,6 +868,24 @@ static void nbd_dbg_close(void)
|
|
|
|
|
|
#endif
|
|
#endif
|
|
|
|
|
|
|
|
+static int nbd_init_request(void *data, struct request *rq,
|
|
|
|
+ unsigned int hctx_idx, unsigned int request_idx,
|
|
|
|
+ unsigned int numa_node)
|
|
|
|
+{
|
|
|
|
+ struct nbd_cmd *cmd = blk_mq_rq_to_pdu(rq);
|
|
|
|
+
|
|
|
|
+ cmd->nbd = data;
|
|
|
|
+ INIT_LIST_HEAD(&cmd->list);
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static struct blk_mq_ops nbd_mq_ops = {
|
|
|
|
+ .queue_rq = nbd_queue_rq,
|
|
|
|
+ .map_queue = blk_mq_map_queue,
|
|
|
|
+ .init_request = nbd_init_request,
|
|
|
|
+ .timeout = nbd_xmit_timeout,
|
|
|
|
+};
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* And here should be modules and kernel interface
|
|
* And here should be modules and kernel interface
|
|
* (Just smiley confuses emacs :-)
|
|
* (Just smiley confuses emacs :-)
|
|
@@ -1035,16 +934,34 @@ static int __init nbd_init(void)
|
|
if (!disk)
|
|
if (!disk)
|
|
goto out;
|
|
goto out;
|
|
nbd_dev[i].disk = disk;
|
|
nbd_dev[i].disk = disk;
|
|
|
|
+
|
|
|
|
+ nbd_dev[i].tag_set.ops = &nbd_mq_ops;
|
|
|
|
+ nbd_dev[i].tag_set.nr_hw_queues = 1;
|
|
|
|
+ nbd_dev[i].tag_set.queue_depth = 128;
|
|
|
|
+ nbd_dev[i].tag_set.numa_node = NUMA_NO_NODE;
|
|
|
|
+ nbd_dev[i].tag_set.cmd_size = sizeof(struct nbd_cmd);
|
|
|
|
+ nbd_dev[i].tag_set.flags = BLK_MQ_F_SHOULD_MERGE |
|
|
|
|
+ BLK_MQ_F_SG_MERGE | BLK_MQ_F_BLOCKING;
|
|
|
|
+ nbd_dev[i].tag_set.driver_data = &nbd_dev[i];
|
|
|
|
+
|
|
|
|
+ err = blk_mq_alloc_tag_set(&nbd_dev[i].tag_set);
|
|
|
|
+ if (err) {
|
|
|
|
+ put_disk(disk);
|
|
|
|
+ goto out;
|
|
|
|
+ }
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* The new linux 2.5 block layer implementation requires
|
|
* The new linux 2.5 block layer implementation requires
|
|
* every gendisk to have its very own request_queue struct.
|
|
* every gendisk to have its very own request_queue struct.
|
|
* These structs are big so we dynamically allocate them.
|
|
* These structs are big so we dynamically allocate them.
|
|
*/
|
|
*/
|
|
- disk->queue = blk_init_queue(nbd_request_handler, &nbd_lock);
|
|
|
|
|
|
+ disk->queue = blk_mq_init_queue(&nbd_dev[i].tag_set);
|
|
if (!disk->queue) {
|
|
if (!disk->queue) {
|
|
|
|
+ blk_mq_free_tag_set(&nbd_dev[i].tag_set);
|
|
put_disk(disk);
|
|
put_disk(disk);
|
|
goto out;
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
+
|
|
/*
|
|
/*
|
|
* Tell the block layer that we are not a rotational device
|
|
* Tell the block layer that we are not a rotational device
|
|
*/
|
|
*/
|
|
@@ -1069,16 +986,8 @@ static int __init nbd_init(void)
|
|
for (i = 0; i < nbds_max; i++) {
|
|
for (i = 0; i < nbds_max; i++) {
|
|
struct gendisk *disk = nbd_dev[i].disk;
|
|
struct gendisk *disk = nbd_dev[i].disk;
|
|
nbd_dev[i].magic = NBD_MAGIC;
|
|
nbd_dev[i].magic = NBD_MAGIC;
|
|
- INIT_LIST_HEAD(&nbd_dev[i].waiting_queue);
|
|
|
|
- spin_lock_init(&nbd_dev[i].queue_lock);
|
|
|
|
spin_lock_init(&nbd_dev[i].sock_lock);
|
|
spin_lock_init(&nbd_dev[i].sock_lock);
|
|
- INIT_LIST_HEAD(&nbd_dev[i].queue_head);
|
|
|
|
mutex_init(&nbd_dev[i].tx_lock);
|
|
mutex_init(&nbd_dev[i].tx_lock);
|
|
- init_timer(&nbd_dev[i].timeout_timer);
|
|
|
|
- nbd_dev[i].timeout_timer.function = nbd_xmit_timeout;
|
|
|
|
- nbd_dev[i].timeout_timer.data = (unsigned long)&nbd_dev[i];
|
|
|
|
- init_waitqueue_head(&nbd_dev[i].active_wq);
|
|
|
|
- init_waitqueue_head(&nbd_dev[i].waiting_wq);
|
|
|
|
disk->major = NBD_MAJOR;
|
|
disk->major = NBD_MAJOR;
|
|
disk->first_minor = i << part_shift;
|
|
disk->first_minor = i << part_shift;
|
|
disk->fops = &nbd_fops;
|
|
disk->fops = &nbd_fops;
|
|
@@ -1091,6 +1000,7 @@ static int __init nbd_init(void)
|
|
return 0;
|
|
return 0;
|
|
out:
|
|
out:
|
|
while (i--) {
|
|
while (i--) {
|
|
|
|
+ blk_mq_free_tag_set(&nbd_dev[i].tag_set);
|
|
blk_cleanup_queue(nbd_dev[i].disk->queue);
|
|
blk_cleanup_queue(nbd_dev[i].disk->queue);
|
|
put_disk(nbd_dev[i].disk);
|
|
put_disk(nbd_dev[i].disk);
|
|
}
|
|
}
|
|
@@ -1110,6 +1020,7 @@ static void __exit nbd_cleanup(void)
|
|
if (disk) {
|
|
if (disk) {
|
|
del_gendisk(disk);
|
|
del_gendisk(disk);
|
|
blk_cleanup_queue(disk->queue);
|
|
blk_cleanup_queue(disk->queue);
|
|
|
|
+ blk_mq_free_tag_set(&nbd_dev[i].tag_set);
|
|
put_disk(disk);
|
|
put_disk(disk);
|
|
}
|
|
}
|
|
}
|
|
}
|