@@ -44,6 +44,7 @@
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
 #include <linux/bitmap.h>
+#include <linux/llist.h>
 
 #include <xen/xen.h>
 #include <xen/xenbus.h>
@@ -64,10 +65,17 @@ enum blkif_state {
 	BLKIF_STATE_SUSPENDED,
 };
 
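+/* A page granted to the backend that stays shared across requests. */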
+struct grant {
+	grant_ref_t gref;
+	unsigned long pfn;
+	struct llist_node node;
+};
+
 struct blk_shadow {
 	struct blkif_request req;
 	struct request *request;
 	unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+	struct grant *grants_used[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
 
 static DEFINE_MUTEX(blkfront_mutex);
@@ -97,6 +105,8 @@ struct blkfront_info
 	struct work_struct work;
 	struct gnttab_free_callback callback;
 	struct blk_shadow shadow[BLK_RING_SIZE];
+	struct llist_head persistent_gnts;
+	unsigned int persistent_gnts_c;
 	unsigned long shadow_free;
 	unsigned int feature_flush;
 	unsigned int flush_op;
@@ -104,6 +114,7 @@ struct blkfront_info
 	unsigned int feature_secdiscard:1;
 	unsigned int discard_granularity;
 	unsigned int discard_alignment;
+	unsigned int feature_persistent:1;
 	int is_ready;
 };
 
@@ -287,21 +298,36 @@ static int blkif_queue_request(struct request *req)
 	unsigned long id;
 	unsigned int fsect, lsect;
 	int i, ref;
+
+	/*
+	 * Used to record whether we can queue the request by just using
+	 * existing persistent grants, or whether we have to get new grants,
+	 * as there are not enough free ones.
+	 */
+	bool new_persistent_gnts;
 	grant_ref_t gref_head;
+	struct page *granted_page;
+	struct grant *gnt_list_entry = NULL;
 	struct scatterlist *sg;
 
 	if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
 		return 1;
 
-	if (gnttab_alloc_grant_references(
-		BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
-		gnttab_request_free_callback(
-			&info->callback,
-			blkif_restart_queue_callback,
-			info,
-			BLKIF_MAX_SEGMENTS_PER_REQUEST);
-		return 1;
-	}
+	/* Check if we have enough grants to allocate a request */
+	if (info->persistent_gnts_c < BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+		new_persistent_gnts = 1;
+		if (gnttab_alloc_grant_references(
+		    BLKIF_MAX_SEGMENTS_PER_REQUEST - info->persistent_gnts_c,
+		    &gref_head) < 0) {
+			gnttab_request_free_callback(
+				&info->callback,
+				blkif_restart_queue_callback,
+				info,
+				BLKIF_MAX_SEGMENTS_PER_REQUEST);
+			return 1;
+		}
+	} else
+		new_persistent_gnts = 0;
 
 	/* Fill out a communications ring structure. */
 	ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
@@ -341,18 +367,73 @@ static int blkif_queue_request(struct request *req)
 			BLKIF_MAX_SEGMENTS_PER_REQUEST);
 
 		for_each_sg(info->sg, sg, ring_req->u.rw.nr_segments, i) {
-			buffer_mfn = pfn_to_mfn(page_to_pfn(sg_page(sg)));
 			fsect = sg->offset >> 9;
 			lsect = fsect + (sg->length >> 9) - 1;
-			/* install a grant reference. */
-			ref = gnttab_claim_grant_reference(&gref_head);
-			BUG_ON(ref == -ENOSPC);
 
-			gnttab_grant_foreign_access_ref(
-					ref,
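+			/*
+			 * Reuse a cached persistent grant when one is free;
+			 * otherwise claim a fresh grant reference and back it
+			 * with a newly allocated page.
+			 */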
+			if (info->persistent_gnts_c) {
+				BUG_ON(llist_empty(&info->persistent_gnts));
+				gnt_list_entry = llist_entry(
+					llist_del_first(&info->persistent_gnts),
+					struct grant, node);
+
+				ref = gnt_list_entry->gref;
+				buffer_mfn = pfn_to_mfn(gnt_list_entry->pfn);
+				info->persistent_gnts_c--;
+			} else {
+				ref = gnttab_claim_grant_reference(&gref_head);
+				BUG_ON(ref == -ENOSPC);
+
+				gnt_list_entry =
+					kmalloc(sizeof(struct grant),
+						GFP_ATOMIC);
+				if (!gnt_list_entry)
+					return -ENOMEM;
+
+				granted_page = alloc_page(GFP_ATOMIC);
+				if (!granted_page) {
+					kfree(gnt_list_entry);
+					return -ENOMEM;
+				}
+
+				gnt_list_entry->pfn =
+					page_to_pfn(granted_page);
+				gnt_list_entry->gref = ref;
+
+				buffer_mfn = pfn_to_mfn(page_to_pfn(
+								granted_page));
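+				/*
+				 * Map the page read/write: the same grant is
+				 * reused later for both reads and writes.
+				 */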
+				gnttab_grant_foreign_access_ref(ref,
 					info->xbdev->otherend_id,
-					buffer_mfn,
-					rq_data_dir(req));
+					buffer_mfn, 0);
+			}
+
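+			/* Remember the grant so blkif_completion() can recycle it. */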
+			info->shadow[id].grants_used[i] = gnt_list_entry;
+
+			if (rq_data_dir(req)) {
+				char *bvec_data;
+				void *shared_data;
+
+				BUG_ON(sg->offset + sg->length > PAGE_SIZE);
+
+				shared_data = kmap_atomic(
+					pfn_to_page(gnt_list_entry->pfn));
+				bvec_data = kmap_atomic(sg_page(sg));
+
+				/*
+				 * This does not wipe data stored outside the
+				 * range sg->offset..sg->offset+sg->length.
+				 * Therefore, blkback *could* see data from
+				 * previous requests. This is OK as long as
+				 * persistent grants are shared with just one
+				 * domain. It may need refactoring if this
+				 * changes.
+				 */
+				memcpy(shared_data + sg->offset,
+				       bvec_data + sg->offset,
+				       sg->length);
+
+				kunmap_atomic(bvec_data);
+				kunmap_atomic(shared_data);
+			}
 
 			info->shadow[id].frame[i] = mfn_to_pfn(buffer_mfn);
 			ring_req->u.rw.seg[i] =
@@ -368,7 +449,8 @@ static int blkif_queue_request(struct request *req)
 	/* Keep a private copy so we can reissue requests when recovering. */
 	info->shadow[id].req = *ring_req;
 
-	gnttab_free_grant_references(gref_head);
+	if (new_persistent_gnts)
+		gnttab_free_grant_references(gref_head);
 
 	return 0;
 }
@@ -480,12 +562,13 @@ static int xlvbd_init_blk_queue(struct gendisk *gd, u16 sector_size)
 static void xlvbd_flush(struct blkfront_info *info)
 {
 	blk_queue_flush(info->rq, info->feature_flush);
-	printk(KERN_INFO "blkfront: %s: %s: %s\n",
+	printk(KERN_INFO "blkfront: %s: %s: %s %s\n",
 	       info->gd->disk_name,
 	       info->flush_op == BLKIF_OP_WRITE_BARRIER ?
 		"barrier" : (info->flush_op == BLKIF_OP_FLUSH_DISKCACHE ?
 		"flush diskcache" : "barrier or flush"),
-	       info->feature_flush ? "enabled" : "disabled");
+	       info->feature_flush ? "enabled" : "disabled",
+	       info->feature_persistent ? "using persistent grants" : "");
 }
 
 static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
@@ -707,6 +790,9 @@ static void blkif_restart_queue(struct work_struct *work)
 
 static void blkif_free(struct blkfront_info *info, int suspend)
 {
+	struct llist_node *all_gnts;
+	struct grant *persistent_gnt;
+
 	/* Prevent new requests being issued until we fix things up. */
 	spin_lock_irq(&info->io_lock);
 	info->connected = suspend ?
@@ -714,6 +800,17 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 	/* No more blkif_request(). */
 	if (info->rq)
 		blk_stop_queue(info->rq);
+
+	/* Remove all persistent grants */
+	if (info->persistent_gnts_c) {
+		all_gnts = llist_del_all(&info->persistent_gnts);
+		llist_for_each_entry(persistent_gnt, all_gnts, node) {
+			gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL);
+			kfree(persistent_gnt);
+		}
+		info->persistent_gnts_c = 0;
+	}
+
 	/* No more gnttab callback work. */
 	gnttab_cancel_free_callback(&info->callback);
 	spin_unlock_irq(&info->io_lock);
@@ -734,13 +831,43 @@ static void blkif_free(struct blkfront_info *info, int suspend)
 
 }
 
-static void blkif_completion(struct blk_shadow *s)
+static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info,
+			     struct blkif_response *bret)
 {
 	int i;
-	/* Do not let BLKIF_OP_DISCARD as nr_segment is in the same place
-	 * flag. */
-	for (i = 0; i < s->req.u.rw.nr_segments; i++)
-		gnttab_end_foreign_access(s->req.u.rw.seg[i].gref, 0, 0UL);
+	struct bio_vec *bvec;
+	struct req_iterator iter;
+	unsigned long flags;
+	char *bvec_data;
+	void *shared_data;
+	unsigned int offset = 0;
+
+	if (bret->operation == BLKIF_OP_READ) {
+		/*
+		 * Copy the data received from the backend into the bvec.
+		 * Since bv_offset can be different from 0, and bv_len different
+		 * from PAGE_SIZE, we have to keep track of the current offset,
+		 * to be sure we are copying the data from the right shared page.
+		 */
+		rq_for_each_segment(bvec, s->request, iter) {
+			BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE);
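+			/*
+			 * The accumulated offset selects the shadow segment
+			 * whose shared page backs this bvec.
+			 */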
+			i = offset >> PAGE_SHIFT;
+			BUG_ON(i >= s->req.u.rw.nr_segments);
+			shared_data = kmap_atomic(
+				pfn_to_page(s->grants_used[i]->pfn));
+			bvec_data = bvec_kmap_irq(bvec, &flags);
+			memcpy(bvec_data, shared_data + bvec->bv_offset,
+				bvec->bv_len);
+			bvec_kunmap_irq(bvec_data, &flags);
+			kunmap_atomic(shared_data);
+			offset += bvec->bv_len;
+		}
+	}
+	/* Add the persistent grant into the list of free grants */
+	for (i = 0; i < s->req.u.rw.nr_segments; i++) {
+		llist_add(&s->grants_used[i]->node, &info->persistent_gnts);
+		info->persistent_gnts_c++;
+	}
 }
 
 static irqreturn_t blkif_interrupt(int irq, void *dev_id)
@@ -783,7 +910,7 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
 		req = info->shadow[id].request;
 
 		if (bret->operation != BLKIF_OP_DISCARD)
-			blkif_completion(&info->shadow[id]);
+			blkif_completion(&info->shadow[id], info, bret);
 
 		if (add_id_to_freelist(info, id)) {
 			WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
@@ -942,6 +1069,11 @@ again:
 		message = "writing protocol";
 		goto abort_transaction;
 	}
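+	/* Advertise that this frontend can use persistent grants. */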
+	err = xenbus_printf(xbt, dev->nodename,
+			    "feature-persistent", "%u", 1);
+	if (err)
+		dev_warn(&dev->dev,
+			 "writing persistent grants feature to xenbus");
 
 	err = xenbus_transaction_end(xbt, 0);
 	if (err) {
@@ -1029,6 +1161,8 @@ static int blkfront_probe(struct xenbus_device *dev,
 	spin_lock_init(&info->io_lock);
 	info->xbdev = dev;
 	info->vdevice = vdevice;
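+	/* The cache of reusable persistent grants starts out empty. */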
+	init_llist_head(&info->persistent_gnts);
+	info->persistent_gnts_c = 0;
 	info->connected = BLKIF_STATE_DISCONNECTED;
 	INIT_WORK(&info->work, blkif_restart_queue);
 
@@ -1093,7 +1227,7 @@ static int blkif_recover(struct blkfront_info *info)
 				req->u.rw.seg[j].gref,
 				info->xbdev->otherend_id,
 				pfn_to_mfn(info->shadow[req->u.rw.id].frame[j]),
-				rq_data_dir(info->shadow[req->u.rw.id].request));
+				0);
 		}
 		info->shadow[req->u.rw.id].req = *req;
 
@@ -1225,7 +1359,7 @@ static void blkfront_connect(struct blkfront_info *info)
 	unsigned long sector_size;
 	unsigned int binfo;
 	int err;
-	int barrier, flush, discard;
+	int barrier, flush, discard, persistent;
 
 	switch (info->connected) {
 	case BLKIF_STATE_CONNECTED:
@@ -1303,6 +1437,14 @@ static void blkfront_connect(struct blkfront_info *info)
 	if (!err && discard)
 		blkfront_setup_discard(info);
 
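+	/* Check whether the backend supports persistent grants too. */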
+	err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
+			    "feature-persistent", "%u", &persistent,
+			    NULL);
+	if (err)
+		info->feature_persistent = 0;
+	else
+		info->feature_persistent = persistent;
+
 	err = xlvbd_alloc_gendisk(sectors, info, binfo, sector_size);
 	if (err) {
 		xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",