|
@@ -96,6 +96,8 @@ static int atomic_dec_return_safe(atomic_t *v)
|
|
#define RBD_MINORS_PER_MAJOR 256
|
|
#define RBD_MINORS_PER_MAJOR 256
|
|
#define RBD_SINGLE_MAJOR_PART_SHIFT 4
|
|
#define RBD_SINGLE_MAJOR_PART_SHIFT 4
|
|
|
|
|
|
|
|
+#define RBD_MAX_PARENT_CHAIN_LEN 16
|
|
|
|
+
|
|
#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
|
|
#define RBD_SNAP_DEV_NAME_PREFIX "snap_"
|
|
#define RBD_MAX_SNAP_NAME_LEN \
|
|
#define RBD_MAX_SNAP_NAME_LEN \
|
|
(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
|
|
(NAME_MAX - (sizeof (RBD_SNAP_DEV_NAME_PREFIX) - 1))
|
|
@@ -426,7 +428,7 @@ static ssize_t rbd_add_single_major(struct bus_type *bus, const char *buf,
|
|
size_t count);
|
|
size_t count);
|
|
static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
|
|
static ssize_t rbd_remove_single_major(struct bus_type *bus, const char *buf,
|
|
size_t count);
|
|
size_t count);
|
|
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping);
|
|
|
|
|
|
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth);
|
|
static void rbd_spec_put(struct rbd_spec *spec);
|
|
static void rbd_spec_put(struct rbd_spec *spec);
|
|
|
|
|
|
static int rbd_dev_id_to_minor(int dev_id)
|
|
static int rbd_dev_id_to_minor(int dev_id)
|
|
@@ -1863,9 +1865,11 @@ static void rbd_osd_req_callback(struct ceph_osd_request *osd_req,
|
|
rbd_osd_read_callback(obj_request);
|
|
rbd_osd_read_callback(obj_request);
|
|
break;
|
|
break;
|
|
case CEPH_OSD_OP_SETALLOCHINT:
|
|
case CEPH_OSD_OP_SETALLOCHINT:
|
|
- rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE);
|
|
|
|
|
|
+ rbd_assert(osd_req->r_ops[1].op == CEPH_OSD_OP_WRITE ||
|
|
|
|
+ osd_req->r_ops[1].op == CEPH_OSD_OP_WRITEFULL);
|
|
/* fall through */
|
|
/* fall through */
|
|
case CEPH_OSD_OP_WRITE:
|
|
case CEPH_OSD_OP_WRITE:
|
|
|
|
+ case CEPH_OSD_OP_WRITEFULL:
|
|
rbd_osd_write_callback(obj_request);
|
|
rbd_osd_write_callback(obj_request);
|
|
break;
|
|
break;
|
|
case CEPH_OSD_OP_STAT:
|
|
case CEPH_OSD_OP_STAT:
|
|
@@ -2401,7 +2405,10 @@ static void rbd_img_obj_request_fill(struct rbd_obj_request *obj_request,
|
|
opcode = CEPH_OSD_OP_ZERO;
|
|
opcode = CEPH_OSD_OP_ZERO;
|
|
}
|
|
}
|
|
} else if (op_type == OBJ_OP_WRITE) {
|
|
} else if (op_type == OBJ_OP_WRITE) {
|
|
- opcode = CEPH_OSD_OP_WRITE;
|
|
|
|
|
|
+ if (!offset && length == object_size)
|
|
|
|
+ opcode = CEPH_OSD_OP_WRITEFULL;
|
|
|
|
+ else
|
|
|
|
+ opcode = CEPH_OSD_OP_WRITE;
|
|
osd_req_op_alloc_hint_init(osd_request, num_ops,
|
|
osd_req_op_alloc_hint_init(osd_request, num_ops,
|
|
object_size, object_size);
|
|
object_size, object_size);
|
|
num_ops++;
|
|
num_ops++;
|
|
@@ -3760,6 +3767,7 @@ static int rbd_init_disk(struct rbd_device *rbd_dev)
|
|
/* set io sizes to object size */
|
|
/* set io sizes to object size */
|
|
segment_size = rbd_obj_bytes(&rbd_dev->header);
|
|
segment_size = rbd_obj_bytes(&rbd_dev->header);
|
|
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
|
|
blk_queue_max_hw_sectors(q, segment_size / SECTOR_SIZE);
|
|
|
|
+ q->limits.max_sectors = queue_max_hw_sectors(q);
|
|
blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
|
|
blk_queue_max_segments(q, segment_size / SECTOR_SIZE);
|
|
blk_queue_max_segment_size(q, segment_size);
|
|
blk_queue_max_segment_size(q, segment_size);
|
|
blk_queue_io_min(q, segment_size);
|
|
blk_queue_io_min(q, segment_size);
|
|
@@ -5125,44 +5133,51 @@ out_err:
|
|
return ret;
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
|
|
-static int rbd_dev_probe_parent(struct rbd_device *rbd_dev)
|
|
|
|
|
|
+/*
|
|
|
|
+ * @depth is rbd_dev_image_probe() -> rbd_dev_probe_parent() ->
|
|
|
|
+ * rbd_dev_image_probe() recursion depth, which means it's also the
|
|
|
|
+ * length of the already discovered part of the parent chain.
|
|
|
|
+ */
|
|
|
|
+static int rbd_dev_probe_parent(struct rbd_device *rbd_dev, int depth)
|
|
{
|
|
{
|
|
struct rbd_device *parent = NULL;
|
|
struct rbd_device *parent = NULL;
|
|
- struct rbd_spec *parent_spec;
|
|
|
|
- struct rbd_client *rbdc;
|
|
|
|
int ret;
|
|
int ret;
|
|
|
|
|
|
if (!rbd_dev->parent_spec)
|
|
if (!rbd_dev->parent_spec)
|
|
return 0;
|
|
return 0;
|
|
- /*
|
|
|
|
- * We need to pass a reference to the client and the parent
|
|
|
|
- * spec when creating the parent rbd_dev. Images related by
|
|
|
|
- * parent/child relationships always share both.
|
|
|
|
- */
|
|
|
|
- parent_spec = rbd_spec_get(rbd_dev->parent_spec);
|
|
|
|
- rbdc = __rbd_get_client(rbd_dev->rbd_client);
|
|
|
|
|
|
|
|
- ret = -ENOMEM;
|
|
|
|
- parent = rbd_dev_create(rbdc, parent_spec, NULL);
|
|
|
|
- if (!parent)
|
|
|
|
|
|
+ if (++depth > RBD_MAX_PARENT_CHAIN_LEN) {
|
|
|
|
+ pr_info("parent chain is too long (%d)\n", depth);
|
|
|
|
+ ret = -EINVAL;
|
|
goto out_err;
|
|
goto out_err;
|
|
|
|
+ }
|
|
|
|
|
|
- ret = rbd_dev_image_probe(parent, false);
|
|
|
|
|
|
+ parent = rbd_dev_create(rbd_dev->rbd_client, rbd_dev->parent_spec,
|
|
|
|
+ NULL);
|
|
|
|
+ if (!parent) {
|
|
|
|
+ ret = -ENOMEM;
|
|
|
|
+ goto out_err;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Images related by parent/child relationships always share
|
|
|
|
+ * rbd_client and spec/parent_spec, so bump their refcounts.
|
|
|
|
+ */
|
|
|
|
+ __rbd_get_client(rbd_dev->rbd_client);
|
|
|
|
+ rbd_spec_get(rbd_dev->parent_spec);
|
|
|
|
+
|
|
|
|
+ ret = rbd_dev_image_probe(parent, depth);
|
|
if (ret < 0)
|
|
if (ret < 0)
|
|
goto out_err;
|
|
goto out_err;
|
|
|
|
+
|
|
rbd_dev->parent = parent;
|
|
rbd_dev->parent = parent;
|
|
atomic_set(&rbd_dev->parent_ref, 1);
|
|
atomic_set(&rbd_dev->parent_ref, 1);
|
|
-
|
|
|
|
return 0;
|
|
return 0;
|
|
|
|
+
|
|
out_err:
|
|
out_err:
|
|
- if (parent) {
|
|
|
|
- rbd_dev_unparent(rbd_dev);
|
|
|
|
|
|
+ rbd_dev_unparent(rbd_dev);
|
|
|
|
+ if (parent)
|
|
rbd_dev_destroy(parent);
|
|
rbd_dev_destroy(parent);
|
|
- } else {
|
|
|
|
- rbd_put_client(rbdc);
|
|
|
|
- rbd_spec_put(parent_spec);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
return ret;
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
|
|
@@ -5280,7 +5295,7 @@ static void rbd_dev_image_release(struct rbd_device *rbd_dev)
|
|
* parent), initiate a watch on its header object before using that
|
|
* parent), initiate a watch on its header object before using that
|
|
* object to get detailed information about the rbd image.
|
|
* object to get detailed information about the rbd image.
|
|
*/
|
|
*/
|
|
-static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
|
|
|
|
|
|
+static int rbd_dev_image_probe(struct rbd_device *rbd_dev, int depth)
|
|
{
|
|
{
|
|
int ret;
|
|
int ret;
|
|
|
|
|
|
@@ -5298,7 +5313,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
|
|
if (ret)
|
|
if (ret)
|
|
goto err_out_format;
|
|
goto err_out_format;
|
|
|
|
|
|
- if (mapping) {
|
|
|
|
|
|
+ if (!depth) {
|
|
ret = rbd_dev_header_watch_sync(rbd_dev);
|
|
ret = rbd_dev_header_watch_sync(rbd_dev);
|
|
if (ret) {
|
|
if (ret) {
|
|
if (ret == -ENOENT)
|
|
if (ret == -ENOENT)
|
|
@@ -5319,7 +5334,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
|
|
* Otherwise this is a parent image, identified by pool, image
|
|
* Otherwise this is a parent image, identified by pool, image
|
|
* and snap ids - need to fill in names for those ids.
|
|
* and snap ids - need to fill in names for those ids.
|
|
*/
|
|
*/
|
|
- if (mapping)
|
|
|
|
|
|
+ if (!depth)
|
|
ret = rbd_spec_fill_snap_id(rbd_dev);
|
|
ret = rbd_spec_fill_snap_id(rbd_dev);
|
|
else
|
|
else
|
|
ret = rbd_spec_fill_names(rbd_dev);
|
|
ret = rbd_spec_fill_names(rbd_dev);
|
|
@@ -5341,12 +5356,12 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
|
|
* Need to warn users if this image is the one being
|
|
* Need to warn users if this image is the one being
|
|
* mapped and has a parent.
|
|
* mapped and has a parent.
|
|
*/
|
|
*/
|
|
- if (mapping && rbd_dev->parent_spec)
|
|
|
|
|
|
+ if (!depth && rbd_dev->parent_spec)
|
|
rbd_warn(rbd_dev,
|
|
rbd_warn(rbd_dev,
|
|
"WARNING: kernel layering is EXPERIMENTAL!");
|
|
"WARNING: kernel layering is EXPERIMENTAL!");
|
|
}
|
|
}
|
|
|
|
|
|
- ret = rbd_dev_probe_parent(rbd_dev);
|
|
|
|
|
|
+ ret = rbd_dev_probe_parent(rbd_dev, depth);
|
|
if (ret)
|
|
if (ret)
|
|
goto err_out_probe;
|
|
goto err_out_probe;
|
|
|
|
|
|
@@ -5357,7 +5372,7 @@ static int rbd_dev_image_probe(struct rbd_device *rbd_dev, bool mapping)
|
|
err_out_probe:
|
|
err_out_probe:
|
|
rbd_dev_unprobe(rbd_dev);
|
|
rbd_dev_unprobe(rbd_dev);
|
|
err_out_watch:
|
|
err_out_watch:
|
|
- if (mapping)
|
|
|
|
|
|
+ if (!depth)
|
|
rbd_dev_header_unwatch_sync(rbd_dev);
|
|
rbd_dev_header_unwatch_sync(rbd_dev);
|
|
out_header_name:
|
|
out_header_name:
|
|
kfree(rbd_dev->header_name);
|
|
kfree(rbd_dev->header_name);
|
|
@@ -5420,7 +5435,7 @@ static ssize_t do_rbd_add(struct bus_type *bus,
|
|
spec = NULL; /* rbd_dev now owns this */
|
|
spec = NULL; /* rbd_dev now owns this */
|
|
rbd_opts = NULL; /* rbd_dev now owns this */
|
|
rbd_opts = NULL; /* rbd_dev now owns this */
|
|
|
|
|
|
- rc = rbd_dev_image_probe(rbd_dev, true);
|
|
|
|
|
|
+ rc = rbd_dev_image_probe(rbd_dev, 0);
|
|
if (rc < 0)
|
|
if (rc < 0)
|
|
goto err_out_rbd_dev;
|
|
goto err_out_rbd_dev;
|
|
|
|
|