On 07/11/2012 02:36 PM, Josh Durgin wrote:
> On 07/11/2012 07:02 AM, Alex Elder wrote:
>> An rbd device's pool name is used to specify the pool to use for an
>> rbd image when it gets mapped (via rbd_add()). However, only the
>> pool id can be relied on to be invariant--the name of a pool can
>> change at any time.
>>
>> This means that the name of the pool is potentially stale as soon as
>> the image is mapped, so it's a bad idea to record this information.
>> So only store the pool id, not its name, in an rbd_dev.
>>
>> Here are a few specific notes about this change:
>> - The "pool" name device attribute (/sys/bus/rbd/devices/<N>/pool)
>> goes away. In its place is a "pool_id" attribute that provide
>> the numeric pool id for the rbd image.
>
> We're using the pool name for udev to provide a predictable device name
> (/dev/rbd/poolname/imagename[@snapname]), so we probably want to keep
> the sysfs attribute. I don't think there's a good way for us to detect
> pool renames right now though, so we should document that the pool
> name reported does not reflect any potential renames.
OK, now that I've thought about this...
I'm going to pull this patch from the series entirely.
In its place, I'll do a patch that makes the pool name
get allocated dynamically (like the patches that follow
this one). I will also put in place a patch that exposes
the pool id via the /sys/bus/rbd/devices/<N>/pool_id though.
Long term we could switch to using the pool id for the
predictable device name. All format 1 images would
share the same pool (which would need to be assigned a
reserved pool id different from the empty string).
Which brings up another thing...
It would be good to provide an efficient tool to allow
a customer to convert a format 1 rbd image into format 2.
It seems like it would mostly involve object renames,
however they would all have to be done transactionally.
-Alex
>> - rbd_add_parse_args() now returns a pointer to a dynamically-
>> allocated copy of the pool name taken from the arguments.
>> - rbd_add() has been changed to handle freeing the pool name,
>> both in error cases and in the normal case after the pool id
>> has been recorded.
>>
>> Signed-off-by: Alex Elder<[email protected]>
>> ---
>> drivers/block/rbd.c | 74
>> +++++++++++++++++++++++++++++++-------------------
>> 1 files changed, 46 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
>> index 3aa0ca0..76e978c 100644
>> --- a/drivers/block/rbd.c
>> +++ b/drivers/block/rbd.c
>> @@ -56,7 +56,6 @@
>> #define RBD_MINORS_PER_MAJOR 256 /* max minors per blkdev */
>>
>> #define RBD_MAX_MD_NAME_LEN (RBD_MAX_OBJ_NAME_LEN +
>> sizeof(RBD_SUFFIX))
>> -#define RBD_MAX_POOL_NAME_LEN 64
>> #define RBD_MAX_SNAP_NAME_LEN 32
>> #define RBD_MAX_OPT_LEN 1024
>>
>> @@ -166,8 +165,7 @@ struct rbd_device {
>> char obj[RBD_MAX_OBJ_NAME_LEN]; /* rbd image name */
>> int obj_len;
>> char obj_md_name[RBD_MAX_MD_NAME_LEN]; /* hdr nm. */
>> - char pool_name[RBD_MAX_POOL_NAME_LEN];
>> - int poolid;
>> + int pool_id;
>>
>> struct ceph_osd_event *watch_event;
>> struct ceph_osd_request *watch_request;
>> @@ -930,7 +928,7 @@ static int rbd_do_request(struct request *rq,
>> layout->fl_stripe_unit = cpu_to_le32(1<< RBD_MAX_OBJ_ORDER);
>> layout->fl_stripe_count = cpu_to_le32(1);
>> layout->fl_object_size = cpu_to_le32(1<< RBD_MAX_OBJ_ORDER);
>> - layout->fl_pg_pool = cpu_to_le32(dev->poolid);
>> + layout->fl_pg_pool = cpu_to_le32(dev->pool_id);
>> ceph_calc_raw_layout(osdc, layout, snapid, ofs,&len,&bno,
>> req, ops);
>>
>> @@ -1653,7 +1651,7 @@ static int rbd_header_add_snap(struct rbd_device
>> *dev,
>> return -EINVAL;
>>
>> monc =&dev->rbd_client->client->monc;
>> - ret = ceph_monc_create_snapid(monc, dev->poolid,&new_snapid);
>> + ret = ceph_monc_create_snapid(monc, dev->pool_id,&new_snapid);
>> dout("created snapid=%lld\n", new_snapid);
>> if (ret< 0)
>> return ret;
>> @@ -1851,12 +1849,12 @@ static ssize_t rbd_client_id_show(struct device
>> *dev,
>> ceph_client_id(rbd_dev->rbd_client->client));
>> }
>>
>> -static ssize_t rbd_pool_show(struct device *dev,
>> +static ssize_t rbd_pool_id_show(struct device *dev,
>> struct device_attribute *attr, char *buf)
>> {
>> struct rbd_device *rbd_dev = dev_to_rbd_dev(dev);
>>
>> - return sprintf(buf, "%s\n", rbd_dev->pool_name);
>> + return sprintf(buf, "%d\n", rbd_dev->pool_id);
>> }
>>
>> static ssize_t rbd_name_show(struct device *dev,
>> @@ -1898,7 +1896,7 @@ static ssize_t rbd_image_refresh(struct device
>> *dev,
>> static DEVICE_ATTR(size, S_IRUGO, rbd_size_show, NULL);
>> static DEVICE_ATTR(major, S_IRUGO, rbd_major_show, NULL);
>> static DEVICE_ATTR(client_id, S_IRUGO, rbd_client_id_show, NULL);
>> -static DEVICE_ATTR(pool, S_IRUGO, rbd_pool_show, NULL);
>> +static DEVICE_ATTR(pool_id, S_IRUGO, rbd_pool_id_show, NULL);
>> static DEVICE_ATTR(name, S_IRUGO, rbd_name_show, NULL);
>> static DEVICE_ATTR(refresh, S_IWUSR, NULL, rbd_image_refresh);
>> static DEVICE_ATTR(current_snap, S_IRUGO, rbd_snap_show, NULL);
>> @@ -1908,7 +1906,7 @@ static struct attribute *rbd_attrs[] = {
>> &dev_attr_size.attr,
>> &dev_attr_major.attr,
>> &dev_attr_client_id.attr,
>> - &dev_attr_pool.attr,
>> + &dev_attr_pool_id.attr,
>> &dev_attr_name.attr,
>> &dev_attr_current_snap.attr,
>> &dev_attr_refresh.attr,
>> @@ -2329,25 +2327,31 @@ static inline char *dup_token(const char **buf,
>> size_t *lenp)
>> }
>>
>> /*
>> - * This fills in the pool_name, obj, obj_len, snap_name, obj_len,
>> - * rbd_dev, rbd_md_name, and name fields of the given rbd_dev, based
>> - * on the list of monitor addresses and other options provided via
>> - * /sys/bus/rbd/add.
>> + * This fills in the obj, obj_len, snap_name, rbd_dev, rbd_md_name,
>> + * and name fields of the given rbd_dev, based on the list of
>> + * monitor addresses and other options provided via /sys/bus/rbd/add.
>> + *
>> + * Returns a pointer to a dynamically-allocated buffer containing
>> + * the pool name provided, or a pointer-coded errno if an error
>> + * occurs.
>> */
>> -static int rbd_add_parse_args(struct rbd_device *rbd_dev,
>> +static char *rbd_add_parse_args(struct rbd_device *rbd_dev,
>> const char *buf,
>> const char **mon_addrs,
>> size_t *mon_addrs_size,
>> char *options,
>> size_t options_size)
>> {
>> - size_t len;
>> + size_t len;
>> + int ret = -EINVAL;
>> + char *pool_name = NULL;
>>
>> /* The first four tokens are required */
>>
>> len = next_token(&buf);
>> if (!len)
>> - return -EINVAL;
>> + goto out_err;
>> +
>> *mon_addrs_size = len + 1;
>> *mon_addrs = buf;
>>
>> @@ -2355,15 +2359,17 @@ static int rbd_add_parse_args(struct rbd_device
>> *rbd_dev,
>>
>> len = copy_token(&buf, options, options_size);
>> if (!len || len>= options_size)
>> - return -EINVAL;
>> + goto out_err;
>>
>> - len = copy_token(&buf, rbd_dev->pool_name, sizeof
>> (rbd_dev->pool_name));
>> - if (!len || len>= sizeof (rbd_dev->pool_name))
>> - return -EINVAL;
>> + pool_name = dup_token(&buf, NULL);
>> + if (!pool_name) {
>> + ret = -ENOMEM;
>> + goto out_err;
>> + }
>>
>> len = copy_token(&buf, rbd_dev->obj, sizeof (rbd_dev->obj));
>> if (!len || len>= sizeof (rbd_dev->obj))
>> - return -EINVAL;
>> + goto out_err;
>>
>> /* We have the object length in hand, save it. */
>>
>> @@ -2382,9 +2388,14 @@ static int rbd_add_parse_args(struct rbd_device
>> *rbd_dev,
>> memcpy(rbd_dev->snap_name, RBD_SNAP_HEAD_NAME,
>> sizeof (RBD_SNAP_HEAD_NAME));
>> else if (len>= sizeof (rbd_dev->snap_name))
>> - return -EINVAL;
>> + goto out_err;
>>
>> - return 0;
>> + return pool_name;
>> +
>> +out_err:
>> + kfree(pool_name);
>> +
>> + return ERR_PTR(ret);
>> }
>>
>> static ssize_t rbd_add(struct bus_type *bus,
>> @@ -2396,6 +2407,7 @@ static ssize_t rbd_add(struct bus_type *bus,
>> size_t mon_addrs_size = 0;
>> char *options = NULL;
>> struct ceph_osd_client *osdc;
>> + char *pool_name;
>> int rc = -ENOMEM;
>>
>> if (!try_module_get(THIS_MODULE))
>> @@ -2425,10 +2437,14 @@ static ssize_t rbd_add(struct bus_type *bus,
>> sprintf(rbd_dev->name, "%s%d", RBD_DRV_NAME, rbd_dev->id);
>>
>> /* parse add command */
>> - rc = rbd_add_parse_args(rbd_dev, buf,&mon_addrs,&mon_addrs_size,
>> - options, count);
>> - if (rc)
>> + pool_name = rbd_add_parse_args(rbd_dev, buf,
>> + &mon_addrs,&mon_addrs_size,
>> + options, count);
>> + if (IS_ERR(pool_name)) {
>> + rc = PTR_ERR(pool_name);
>> + pool_name = NULL;
>> goto err_put_id;
>> + }
>>
>> rbd_dev->rbd_client = rbd_get_client(mon_addrs, mon_addrs_size - 1,
>> options);
>> @@ -2439,10 +2455,10 @@ static ssize_t rbd_add(struct bus_type *bus,
>>
>> /* pick the pool */
>> osdc =&rbd_dev->rbd_client->client->osdc;
>> - rc = ceph_pg_poolid_by_name(osdc->osdmap, rbd_dev->pool_name);
>> + rc = ceph_pg_poolid_by_name(osdc->osdmap, pool_name);
>> if (rc< 0)
>> goto err_out_client;
>> - rbd_dev->poolid = rc;
>> + rbd_dev->pool_id = rc;
>>
>> /* register our block device */
>> rc = register_blkdev(0, rbd_dev->name);
>> @@ -2453,6 +2469,7 @@ static ssize_t rbd_add(struct bus_type *bus,
>> rc = rbd_bus_add_dev(rbd_dev);
>> if (rc)
>> goto err_out_blkdev;
>> + kfree(pool_name);
>>
>> /*
>> * At this point cleanup in the event of an error is the job
>> @@ -2482,6 +2499,7 @@ err_out_blkdev:
>> err_out_client:
>> rbd_put_client(rbd_dev);
>> err_put_id:
>> + kfree(pool_name);
>> rbd_id_put(rbd_dev);
>> err_nomem:
>> kfree(options);
>
>
>
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html