1. To set the replica number of the cluster: dog alter cluster-copy -c <copies>
2. To set the replica number of a standalone vdi that has neither a parent nor children: dog alter vdi-copy -c <copies> <vdiname> 3. To set the replica number of a shared vdi that has a parent or children, please run the dog vdi clone command with the -R (--root) option first: dog vdi clone -s <snapshot> -R <src vdi> <dst vdi> It will deep copy a brand new standalone vdi based on the source vdi so that we can run dog alter vdi-copy to change the destination vdi's replica number later. Signed-off-by: Ruoyu <lian...@ucweb.com> --- dog/Makefile.am | 2 +- dog/alter.c | 226 +++++++++++++++++++++++++++++++++++++++++++++++ dog/dog.c | 1 + dog/dog.h | 4 + dog/vdi.c | 14 ++- include/internal_proto.h | 2 + sheep/ops.c | 43 +++++++++ 7 files changed, 289 insertions(+), 3 deletions(-) create mode 100644 dog/alter.c diff --git a/dog/Makefile.am b/dog/Makefile.am index a7ead61..18cb114 100644 --- a/dog/Makefile.am +++ b/dog/Makefile.am @@ -25,7 +25,7 @@ sbin_PROGRAMS = dog dog_SOURCES = farm/object_tree.c farm/sha1_file.c farm/snap.c \ farm/trunk.c farm/farm.c farm/slice.c \ - dog.c common.c treeview.c vdi.c node.c cluster.c + dog.c common.c treeview.c vdi.c node.c cluster.c alter.c if BUILD_TRACE dog_SOURCES += trace.c diff --git a/dog/alter.c b/dog/alter.c new file mode 100644 index 0000000..1ca9a18 --- /dev/null +++ b/dog/alter.c @@ -0,0 +1,226 @@ +/* + * Copyright (C) 2011 Nippon Telegraph and Telephone Corporation. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <time.h> +#include <string.h> +#include <ctype.h> +#include <sys/time.h> + +#include "dog.h" + +static struct sd_option alter_options[] = { + {'c', "copies", true, "specify the data redundancy level"}, + { 0, NULL, false, NULL }, +}; + +static struct alter_cmd_data { + uint8_t copies; + uint8_t copy_policy; +} alter_cmd_data; + +#define ALTER_CLUSTER_COPY_PRINT \ + " __\n" \ + " ()'`;\n" \ + " /\\|` Caution! Changing the # of replica will affect\n" \ + " / | all the VDIs to be created later.\n" \ + "(/_)_|_ Are you sure you want to continue? [yes/no]: " + +static int alter_cluster_copy(int argc, char **argv) +{ + int ret, log_length; + struct sd_req hdr; + struct sd_rsp *rsp = (struct sd_rsp *)&hdr; + struct epoch_log *logs; + + if (alter_cmd_data.copy_policy != 0) { + sd_err("changing redundancy level to erasure code is not supported yet."); + return EXIT_USAGE; + } + if (!alter_cmd_data.copies) { + alter_cmd_data.copies = SD_DEFAULT_COPIES; + printf("new cluster redundancy level is not specified, " + "use %d as default.\n", SD_DEFAULT_COPIES); + } + + if (alter_cmd_data.copies > sd_nodes_nr) { + char info[1024]; + snprintf(info, sizeof(info), "Number of copies (%d) is larger " + "than number of nodes (%d).\n" + "Are you sure you want to continue? 
[yes/no]: ", + alter_cmd_data.copies, sd_nodes_nr); + confirm(info); + } + + log_length = sd_epoch * sizeof(struct epoch_log); + logs = xmalloc(log_length); + sd_init_req(&hdr, SD_OP_STAT_CLUSTER); + hdr.data_length = log_length; + ret = dog_exec_req(&sd_nid, &hdr, logs); + if (ret < 0) + goto failure; + + if (rsp->result != SD_RES_SUCCESS) { + sd_err("Response's result: %s", sd_strerror(rsp->result)); + goto failure; + } + if (logs->copy_policy) { + sd_err("the cluster's copy policy is erasure code, " + "changing it is not supported yet."); + goto failure; + } + if (logs->nr_copies == alter_cmd_data.copies) { + sd_err("the cluster's redundancy level is already set to %d, " + "nothing changed.", alter_cmd_data.copies); + goto failure; + } + + confirm(ALTER_CLUSTER_COPY_PRINT); + + sd_init_req(&hdr, SD_OP_ALTER_CLUSTER_COPY); + hdr.cluster.copies = alter_cmd_data.copies; + hdr.cluster.copy_policy = alter_cmd_data.copy_policy; + ret = send_light_req(&sd_nid, &hdr); + if (ret == 0) { + sd_info("the cluster's redundancy level is set to %d, the old one was %d.", + alter_cmd_data.copies, logs->nr_copies); + goto success; + } else { + sd_err("set the cluster's redundancy level failure."); + goto failure; + } + +success: + free(logs); + return EXIT_SUCCESS; +failure: + free(logs); + return EXIT_FAILURE; +} + +#define ALTER_VDI_COPY_PRINT \ + " __\n" \ + " ()'`;\n" \ + " /\\|` Caution! Changing the # of replica will affect\n" \ + " / | the specified VDI and trigger recovery.\n" \ + "(/_)_|_ Are you sure you want to continue? 
[yes/no]: " + +static int alter_vdi_copy(int argc, char **argv) +{ + int ret, old_nr_copies; + uint32_t vid, child_vdi_id[MAX_CHILDREN]; + const char *vdiname = argv[optind++]; + char buf[SD_INODE_HEADER_SIZE]; + struct sd_inode *inode = (struct sd_inode *)buf; + struct sd_req hdr; + + if (alter_cmd_data.copy_policy != 0) { + sd_err("changing redundancy level to erasure code is not supported yet."); + return EXIT_USAGE; + } + if (!alter_cmd_data.copies) { + alter_cmd_data.copies = SD_DEFAULT_COPIES; + printf("new vdi redundancy level is not specified, " + "use %d as default.\n", SD_DEFAULT_COPIES); + } + + if (alter_cmd_data.copies > sd_nodes_nr) { + char info[1024]; + snprintf(info, sizeof(info), "Number of copies (%d) is larger " + "than number of nodes (%d).\n" + "Are you sure you want to continue? [yes/no]: ", + alter_cmd_data.copies, sd_nodes_nr); + confirm(info); + } + + ret = read_vdi_obj(vdiname, 0, "", &vid, inode, SD_INODE_HEADER_SIZE); + if (ret != EXIT_SUCCESS) { + sd_err("read %s's vdi object failure.", vdiname); + return EXIT_FAILURE; + } + + if (inode->copy_policy) { + sd_err("%s's copy policy is erasure code, " + "changing it is not supported yet.", vdiname); + return EXIT_FAILURE; + } + + old_nr_copies = inode->nr_copies; + if (old_nr_copies == alter_cmd_data.copies) { + sd_err("%s's redundancy level is already set to %d, " + "nothing changed.", vdiname, old_nr_copies); + return EXIT_FAILURE; + } + + memset(child_vdi_id, 0, sizeof(uint32_t) * MAX_CHILDREN); + if (inode->parent_vdi_id != 0 || + memcmp(inode->child_vdi_id, child_vdi_id, + sizeof(uint32_t) * MAX_CHILDREN) != 0) { + sd_err("only standalone vdi supports changing redundancy level."); + sd_err("please clone it with -R option first."); + return EXIT_FAILURE; + } + + confirm(ALTER_VDI_COPY_PRINT); + + inode->nr_copies = alter_cmd_data.copies; + ret = dog_write_object(vid_to_vdi_oid(vid), 0, inode, + SD_INODE_HEADER_SIZE, 0, 0, old_nr_copies, + inode->copy_policy, false, true); + if (ret != 
SD_RES_SUCCESS) { + sd_err("overwrite the vdi object's header of %s failure " + "while setting its redundancy level.", vdiname); + return EXIT_FAILURE; + } + + sd_init_req(&hdr, SD_OP_ALTER_VDI_COPY); + hdr.vdi_state.new_vid = vid; + hdr.vdi_state.copies = alter_cmd_data.copies; + hdr.vdi_state.copy_policy = alter_cmd_data.copy_policy; + + ret = send_light_req(&sd_nid, &hdr); + if (ret == 0) { + sd_info("%s's redundancy level is set to %d, the old one was %d.", + vdiname, alter_cmd_data.copies, old_nr_copies); + return EXIT_SUCCESS; + } + sd_err("set %s's redundancy level failure.", vdiname); + return EXIT_FAILURE; +} + +static struct subcommand alter_cmd[] = { + {"cluster-copy", NULL, "caph", "set the cluster's redundancy level", + NULL, CMD_NEED_NODELIST, alter_cluster_copy, alter_options}, + {"vdi-copy", "<vdiname>", "caph", "set the vdi's redundancy level", + NULL, CMD_NEED_ARG|CMD_NEED_NODELIST, alter_vdi_copy, alter_options}, + {NULL,}, +}; + +static int alter_parser(int ch, const char *opt) +{ + switch (ch) { + case 'c': + alter_cmd_data.copies = + parse_copy(opt, &alter_cmd_data.copy_policy); + if (!alter_cmd_data.copies) { + sd_err("Invalid redundancy level %s.", opt); + exit(EXIT_FAILURE); + } + break; + } + + return 0; +} + +struct command alter_command = { + "alter", + alter_cmd, + alter_parser +}; diff --git a/dog/dog.c b/dog/dog.c index 7942b34..e97ef62 100644 --- a/dog/dog.c +++ b/dog/dog.c @@ -157,6 +157,7 @@ static void init_commands(const struct command **commands) vdi_command, node_command, cluster_command, + alter_command, #ifdef HAVE_TRACE trace_command, #endif diff --git a/dog/dog.h b/dog/dog.h index 59d5a1c..9b53b3f 100644 --- a/dog/dog.h +++ b/dog/dog.h @@ -99,9 +99,13 @@ int dog_bnode_writer(uint64_t oid, void *mem, unsigned int len, uint64_t offset, int dog_bnode_reader(uint64_t oid, void **mem, unsigned int len, uint64_t offset); +int read_vdi_obj(const char *vdiname, int snapid, const char *tag, + uint32_t *pvid, struct sd_inode *inode, 
size_t size); + extern struct command vdi_command; extern struct command node_command; extern struct command cluster_command; +extern struct command alter_command; #ifdef HAVE_TRACE extern struct command trace_command; diff --git a/dog/vdi.c b/dog/vdi.c index 4d7fd54..f14b11e 100644 --- a/dog/vdi.c +++ b/dog/vdi.c @@ -23,6 +23,8 @@ static struct sd_option vdi_options[] = { {'P', "prealloc", false, "preallocate all the data objects"}, + {'R', "root", false, "clone a root vdi whose parent id is 0 and\n" + " prealloc auto enabled"}, {'i', "index", true, "specify the index of data objects"}, {'s', "snapshot", true, "specify a snapshot id or tag name"}, {'x', "exclusive", false, "write in an exclusive mode"}, @@ -51,6 +53,7 @@ static struct vdi_cmd_data { uint8_t copy_policy; uint8_t store_policy; uint64_t oid; + bool root; } vdi_cmd_data = { ~0, }; struct get_vdi_info { @@ -336,7 +339,7 @@ static int find_vdi_name(const char *vdiname, uint32_t snapid, const char *tag, return 0; } -static int read_vdi_obj(const char *vdiname, int snapid, const char *tag, +int read_vdi_obj(const char *vdiname, int snapid, const char *tag, uint32_t *pvid, struct sd_inode *inode, size_t size) { @@ -566,6 +569,9 @@ static int vdi_clone(int argc, char **argv) if (ret != EXIT_SUCCESS) goto out; + if (vdi_cmd_data.root == true) + base_vid = 0; + ret = do_vdi_create(dst_vdi, inode->vdi_size, base_vid, &new_vid, false, inode->nr_copies, inode->copy_policy, inode->store_policy); @@ -2356,7 +2362,7 @@ static struct subcommand vdi_cmd[] = { {"snapshot", "<vdiname>", "saphrv", "create a snapshot", NULL, CMD_NEED_ARG, vdi_snapshot, vdi_options}, - {"clone", "<src vdi> <dst vdi>", "sPcaphrv", "clone an image", + {"clone", "<src vdi> <dst vdi>", "sPRaphrv", "clone an image", NULL, CMD_NEED_ARG, vdi_clone, vdi_options}, {"delete", "<vdiname>", "saph", "delete an image", @@ -2413,6 +2419,10 @@ static int vdi_parser(int ch, const char *opt) case 'P': vdi_cmd_data.prealloc = true; break; + case 'R': + 
vdi_cmd_data.root = true; + vdi_cmd_data.prealloc = true; + break; case 'i': vdi_cmd_data.index = strtol(opt, &p, 10); if (opt == p) { diff --git a/include/internal_proto.h b/include/internal_proto.h index 0eb7227..ba9cd86 100644 --- a/include/internal_proto.h +++ b/include/internal_proto.h @@ -101,6 +101,8 @@ #define SD_OP_NFS_DELETE 0xBC #define SD_OP_EXIST 0xBD #define SD_OP_CLUSTER_INFO 0xBE +#define SD_OP_ALTER_CLUSTER_COPY 0xC0 +#define SD_OP_ALTER_VDI_COPY 0xC1 /* internal flags for hdr.flags, must be above 0x80 */ #define SD_FLAG_CMD_RECOVERY 0x0080 diff --git a/sheep/ops.c b/sheep/ops.c index b9550f0..ce1b49c 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -714,6 +714,35 @@ static int cluster_recovery_completion(const struct sd_req *req, return SD_RES_SUCCESS; } +static int cluster_alter_cluster_copy(const struct sd_req *req, + struct sd_rsp *rsp, void *data) +{ + if (req->cluster.copy_policy != 0) + return SD_RES_INVALID_PARMS; + + sys->cinfo.nr_copies = req->cluster.copies; + return set_cluster_config(&sys->cinfo); +} + +static int cluster_alter_vdi_copy(const struct sd_req *req, + struct sd_rsp *rsp, void *data) +{ + if (req->cluster.copy_policy != 0) + return SD_RES_INVALID_PARMS; + + uint32_t vid = req->vdi_state.new_vid; + int nr_copies = req->vdi_state.copies; + struct vnode_info *vinfo; + + add_vdi_state(vid, nr_copies, false, 0); + + vinfo = get_vnode_info(); + start_recovery(vinfo, vinfo, false); + put_vnode_info(vinfo); + + return SD_RES_SUCCESS; +} + static bool node_size_varied(void) { uint64_t new, used, old = sys->this_node.space; @@ -1179,6 +1208,20 @@ static struct sd_op_template sd_ops[] = { .process_main = cluster_disable_recover, }, + [SD_OP_ALTER_CLUSTER_COPY] = { + .name = "ALTER_CLUSTER_COPY", + .type = SD_OP_TYPE_CLUSTER, + .is_admin_op = true, + .process_main = cluster_alter_cluster_copy, + }, + + [SD_OP_ALTER_VDI_COPY] = { + .name = "ALTER_VDI_COPY", + .type = SD_OP_TYPE_CLUSTER, + .is_admin_op = true, + .process_main = 
cluster_alter_vdi_copy, + }, + /* local operations */ [SD_OP_RELEASE_VDI] = { .name = "RELEASE_VDI", -- 1.8.3.2 -- sheepdog mailing list sheepdog@lists.wpkg.org http://lists.wpkg.org/mailman/listinfo/sheepdog