On Mon, 2007-11-05 at 21:24 -0800, Wayne Davison wrote:
> On Tue, Nov 06, 2007 at 12:04:19AM -0500, Matt McCutchen wrote:
> > This patch adds an option --tr=BAD/GOOD to transliterate filenames.
>
> Both sides need identical file names in the list when sorting, otherwise
> a name could sort into different spots on each side. The iconv code
> deals with this by using an index into the unsorted file list, so you
> might want to make use of that code.
Oops! I overlooked that issue because I only tested with one file; thanks for pointing it out. I have updated the patch (attached) to activate the need_unsorted_flist code when a --tr option is passed.

Matt
This patch adds an option --tr=BAD/GOOD to transliterate filenames. It can be used to remove characters illegal on the destination filesystem. Jeff Weber expressed interest in this: http://lists.samba.org/archive/rsync/2007-October/018996.html This patch is a COMPLETE HACK that covers the most common cases. Others are welcome to improve it. --- old/flist.c +++ new/flist.c @@ -76,13 +76,16 @@ extern struct chmod_mode_struct *chmod_m extern struct filter_list_struct filter_list; extern struct filter_list_struct server_filter_list; +extern int need_unsorted_flist; #ifdef ICONV_OPTION extern int ic_ndx; extern int filesfrom_convert; -extern int need_unsorted_flist; extern iconv_t ic_send, ic_recv; #endif +extern char *tr_opt, *tr_left, *tr_right; +extern int tr_right_len; + #define PTR_SIZE (sizeof (struct file_struct *)) int io_error; @@ -603,6 +606,23 @@ static void send_file_entry(int f, struc stats.total_size += F_LENGTH(file); } +static void transliterate(char *thisname) { + char *p1, *p2, *pleft; + + for (p1 = p2 = thisname; *p1; p1++) { + /* Look up the current character in the left string. */ + pleft = strchr(tr_left, *p1); + if (!pleft) + /* Not found: no change. */ + *p2++ = *p1; + else if (pleft - tr_left < tr_right_len) + /* Store replacement from the right string. */ + *p2++ = tr_right[pleft - tr_left]; + /* Otherwise delete. 
*/ + } + *p2 = '\0'; +} + static struct file_struct *recv_file_entry(struct file_list *flist, int xflags, int f) { @@ -671,6 +691,9 @@ static struct file_struct *recv_file_ent } #endif + if (tr_opt) + transliterate(thisname); + clean_fname(thisname, 0); if (sanitize_paths) @@ -1706,14 +1729,12 @@ void send_extra_file_list(int f, int at_ write_byte(f, 0); -#ifdef ICONV_OPTION if (need_unsorted_flist) { if (!(flist->sorted = new_array(struct file_struct *, flist->used))) out_of_memory("send_extra_file_list"); memcpy(flist->sorted, flist->files, flist->used * sizeof (struct file_struct*)); } else -#endif flist->sorted = flist->files; clean_flist(flist, 0); @@ -2019,7 +2040,6 @@ struct file_list *send_file_list(int f, * receiving side to ask for whatever name it kept. For incremental * recursion mode, the sender marks duplicate dirs so that it can * send them together in a single file-list. */ -#ifdef ICONV_OPTION if (need_unsorted_flist) { if (inc_recurse) { if (!(flist->sorted = new_array(struct file_struct *, flist->used))) @@ -2032,9 +2052,7 @@ struct file_list *send_file_list(int f, flist->low = 0; flist->high = flist->used - 1; } - } else -#endif - { + } else { flist->sorted = flist->files; clean_flist(flist, 0); } @@ -2156,7 +2174,6 @@ struct file_list *recv_file_list(int f) if (show_filelist_p()) finish_filelist_progress(flist); -#ifdef ICONV_OPTION if (need_unsorted_flist) { /* Create an extra array of index pointers that we can sort for * the generator's use (for wading through the files in sorted @@ -2175,9 +2192,7 @@ struct file_list *recv_file_list(int f) (dir_flist->used - dstart) * sizeof (struct file_struct*)); fsort(dir_flist->sorted + dstart, dir_flist->used - dstart); } - } else -#endif - { + } else { flist->sorted = flist->files; if (inc_recurse && dir_flist->used > dstart) { dir_flist->sorted = dir_flist->files; --- old/options.c +++ new/options.c @@ -185,10 +185,12 @@ int list_only = 0; #define MAX_BATCH_NAME_LEN 256 /* Must be less than 
MAXPATHLEN-13 */ char *batch_name = NULL; -#ifdef ICONV_OPTION int need_unsorted_flist = 0; +#ifdef ICONV_OPTION char *iconv_opt = ICONV_OPTION; #endif +char *tr_opt = NULL, *tr_left = NULL, *tr_right = NULL; +int tr_right_len = 0; struct chmod_mode_struct *chmod_modes = NULL; @@ -423,6 +425,7 @@ void usage(enum logcode F) #ifdef ICONV_OPTION rprintf(F," --iconv=CONVERT_SPEC request charset conversion of filenames\n"); #endif + rprintf(F," --tr=BAD/GOOD transliterate filenames\n"); rprintf(F," -4, --ipv4 prefer IPv4\n"); rprintf(F," -6, --ipv6 prefer IPv6\n"); rprintf(F," --version print version number\n"); @@ -609,6 +612,7 @@ static struct poptOption long_options[] #ifdef ICONV_OPTION {"iconv", 0, POPT_ARG_STRING, &iconv_opt, 0, 0, 0 }, #endif + {"tr", 0, POPT_ARG_STRING, &tr_opt, 0, 0, 0 }, {"ipv4", '4', POPT_ARG_VAL, &default_af_hint, AF_INET, 0, 0 }, {"ipv6", '6', POPT_ARG_VAL, &default_af_hint, AF_INET6, 0, 0 }, {"8-bit-output", '8', POPT_ARG_NONE, &allow_8bit_chars, 0, 0, 0 }, @@ -1620,6 +1624,31 @@ int parse_arguments(int *argc_p, const c } } + /* Easiest way to get a local server right is to do this on both sides */ + if (tr_opt) { + if (*tr_opt) { + char *p; + + need_unsorted_flist = 1; + /* Our mutation shouldn't interfere with transmission of the + * original option to the server. 
*/ + tr_left = strdup(tr_opt); + p = strchr(tr_left, '/'); + if (p != NULL) { + *p = '\0'; + p++; + tr_right = p; + tr_right_len = strlen(tr_right); + if (strchr(tr_right, '/') != NULL) { + snprintf(err_buf, sizeof err_buf, + "--tr cannot transliterate slashes\n"); + return 0; + } + } + } else + tr_opt = NULL; + } + am_starting_up = 0; return 1; @@ -1988,6 +2017,12 @@ void server_options(char **args, int *ar else if (remove_source_files) args[ac++] = "--remove-sent-files"; + if (tr_opt && am_sender) { + if (asprintf(&arg, "--tr=%s", tr_opt) < 0) + goto oom; + args[ac++] = arg; + } + *argc_p = ac; return; --- old/rsync.yo +++ new/rsync.yo @@ -422,6 +422,7 @@ to the detailed description below for a --read-batch=FILE read a batched update from FILE --protocol=NUM force an older protocol version to be used --iconv=CONVERT_SPEC request charset conversion of filenames + --tr=BAD/GOOD transliterate filenames --checksum-seed=NUM set block/file checksum seed (advanced) -4, --ipv4 prefer IPv4 -6, --ipv6 prefer IPv6 @@ -2013,6 +2014,22 @@ specifying matching rules that can match For instance, you can specify extra include/exclude rules if there are filename differences on the two sides that need to be accounted for. +dit(bf(--tr=BAD/GOOD)) Transliterates filenames on the receiver, after the +iconv conversion (if any). This can be used to remove characters illegal +on the destination filesystem. If you use this option, consider saving a +"find . -ls" listing of the source in the destination to help you determine +the original filenames in case of need. + +The argument consists of a string of characters to remove, optionally +followed by a slash and a string of corresponding characters with which to +replace them. The second string may be shorter, in which case any leftover +characters in the first string are simply deleted. For example, +bf(--tr=:\/!) replaces colons with exclamation marks and deletes backslashes. +Slashes cannot be transliterated because it would cause havoc. 
+ +If the receiver is invoked over a remote shell, use bf(--protect-args) to +stop the shell from interpreting any nasty characters in the argument. + dit(bf(-4, --ipv4) or bf(-6, --ipv6)) Tells rsync to prefer IPv4/IPv6 when creating sockets. This only affects sockets that rsync has direct control over, such as the outgoing socket when directly contacting an
-- To unsubscribe or change options: https://lists.samba.org/mailman/listinfo/rsync Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html