On Mon, 2007-11-05 at 21:24 -0800, Wayne Davison wrote:
> On Tue, Nov 06, 2007 at 12:04:19AM -0500, Matt McCutchen wrote:
> > This patch adds an option --tr=BAD/GOOD to transliterate filenames.
> 
> Both sides need identical file names in the list when sorting, otherwise
> a name could sort into different spots on each side.  The iconv code
> deals with this by using an index into the unsorted file list, so you
> might want to make use of that code.

Oops!  I overlooked that issue because I only tested with one file;
thanks for pointing it out.  I have updated the patch (attached) to
activate the need_unsorted_flist code when a --tr option is passed.

Matt

This patch adds an option --tr=BAD/GOOD to transliterate filenames.  It can
be used to remove characters illegal on the destination filesystem.  Jeff
Weber expressed interest in this:

http://lists.samba.org/archive/rsync/2007-October/018996.html

This patch is a COMPLETE HACK that covers the most common cases.  Others
are welcome to improve it.

--- old/flist.c
+++ new/flist.c
@@ -76,13 +76,16 @@ extern struct chmod_mode_struct *chmod_m
 extern struct filter_list_struct filter_list;
 extern struct filter_list_struct server_filter_list;
 
+extern int need_unsorted_flist;
 #ifdef ICONV_OPTION
 extern int ic_ndx;
 extern int filesfrom_convert;
-extern int need_unsorted_flist;
 extern iconv_t ic_send, ic_recv;
 #endif
 
+extern char *tr_opt, *tr_left, *tr_right;
+extern int tr_right_len;
+
 #define PTR_SIZE (sizeof (struct file_struct *))
 
 int io_error;
@@ -603,6 +606,23 @@ static void send_file_entry(int f, struc
 		stats.total_size += F_LENGTH(file);
 }
 
+static void transliterate(char *thisname) { /* Rewrite thisname in place using tr_left/tr_right. */
+	char *p1, *p2, *pleft;
+
+	for (p1 = p2 = thisname; *p1; p1++) {
+		/* p1 reads, p2 writes; find the current character in the left string. */
+		pleft = strchr(tr_left, *p1);
+		if (!pleft)
+			/* Not in the left string: copy the character through unchanged. */
+			*p2++ = *p1;
+		else if (pleft - tr_left < tr_right_len)
+			/* Store the right-string character at the same offset. */
+			*p2++ = tr_right[pleft - tr_left];
+		/* Otherwise the right string is too short for this offset: delete. */
+	}
+	*p2 = '\0';
+}
+
 static struct file_struct *recv_file_entry(struct file_list *flist,
 					   int xflags, int f)
 {
@@ -671,6 +691,9 @@ static struct file_struct *recv_file_ent
 	}
 #endif
 
+	if (tr_opt)
+		transliterate(thisname);
+
 	clean_fname(thisname, 0);
 
 	if (sanitize_paths)
@@ -1706,14 +1729,12 @@ void send_extra_file_list(int f, int at_
 
 		write_byte(f, 0);
 
-#ifdef ICONV_OPTION
 		if (need_unsorted_flist) {
 			if (!(flist->sorted = new_array(struct file_struct *, flist->used)))
 				out_of_memory("send_extra_file_list");
 			memcpy(flist->sorted, flist->files,
 			       flist->used * sizeof (struct file_struct*));
 		} else
-#endif
 			flist->sorted = flist->files;
 
 		clean_flist(flist, 0);
@@ -2019,7 +2040,6 @@ struct file_list *send_file_list(int f, 
 	 * receiving side to ask for whatever name it kept.  For incremental
 	 * recursion mode, the sender marks duplicate dirs so that it can
 	 * send them together in a single file-list. */
-#ifdef ICONV_OPTION
 	if (need_unsorted_flist) {
 		if (inc_recurse) {
 			if (!(flist->sorted = new_array(struct file_struct *, flist->used)))
@@ -2032,9 +2052,7 @@ struct file_list *send_file_list(int f, 
 			flist->low = 0;
 			flist->high = flist->used - 1;
 		}
-	} else
-#endif
-	{
+	} else {
 		flist->sorted = flist->files;
 		clean_flist(flist, 0);
 	}
@@ -2156,7 +2174,6 @@ struct file_list *recv_file_list(int f)
 	if (show_filelist_p())
 		finish_filelist_progress(flist);
 
-#ifdef ICONV_OPTION
 	if (need_unsorted_flist) {
 		/* Create an extra array of index pointers that we can sort for
 		 * the generator's use (for wading through the files in sorted
@@ -2175,9 +2192,7 @@ struct file_list *recv_file_list(int f)
 			       (dir_flist->used - dstart) * sizeof (struct file_struct*));
 			fsort(dir_flist->sorted + dstart, dir_flist->used - dstart);
 		}
-	} else
-#endif
-	{
+	} else {
 		flist->sorted = flist->files;
 		if (inc_recurse && dir_flist->used > dstart) {
 			dir_flist->sorted = dir_flist->files;
--- old/options.c
+++ new/options.c
@@ -185,10 +185,12 @@ int list_only = 0;
 #define MAX_BATCH_NAME_LEN 256	/* Must be less than MAXPATHLEN-13 */
 char *batch_name = NULL;
 
-#ifdef ICONV_OPTION
 int need_unsorted_flist = 0;
+#ifdef ICONV_OPTION
 char *iconv_opt = ICONV_OPTION;
 #endif
+char *tr_opt = NULL, *tr_left = NULL, *tr_right = NULL;
+int tr_right_len = 0;
 
 struct chmod_mode_struct *chmod_modes = NULL;
 
@@ -423,6 +425,7 @@ void usage(enum logcode F)
 #ifdef ICONV_OPTION
   rprintf(F,"     --iconv=CONVERT_SPEC    request charset conversion of filenames\n");
 #endif
+  rprintf(F,"     --tr=BAD/GOOD           transliterate filenames\n");
   rprintf(F," -4, --ipv4                  prefer IPv4\n");
   rprintf(F," -6, --ipv6                  prefer IPv6\n");
   rprintf(F,"     --version               print version number\n");
@@ -609,6 +612,7 @@ static struct poptOption long_options[] 
 #ifdef ICONV_OPTION
   {"iconv",            0,  POPT_ARG_STRING, &iconv_opt, 0, 0, 0 },
 #endif
+  {"tr",               0,  POPT_ARG_STRING, &tr_opt, 0, 0, 0 },
   {"ipv4",            '4', POPT_ARG_VAL,    &default_af_hint, AF_INET, 0, 0 },
   {"ipv6",            '6', POPT_ARG_VAL,    &default_af_hint, AF_INET6, 0, 0 },
   {"8-bit-output",    '8', POPT_ARG_NONE,   &allow_8bit_chars, 0, 0, 0 },
@@ -1620,6 +1624,31 @@ int parse_arguments(int *argc_p, const c
 		}
 	}
 
+	/* Easiest way to get a local server right is to do this on both sides */
+	if (tr_opt) {
+		if (*tr_opt) {
+			char *p;
+
+			need_unsorted_flist = 1;
+			/* Our mutation shouldn't interfere with transmission of the
+			 * original option to the server. */
+			tr_left = strdup(tr_opt);
+			p = strchr(tr_left, '/');
+			if (p != NULL) {
+				*p = '\0';
+				p++;
+				tr_right = p;
+				tr_right_len = strlen(tr_right);
+				if (strchr(tr_right, '/') != NULL) {
+					snprintf(err_buf, sizeof err_buf,
+						"--tr cannot transliterate slashes\n");
+					return 0;
+				}
+			}
+		} else
+			tr_opt = NULL;
+	}
+
 	am_starting_up = 0;
 
 	return 1;
@@ -1988,6 +2017,12 @@ void server_options(char **args, int *ar
 	else if (remove_source_files)
 		args[ac++] = "--remove-sent-files";
 
+	if (tr_opt && am_sender) {
+		if (asprintf(&arg, "--tr=%s", tr_opt) < 0)
+			goto oom;
+		args[ac++] = arg;
+	}
+
 	*argc_p = ac;
 	return;
 
--- old/rsync.yo
+++ new/rsync.yo
@@ -422,6 +422,7 @@ to the detailed description below for a 
      --read-batch=FILE       read a batched update from FILE
      --protocol=NUM          force an older protocol version to be used
      --iconv=CONVERT_SPEC    request charset conversion of filenames
+     --tr=BAD/GOOD           transliterate filenames
      --checksum-seed=NUM     set block/file checksum seed (advanced)
  -4, --ipv4                  prefer IPv4
  -6, --ipv6                  prefer IPv6
@@ -2013,6 +2014,22 @@ specifying matching rules that can match
 For instance, you can specify extra include/exclude rules if there are
 filename differences on the two sides that need to be accounted for.
 
+dit(bf(--tr=BAD/GOOD)) Transliterates filenames on the receiver, after the
+iconv conversion (if any).  This can be used to remove characters illegal
+on the destination filesystem.  If you use this option, consider saving a
+"find . -ls" listing of the source in the destination to help you determine
+the original filenames in case of need.
+
+The argument consists of a string of characters to remove, optionally
+followed by a slash and a string of corresponding characters with which to
+replace them.  The second string may be shorter, in which case any leftover
+characters in the first string are simply deleted.  For example,
+bf(--tr=:\/!) replaces colons with exclamation marks and deletes backslashes.
+Slashes cannot be transliterated because that would change the path structure.
+
+If the receiver is invoked over a remote shell, use bf(--protect-args) to
+stop the shell from interpreting any nasty characters in the argument.
+
 dit(bf(-4, --ipv4) or bf(-6, --ipv6)) Tells rsync to prefer IPv4/IPv6
 when creating sockets.  This only affects sockets that rsync has direct
 control over, such as the outgoing socket when directly contacting an
-- 
To unsubscribe or change options: https://lists.samba.org/mailman/listinfo/rsync
Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html

Reply via email to