Re: rsync and translating restricted NTFS filename characters

2007-11-09 Thread Jeff Weber
Matt & Wayne:

The transliterate patch worked great.  I can now back up and restore to/from
an SMB NTFS share.

Many thanks for taking the time to spin me a patch!
Jeff Weber

On Tuesday 06 November 2007 02:30:35 pm Matt McCutchen wrote:
 Jeff,

 Wayne has cleaned up my patch a bit.  The new version is at:

 http://cvs.samba.org/cgi-bin/cvsweb/rsync/patches/transliterate.diff?rev=1.1&content-type=text/x-cvsweb-markup

 Matt

 On Tue, 2007-11-06 at 08:29 -0500, Matt McCutchen wrote:
  On Mon, 2007-11-05 at 21:24 -0800, Wayne Davison wrote:
   On Tue, Nov 06, 2007 at 12:04:19AM -0500, Matt McCutchen wrote:
This patch adds an option --tr=BAD/GOOD to transliterate filenames.
  
   Both sides need identical file names in the list when sorting,
   otherwise a name could sort into different spots on each side.  The
   iconv code deals with this by using an index into the unsorted file
   list, so you might want to make use of that code.
 
  Oops!  I overlooked that issue because I only tested with one file;
  thanks for pointing it out.  I have updated the patch (attached) to
  activate the need_unsorted_flist code when a --tr option is passed.
 
  Matt
-- 
To unsubscribe or change options: https://lists.samba.org/mailman/listinfo/rsync
Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html


Re: rsync and translating restricted NTFS filename characters

2007-11-06 Thread Matt McCutchen
On Mon, 2007-11-05 at 21:24 -0800, Wayne Davison wrote:
 On Tue, Nov 06, 2007 at 12:04:19AM -0500, Matt McCutchen wrote:
  This patch adds an option --tr=BAD/GOOD to transliterate filenames.
 
 Both sides need identical file names in the list when sorting, otherwise
 a name could sort into different spots on each side.  The iconv code
 deals with this by using an index into the unsorted file list, so you
 might want to make use of that code.

Oops!  I overlooked that issue because I only tested with one file;
thanks for pointing it out.  I have updated the patch (attached) to
activate the need_unsorted_flist code when a --tr option is passed.

Matt
This patch adds an option --tr=BAD/GOOD to transliterate filenames.  It can
be used to remove characters illegal on the destination filesystem.  Jeff
Weber expressed interest in this:

http://lists.samba.org/archive/rsync/2007-October/018996.html

This patch is a COMPLETE HACK that covers the most common cases.  Others
are welcome to improve it.

--- old/flist.c
+++ new/flist.c
@@ -76,13 +76,16 @@ extern struct chmod_mode_struct *chmod_m
 extern struct filter_list_struct filter_list;
 extern struct filter_list_struct server_filter_list;
 
+extern int need_unsorted_flist;
 #ifdef ICONV_OPTION
 extern int ic_ndx;
 extern int filesfrom_convert;
-extern int need_unsorted_flist;
 extern iconv_t ic_send, ic_recv;
 #endif
 
+extern char *tr_opt, *tr_left, *tr_right;
+extern int tr_right_len;
+
 #define PTR_SIZE (sizeof (struct file_struct *))
 
 int io_error;
@@ -603,6 +606,23 @@ static void send_file_entry(int f, struc
 		stats.total_size += F_LENGTH(file);
 }
 
+static void transliterate(char *thisname) {
+	char *p1, *p2, *pleft;
+
+	for (p1 = p2 = thisname; *p1; p1++) {
+		/* Look up the current character in the left string. */
+		pleft = strchr(tr_left, *p1);
+		if (!pleft)
+			/* Not found: no change. */
+			*p2++ = *p1;
+		else if (pleft - tr_left < tr_right_len)
+			/* Store replacement from the right string. */
+			*p2++ = tr_right[pleft - tr_left];
+		/* Otherwise delete. */
+	}
+	*p2 = '\0';
+}
+
 static struct file_struct *recv_file_entry(struct file_list *flist,
 	   int xflags, int f)
 {
@@ -671,6 +691,9 @@ static struct file_struct *recv_file_ent
 	}
 #endif
 
+	if (tr_opt)
+		transliterate(thisname);
+
 	clean_fname(thisname, 0);
 
 	if (sanitize_paths)
@@ -1706,14 +1729,12 @@ void send_extra_file_list(int f, int at_
 
 		write_byte(f, 0);
 
-#ifdef ICONV_OPTION
 		if (need_unsorted_flist) {
 			if (!(flist-sorted = new_array(struct file_struct *, flist-used)))
 out_of_memory(send_extra_file_list);
 			memcpy(flist-sorted, flist-files,
 			   flist-used * sizeof (struct file_struct*));
 		} else
-#endif
 			flist-sorted = flist-files;
 
 		clean_flist(flist, 0);
@@ -2019,7 +2040,6 @@ struct file_list *send_file_list(int f, 
 	 * receiving side to ask for whatever name it kept.  For incremental
 	 * recursion mode, the sender marks duplicate dirs so that it can
 	 * send them together in a single file-list. */
-#ifdef ICONV_OPTION
 	if (need_unsorted_flist) {
 		if (inc_recurse) {
 			if (!(flist-sorted = new_array(struct file_struct *, flist-used)))
@@ -2032,9 +2052,7 @@ struct file_list *send_file_list(int f, 
 			flist-low = 0;
 			flist-high = flist-used - 1;
 		}
-	} else
-#endif
-	{
+	} else {
 		flist-sorted = flist-files;
 		clean_flist(flist, 0);
 	}
@@ -2156,7 +2174,6 @@ struct file_list *recv_file_list(int f)
 	if (show_filelist_p())
 		finish_filelist_progress(flist);
 
-#ifdef ICONV_OPTION
 	if (need_unsorted_flist) {
 		/* Create an extra array of index pointers that we can sort for
 		 * the generator's use (for wading through the files in sorted
@@ -2175,9 +2192,7 @@ struct file_list *recv_file_list(int f)
 			   (dir_flist-used - dstart) * sizeof (struct file_struct*));
 			fsort(dir_flist-sorted + dstart, dir_flist-used - dstart);
 		}
-	} else
-#endif
-	{
+	} else {
 		flist-sorted = flist-files;
 		if (inc_recurse  dir_flist-used  dstart) {
 			dir_flist-sorted = dir_flist-files;
--- old/options.c
+++ new/options.c
@@ -185,10 +185,12 @@ int list_only = 0;
 #define MAX_BATCH_NAME_LEN 256	/* Must be less than MAXPATHLEN-13 */
 char *batch_name = NULL;
 
-#ifdef ICONV_OPTION
 int need_unsorted_flist = 0;
+#ifdef ICONV_OPTION
 char *iconv_opt = ICONV_OPTION;
 #endif
+char *tr_opt = NULL, *tr_left = NULL, *tr_right = NULL;
+int tr_right_len = 0;
 
 struct chmod_mode_struct *chmod_modes = NULL;
 
@@ -423,6 +425,7 @@ void usage(enum logcode F)
 #ifdef ICONV_OPTION
   rprintf(F, --iconv=CONVERT_SPECrequest charset conversion of filenames\n);
 #endif
+  rprintf(F, --tr=BAD/GOOD   transliterate filenames\n);
   rprintf(F, -4, --ipv4  prefer IPv4\n);
   rprintf(F, -6, --ipv6  prefer IPv6\n);
   rprintf(F, --version   print version number\n);
@@ -609,6 +612,7 @@ static struct poptOption long_options[] 
 #ifdef ICONV_OPTION
   {iconv,0,  POPT_ARG_STRING, 

Re: rsync and translating restricted NTFS filename characters

2007-11-06 Thread Matt McCutchen
Jeff,

Wayne has cleaned up my patch a bit.  The new version is at:

http://cvs.samba.org/cgi-bin/cvsweb/rsync/patches/transliterate.diff?rev=1.1&content-type=text/x-cvsweb-markup

Matt

On Tue, 2007-11-06 at 08:29 -0500, Matt McCutchen wrote:
 On Mon, 2007-11-05 at 21:24 -0800, Wayne Davison wrote:
  On Tue, Nov 06, 2007 at 12:04:19AM -0500, Matt McCutchen wrote:
   This patch adds an option --tr=BAD/GOOD to transliterate filenames.
  
  Both sides need identical file names in the list when sorting, otherwise
  a name could sort into different spots on each side.  The iconv code
  deals with this by using an index into the unsorted file list, so you
  might want to make use of that code.
 
 Oops!  I overlooked that issue because I only tested with one file;
 thanks for pointing it out.  I have updated the patch (attached) to
 activate the need_unsorted_flist code when a --tr option is passed.
 
 Matt

-- 
To unsubscribe or change options: https://lists.samba.org/mailman/listinfo/rsync
Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html


Re: rsync and translating restricted NTFS filename characters

2007-11-05 Thread Matt McCutchen
On Tue, 2007-10-30 at 10:07 -0500, Jeff Weber wrote: 
 Is there a solution to rsync Linux file paths with restricted NTFS
 characters, to NTFS?  Perhaps there exists a solution to remap, say,
 Linux path : [colon] to an alternate character ! [exclamation mark] on
 NTFS ?

Rsync currently does not support that sort of character mapping.  I
hacked up a patch to rsync (attached) to add a --tr option that performs
it.  '--tr=:/!' should accomplish what you need.

Wayne: You can include the patch in patches/ if you like; I doubt the
patch is worth your effort to maintain it, but you did include the ACL
patch when it was at a similar stage.

Matt
This patch adds an option --tr=BAD/GOOD to transliterate filenames.  It can
be used to remove characters illegal on the destination filesystem.  Jeff
Weber expressed interest in this:

http://lists.samba.org/archive/rsync/2007-October/018996.html

This patch is a COMPLETE HACK that covers the most common cases.  Others
are welcome to improve it.

--- old/flist.c
+++ new/flist.c
@@ -83,6 +83,9 @@ extern int need_unsorted_flist;
 extern iconv_t ic_send, ic_recv;
 #endif
 
+extern char *tr_opt, *tr_left, *tr_right;
+extern int tr_right_len;
+
 #define PTR_SIZE (sizeof (struct file_struct *))
 
 int io_error;
@@ -603,6 +606,23 @@ static void send_file_entry(int f, struc
 		stats.total_size += F_LENGTH(file);
 }
 
+static void transliterate(char *thisname) {
+	char *p1, *p2, *pleft;
+
+	for (p1 = p2 = thisname; *p1; p1++) {
+		/* Look up the current character in the left string. */
+		pleft = strchr(tr_left, *p1);
+		if (!pleft)
+			/* Not found: no change. */
+			*p2++ = *p1;
+		else if (pleft - tr_left < tr_right_len)
+			/* Store replacement from the right string. */
+			*p2++ = tr_right[pleft - tr_left];
+		/* Otherwise delete. */
+	}
+	*p2 = '\0';
+}
+
 static struct file_struct *recv_file_entry(struct file_list *flist,
 	   int xflags, int f)
 {
@@ -671,6 +691,9 @@ static struct file_struct *recv_file_ent
 	}
 #endif
 
+	if (tr_opt)
+		transliterate(thisname);
+
 	clean_fname(thisname, 0);
 
 	if (sanitize_paths)
--- old/options.c
+++ new/options.c
@@ -189,6 +189,8 @@ char *batch_name = NULL;
 int need_unsorted_flist = 0;
 char *iconv_opt = ICONV_OPTION;
 #endif
+char *tr_opt = NULL, *tr_left = NULL, *tr_right = NULL;
+int tr_right_len = 0;
 
 struct chmod_mode_struct *chmod_modes = NULL;
 
@@ -423,6 +425,7 @@ void usage(enum logcode F)
 #ifdef ICONV_OPTION
   rprintf(F, --iconv=CONVERT_SPECrequest charset conversion of filenames\n);
 #endif
+  rprintf(F, --tr=BAD/GOOD   transliterate filenames\n);
   rprintf(F, -4, --ipv4  prefer IPv4\n);
   rprintf(F, -6, --ipv6  prefer IPv6\n);
   rprintf(F, --version   print version number\n);
@@ -609,6 +612,7 @@ static struct poptOption long_options[] 
 #ifdef ICONV_OPTION
   {iconv,0,  POPT_ARG_STRING, iconv_opt, 0, 0, 0 },
 #endif
+  {tr,   0,  POPT_ARG_STRING, tr_opt, 0, 0, 0 },
   {ipv4,'4', POPT_ARG_VAL,default_af_hint, AF_INET, 0, 0 },
   {ipv6,'6', POPT_ARG_VAL,default_af_hint, AF_INET6, 0, 0 },
   {8-bit-output,'8', POPT_ARG_NONE,   allow_8bit_chars, 0, 0, 0 },
@@ -1620,6 +1624,30 @@ int parse_arguments(int *argc_p, const c
 		}
 	}
 
+	/* Easiest way to get a local server right is to do this on both sides */
+	if (tr_opt) {
+		if (*tr_opt) {
+			char *p;
+
+			/* Our mutation shouldn't interfere with transmission of the
+			 * original option to the server. */
+			tr_left = strdup(tr_opt);
+			p = strchr(tr_left, '/');
+			if (p != NULL) {
+*p = '\0';
+p++;
+tr_right = p;
+tr_right_len = strlen(tr_right);
+if (strchr(tr_right, '/') != NULL) {
+	snprintf(err_buf, sizeof err_buf,
+		"--tr cannot transliterate slashes\n");
+	return 0;
+}
+			}
+		} else
+			tr_opt = NULL;
+	}
+
 	am_starting_up = 0;
 
 	return 1;
@@ -1988,6 +2016,12 @@ void server_options(char **args, int *ar
 	else if (remove_source_files)
 		args[ac++] = --remove-sent-files;
 
+	if (tr_opt && am_sender) {
+		if (asprintf(&arg, "--tr=%s", tr_opt) < 0)
+			goto oom;
+		args[ac++] = arg;
+	}
+
 	*argc_p = ac;
 	return;
 
--- old/rsync.yo
+++ new/rsync.yo
@@ -422,6 +422,7 @@ to the detailed description below for a 
  --read-batch=FILE   read a batched update from FILE
  --protocol=NUM  force an older protocol version to be used
  --iconv=CONVERT_SPECrequest charset conversion of filenames
+ --tr=BAD/GOOD   transliterate filenames
  --checksum-seed=NUM set block/file checksum seed (advanced)
  -4, --ipv4  prefer IPv4
  -6, --ipv6  prefer IPv6
@@ -2013,6 +2014,22 @@ specifying matching rules that can match
 For instance, you can specify extra include/exclude rules if there are
 filename differences on the two sides that need to be accounted for.
 
+dit(bf(--tr=BAD/GOOD)) Transliterates filenames on the receiver, after the

Re: rsync and translating restricted NTFS filename characters

2007-11-05 Thread Wayne Davison
On Tue, Nov 06, 2007 at 12:04:19AM -0500, Matt McCutchen wrote:
 This patch adds an option --tr=BAD/GOOD to transliterate filenames.

Both sides need identical file names in the list when sorting, otherwise
a name could sort into different spots on each side.  The iconv code
deals with this by using an index into the unsorted file list, so you
might want to make use of that code.

..wayne..
-- 
To unsubscribe or change options: https://lists.samba.org/mailman/listinfo/rsync
Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html


rsync and translating restricted NTFS filename characters

2007-10-30 Thread Jeff Weber
I am backing up files from Linux to an NTFS volume using rsync.  I have the
NTFS volume mounted on Linux via CIFS.  I am discovering errors while
attempting to back up files with restricted NTFS characters, like : [colon].
For example, I am unable to back up my crucial maildirs, as a colon is
encoded in each Linux file path.

Is there a solution to rsync Linux file paths with restricted NTFS
characters, to NTFS?  Perhaps there exists a solution to remap, say,
Linux path : [colon] to an alternate character ! [exclamation mark] on
NTFS ?

thanks,
Jeff

-- 
To unsubscribe or change options: https://lists.samba.org/mailman/listinfo/rsync
Before posting, read: http://www.catb.org/~esr/faqs/smart-questions.html