After further research and testing, I've produced a patch that handles
all filenames containing special characters by copying the GNU quoting
behavior, and that also treats anything following a tab as a timestamp.
This improves both the range of filenames that can be handled and the
ability to apply patches found in the field.

Specifically, diff quotes and escapes filenames according to the 
following rules.
* Surround the filename with quotes if it contains a byte less than 
0x20, a byte greater than or equal to 0x80, space, backslash, or quote.
* Replace alert, backspace, form feed, newline, carriage return, tab, 
vertical tab, backslash, and quote with \a, \b, \f, \n, \r, \t, \v, \\, 
and \", respectively.
* Replace other bytes less than 0x20 or greater than or equal to 0x80 
with a backslash followed by the three octal digits representing the 
value of the byte.
* Treat valid UTF-8 characters involving sequences of bytes greater 
than or equal to 0x80 the same as other sequences of such bytes.

patch processes quoted and escaped filenames according to the following 
rules.
* If the filename does not begin with a quote, do not modify the 
filename.
* Remove quotes surrounding the filename.
* In quoted filenames, replace \a, \b, \f, \n, \r, \t, \v, \\, and \" 
with alert, backspace, form feed, newline, carriage return, tab, 
vertical tab, backslash, and quote, respectively.
* In quoted filenames, replace a backslash followed by octal digits 
with the byte with that value in octal.
In diff, quote and escape filenames according to the following rules.
* Surround the filename with quotes if it contains a byte less than 0x20,
  a byte greater than or equal to 0x80, space, backslash, or quote.
* Replace alert, backspace, form feed, newline, carriage return, tab, vertical
  tab, backslash, and quote with \a, \b, \f, \n, \r, \t, \v, \\, and \",
  respectively.
* Replace other bytes less than 0x20 or greater than or equal to 0x80 with a
  backslash followed by the three octal digits representing the value of the
  byte.
* Treat valid UTF-8 characters involving sequences of bytes greater than or
  equal to 0x80 the same as other sequences of such bytes.

In patch, process quoted and escaped filenames according to the following
rules.
* If the filename does not begin with a quote, do not modify the filename.
* Remove quotes surrounding the filename.
* In quoted filenames, replace \a, \b, \f, \n, \r, \t, \v, \\, and \" with
  alert, backspace, form feed, newline, carriage return, tab, vertical tab,
  backslash, and quote, respectively.
* In quoted filenames, replace a backslash followed by octal digits with the
  byte with that value in octal.

Also, in patch, treat everything on a +++ or --- line from the first tab
character after the start of the filename onward as a timestamp, rather than
as part of the filename.

--- toybox/toys/pending/diff.c	2019-08-29 10:50:09.000000000 -0400
+++ toybox-patch/toys/pending/diff.c	2019-08-29 10:40:25.000000000 -0400
@@ -524,12 +524,64 @@ static int cmp(const void *p1, const voi
    return strcmp(* (char * const *)p1, * (char * const *)p2);
 }
 
+// Quote and escape a filename the way GNU diff does. Returns a newly
+// xmalloc()ed string that the caller must free. The name is wrapped in
+// double quotes if it contains a space, quote, backslash, control byte
+// (< 0x20), or high byte (>= 0x80). \a \b \f \n \r \t \v " and \ become
+// two-character escapes; other control and high bytes become \ooo (octal).
+char *quote_filename(char *filename)
+{
+  char *to = "abfnrtv\"\\", *from = "\a\b\f\n\r\t\v\"\\";
+  char *result, *s, *t, *esc;
+  size_t len = 0;
+  int quote = 0;
+  unsigned char c;
+
+  // calculate memory usage and presence of quotes
+  for (s = filename; (c = *s); s++) {
+    if (strchr(from, c)) {
+      quote = 1;
+      len += 2;
+    } else if (c < 0x20 || c >= 0x80) {
+      quote = 1;
+      len += 4;
+    } else {
+      if (c == ' ') quote = 1;
+      len++;
+    }
+  }
+
+  // construct the new string
+  result = xmalloc(len + (quote ? 2 : 0) + 1);
+  t = result;
+  if (quote) *t++ = '"';
+  for (s = filename; (c = *s); s++) {
+    // Test bytes as unsigned char: where plain char is signed, a high byte
+    // is negative, never matches >= 0x80, and would be sign-extended by
+    // sprintf into far more digits than the 4 bytes reserved for its escape.
+    if ((esc = strchr(from, c))) {
+      *t++ = '\\';
+      *t++ = to[esc - from];
+    } else if (c < 0x20 || c >= 0x80) {
+      t += sprintf(t, "\\%.3o", (unsigned)c);
+    } else {
+      *t++ = c;
+    }
+  }
+  if (quote) *t++ = '"';
+  *t = 0;
+  return result;
+}
+
 static void show_label(char *prefix, char *filename, struct stat *sb)
 {
   char date[36];
+  char *quoted_file;  // filename in the form printed on the label line
 
-  printf("%s %s\t%s\n", prefix, filename,
+  quoted_file = quote_filename(filename);  // allocated copy, freed below
+  printf("%s %s\t%s\n", prefix, quoted_file,
     format_iso_time(date, sizeof(date), &sb->st_mtim));
+  free(quoted_file);
 }
 
 static void do_diff(char **files)
--- toybox/toys/posix/patch.c	2019-08-29 10:50:09.000000000 -0400
+++ toybox-patch/toys/posix/patch.c	2019-08-29 12:25:12.000000000 -0400
@@ -247,6 +247,35 @@ done:
   return TT.state;
 }
 
+// Undo the quoting diff applies to filenames. Returns a newly xmalloc()ed
+// string that the caller must free. A name that does not start with a
+// double quote is copied unchanged.
+char *unquote_file(char *filename) {
+  char *s = filename, *result, *t;
+  int quote, ch, i;
+
+  // unquoting never makes the name longer, so the input length suffices
+  t = result = xmalloc(strlen(filename) + 1);
+  if ((quote = (*s == '"'))) s++;
+  // a double quote only ends the name when it is the final character
+  for (; *s && !(quote && *s == '"' && !s[1]); s++) {
+    // don't accept escape sequences unless the filename is quoted
+    if (quote && *s == '\\' && s[1]) {
+      if (s[1] >= '0' && s[1] < '8') {
+        // take at most 3 octal digits so a digit after \ooo stays literal
+        for (ch = i = 0; i < 3 && s[1] >= '0' && s[1] < '8'; i++)
+          ch = ch * 8 + *++s - '0';
+        *t++ = ch;
+      } else {
+        ch = unescape(*++s);
+        *t++ = ch ? ch : *s;
+      }
+    } else *t++ = *s;
+  }
+  *t = 0;
+  return result;
+}
+
 // Read a patch file and find hunks, opening/creating/deleting files.
 // Call apply_one_hunk() on each hunk.
 
@@ -322,13 +351,12 @@ void patch_main(void)
       finish_oldfile();
 
       // Trim date from end of filename (if any).  We don't care.
-      for (s = patchline+4; *s && (*s!='\t' || !isdigit(s[1])); s++)
-        if (*s=='\\' && s[1]) s++;
+      for (s = patchline+4; *s && *s!='\t'; s++);
       i = atoi(s);
       if (i>1900 && i<=1970) *name = xstrdup("/dev/null");
       else {
         *s = 0;
-        *name = xstrdup(patchline+4);
+        *name = unquote_file(patchline+4);
       }
 
       // We defer actually opening the file because svn produces broken
_______________________________________________
Toybox mailing list
[email protected]
http://lists.landley.net/listinfo.cgi/toybox-landley.net

Reply via email to