On 2025-01-05 11:19, Jim Meyering wrote:
Technically, I don't see how that POSIX guidance for patch can justify
this behavior, since patch's output file name is just "b"

Thanks, good point. I installed the attached to fix that. The basic idea is that "patch" now acts as if openat etc. follow the POSIX.1-2024-encouraged behavior, even on platforms that don't have that behavior.


IMHO, it is fine and even desirable to discourage
NL-afflcted files.

Yes, I had the same thought.

'patch' is a good program to try this new behavior out, as it's not used that often any more, the new behavior shouldn't be triggered except on unusual (and likely malicious) input, and the people who still use 'patch' are generally experts who will know how to deal with the change.

I'm hoping that we don't water the change down to merely warn about the newlines. When I'm running 'patch' I'm likely to miss such warnings (especially for a long patch), and I'm a well-known target for malicious outsiders. I would rather have 'patch' reject these file names.

We could add a 'patch' option to accept file names with newlines. But would that be wise? More-secure operating systems are likely to follow the POSIX.1-2024 advice and override whatever 'patch' does.
From 1da6bf84db2ed0be88ccb47139256e48243a75f0 Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Wed, 8 Jan 2025 09:51:26 -0800
Subject: [PATCH] Check for newlines only when creating a file name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also, check only the last file name component.
In other words, mimic operating systems that follow POSIX.1-2024’s
encouragement to fail with EILSEQ when openat etc. create a file name.
This is more conservative than the previous patch to prohibit
newlines in file names.
* src/patch.c (main, backup_file_name_option, get_some_switches):
* src/util.c (parse_c_string, make_tempfile):
Don’t check for newlines in a file name unless we are definitely
creating a file, as it’s harmless to read and stat file with
newlines in their names if the OS allows that.
* src/safe.c (traverse_another_path, traverse_path): New arg
REJECT_NL.  If set, reject any file name whose last component
contains a newline.  Also, do not do traversal if unsafe.  All
callers changed to pass true if they are creating the file name,
false otherwise, and to not bother checking whether we are unsafe.
(safe_open): Special case for when O_CREAT is set but O_EXCL is not.
* src/util.c (pfatal): Report "Invalid byte sequence" for EILSEQ.
This POSIX wording is less confusing than glibc's "Invalid or
incomplete multibyte or wide character".  Also, this lets
the test cases check for this wording.
* tests/bad-filenames: Adjust to new diagnostic wording.
---
 src/patch.c         |  10 +---
 src/safe.c          | 132 ++++++++++++++++----------------------------
 src/util.c          |  28 ++++++----
 tests/bad-filenames |  32 +++++++++--
 4 files changed, 90 insertions(+), 112 deletions(-)

diff --git a/src/patch.c b/src/patch.c
index 2685f66..fc41452 100644
--- a/src/patch.c
+++ b/src/patch.c
@@ -277,11 +277,7 @@ main (int argc, char **argv)
 	  else if (pch_copy () || pch_rename ())
 	    outname = pch_name (! reverse_flag);
 	  else
-	    {
-	      if (strchr (inname, '\n'))
-		fatal ("input/output file name contains newline");
-	      outname = inname;
-	    }
+	    outname = inname;
 	}
 
       if (pch_git_diff () && ! skip_rest_of_patch)
@@ -904,8 +900,6 @@ backup_file_name_option (char const *option_type)
 {
   if (!*optarg)
     fatal ("backup %s is empty", option_type);
-  if (strchr (optarg, '\n'))
-    fatal ("backup %s contains newline", option_type);
   return xstrdup (optarg);
 }
 
@@ -992,8 +986,6 @@ get_some_switches (int argc, char **argv)
 		noreverse_flag = true;
 		break;
 	    case 'o':
-		if (strchr (optarg, '\n'))
-		  fatal ("output file name contains newline");
 		outfile = xstrdup (optarg);
 		break;
 	    case 'p':
diff --git a/src/safe.c b/src/safe.c
index 2613403..1085042 100644
--- a/src/safe.c
+++ b/src/safe.c
@@ -431,8 +431,12 @@ traverse_next (struct cached_dirfd *dir, char **path, int keepfd,
   return entry;
 }
 
-/* Traverse PATHNAME.  Updates PATHNAME to point to the last path component and
-   returns a file descriptor to its parent directory (which can be AT_FDCWD).
+/* Traverse PATHNAME.
+
+   If REJECT_NL, fail if the PATHNAME's last component contains a newline.
+   Otherwise, if unsafe or PATHNAME is absolute, just return AT_FDCWD.
+   Otherwise, update PATHNAME to point to the last path component and
+   return a file descriptor to its parent directory (which can be AT_FDCWD).
    If KEEPFD is nonnegative, make sure that any cache entry for it is not
    removed from the cache (and KEEPFD remains open).
 
@@ -442,14 +446,23 @@ traverse_next (struct cached_dirfd *dir, char **path, int keepfd,
    up are off the lru list but in the hash table.
     */
 static int
-traverse_another_path (char **pathname, int keepfd)
+traverse_another_path (char **pathname, bool reject_nl, int keepfd)
 {
+  char *path = *pathname;
+  char *last = last_component (path);
+  if (reject_nl && strchr (last, '\n'))
+    {
+      errno = EILSEQ;
+      return DIRFD_INVALID;
+    }
+  if (unsafe || last == path || IS_ABSOLUTE_FILE_NAME (path))
+    return AT_FDCWD;
+
   static struct cached_dirfd cwd = {
     .fd = AT_FDCWD,
   };
 
   intmax_t misses = dirfd_cache_misses;
-  char *path = *pathname;
   struct cached_dirfd *dir = &cwd;
   struct symlink *stack = nullptr;
   idx_t steps = count_path_components (path);
@@ -463,13 +476,6 @@ traverse_another_path (char **pathname, int keepfd)
       return DIRFD_INVALID;
     }
 
-  if (! *path || IS_ABSOLUTE_FILE_NAME (path))
-    return AT_FDCWD;
-
-  char *last = last_component (path);
-  if (last == path)
-    return AT_FDCWD;
-
   if (debug & 32)
     {
       idx_t full_pathlen = last - path;
@@ -553,20 +559,15 @@ fail:
 
 /* Just traverse PATHNAME; see traverse_another_path(). */
 static int
-traverse_path (char **pathname)
+traverse_path (char **pathname, bool reject_nl)
 {
-  return traverse_another_path (pathname, DIRFD_INVALID);
+  return traverse_another_path (pathname, reject_nl, DIRFD_INVALID);
 }
 
 static int
 safe_xstat (char *pathname, struct stat *buf, int flags)
 {
-  int dirfd;
-
-  if (unsafe)
-    return fstatat (AT_FDCWD, pathname, buf, flags);
-
-  dirfd = traverse_path (&pathname);
+  int dirfd = traverse_path (&pathname, false);
   if (dirfd == DIRFD_INVALID)
     return -1;
   return fstatat (dirfd, pathname, buf, flags);
@@ -590,14 +591,22 @@ safe_lstat (char *pathname, struct stat *buf)
 int
 safe_open (char *pathname, int flags, mode_t mode)
 {
-  int dirfd;
-
-  if (unsafe)
-    return open (pathname, flags, mode);
-
-  dirfd = traverse_path (&pathname);
+  int creat_excl = flags & (O_CREAT | O_EXCL);
+  int dirfd = traverse_path (&pathname, creat_excl == (O_CREAT | O_EXCL));
   if (dirfd == DIRFD_INVALID)
     return -1;
+
+  /* If O_CREAT is set but O_EXCL is not, traverse_path does not
+     suffice for checking file names with '\n', so check by hand.  */
+  if (creat_excl == O_CREAT
+      && strchr (last_component (pathname), '\n')
+      && faccessat (dirfd, pathname, F_OK, AT_EACCESS) < 0
+      && errno == ENOENT)
+    {
+      errno = EILSEQ;
+      return -1;
+    }
+
   return openat (dirfd, pathname, flags, mode);
 }
 
@@ -605,21 +614,15 @@ safe_open (char *pathname, int flags, mode_t mode)
 int
 safe_rename (char *oldpath, char *newpath)
 {
-  int olddirfd, newdirfd;
-  int ret;
-
-  if (unsafe)
-    return rename (oldpath, newpath);
-
-  olddirfd = traverse_path (&oldpath);
+  int olddirfd = traverse_path (&oldpath, false);
   if (olddirfd == DIRFD_INVALID)
     return -1;
 
-  newdirfd = traverse_another_path (&newpath, olddirfd);
+  int newdirfd = traverse_another_path (&newpath, true, olddirfd);
   if (newdirfd == DIRFD_INVALID)
     return -1;
 
-  ret = renameat (olddirfd, oldpath, newdirfd, newpath);
+  int ret = renameat (olddirfd, oldpath, newdirfd, newpath);
   if (! ret)
     {
       invalidate_cached_dirfd (olddirfd, oldpath);
@@ -632,12 +635,7 @@ safe_rename (char *oldpath, char *newpath)
 int
 safe_mkdir (char *pathname, mode_t mode)
 {
-  int dirfd;
-
-  if (unsafe)
-    return mkdir (pathname, mode);
-
-  dirfd = traverse_path (&pathname);
+  int dirfd = traverse_path (&pathname, true);
   if (dirfd == DIRFD_INVALID)
     return -1;
   return mkdirat (dirfd, pathname, mode);
@@ -647,17 +645,11 @@ safe_mkdir (char *pathname, mode_t mode)
 int
 safe_rmdir (char *pathname)
 {
-  int dirfd;
-  int ret;
-
-  if (unsafe)
-    return rmdir (pathname);
-
-  dirfd = traverse_path (&pathname);
+  int dirfd = traverse_path (&pathname, false);
   if (dirfd == DIRFD_INVALID)
     return -1;
 
-  ret = unlinkat (dirfd, pathname, AT_REMOVEDIR);
+  int ret = unlinkat (dirfd, pathname, AT_REMOVEDIR);
   if (! ret)
     invalidate_cached_dirfd (dirfd, pathname);
   return ret;
@@ -667,12 +659,7 @@ safe_rmdir (char *pathname)
 int
 safe_unlink (char *pathname)
 {
-  int dirfd;
-
-  if (unsafe)
-    return unlink (pathname);
-
-  dirfd = traverse_path (&pathname);
+  int dirfd = traverse_path (&pathname, false);
   if (dirfd == DIRFD_INVALID)
     return -1;
   return unlinkat (dirfd, pathname, 0);
@@ -682,12 +669,7 @@ safe_unlink (char *pathname)
 int
 safe_symlink (char const *target, char *linkpath)
 {
-  int dirfd;
-
-  if (unsafe)
-    return symlink (target, linkpath);
-
-  dirfd = traverse_path (&linkpath);
+  int dirfd = traverse_path (&linkpath, true);
   if (dirfd == DIRFD_INVALID)
     return -1;
   return symlinkat (target, dirfd, linkpath);
@@ -697,12 +679,7 @@ safe_symlink (char const *target, char *linkpath)
 int
 safe_chmod (char *pathname, mode_t mode)
 {
-  int dirfd;
-
-  if (unsafe)
-    return chmod (pathname, mode);
-
-  dirfd = traverse_path (&pathname);
+  int dirfd = traverse_path (&pathname, false);
   if (dirfd == DIRFD_INVALID)
     return -1;
   return fchmodat (dirfd, pathname, mode, 0);
@@ -712,12 +689,7 @@ safe_chmod (char *pathname, mode_t mode)
 int
 safe_lchown (char *pathname, uid_t owner, gid_t group)
 {
-  int dirfd;
-
-  if (unsafe)
-    return lchown (pathname, owner, group);
-
-  dirfd = traverse_path (&pathname);
+  int dirfd = traverse_path (&pathname, false);
   if (dirfd == DIRFD_INVALID)
     return -1;
   return fchownat (dirfd, pathname, owner, group, AT_SYMLINK_NOFOLLOW);
@@ -727,12 +699,7 @@ safe_lchown (char *pathname, uid_t owner, gid_t group)
 int
 safe_lutimens (char *pathname, struct timespec const times[2])
 {
-  int dirfd;
-
-  if (unsafe)
-    return utimensat (AT_FDCWD, pathname, times, AT_SYMLINK_NOFOLLOW);
-
-  dirfd = traverse_path (&pathname);
+  int dirfd = traverse_path (&pathname, false);
   if (dirfd == DIRFD_INVALID)
     return -1;
   return utimensat (dirfd, pathname, times, AT_SYMLINK_NOFOLLOW);
@@ -742,12 +709,7 @@ safe_lutimens (char *pathname, struct timespec const times[2])
 ssize_t
 safe_readlink (char *pathname, char *buf, size_t bufsiz)
 {
-  int dirfd;
-
-  if (unsafe)
-    return readlink (pathname, buf, bufsiz);
-
-  dirfd = traverse_path (&pathname);
+  int dirfd = traverse_path (&pathname, false);
   if (dirfd == DIRFD_INVALID)
     return -1;
   return readlinkat (dirfd, pathname, buf, bufsiz);
@@ -757,7 +719,7 @@ safe_readlink (char *pathname, char *buf, size_t bufsiz)
 int
 safe_access (char *pathname, int mode)
 {
-  int dirfd = unsafe ? AT_FDCWD : traverse_path (&pathname);
+  int dirfd = traverse_path (&pathname, false);
   if (dirfd == DIRFD_INVALID)
     return -1;
   return faccessat (dirfd, pathname, mode, AT_EACCESS);
diff --git a/src/util.c b/src/util.c
index 6278616..bcbd020 100644
--- a/src/util.c
+++ b/src/util.c
@@ -1018,7 +1018,8 @@ pfatal (char const *format, ...)
   va_start (args, format);
   vfprintf (stderr, format, args);
   va_end (args);
-  fprintf (stderr, " : %s\n", strerror (errnum));
+  fprintf (stderr, " : %s\n",
+	   errnum == EILSEQ ? "Invalid byte sequence" : strerror (errnum));
   fflush (stderr);
   fatal_exit ();
 }
@@ -1383,10 +1384,10 @@ init_time (void)
 }
 
 static char *
-parse_c_string (char const *str, char const **endp)
+parse_c_string (char const *s, char const **endp)
 {
   char *u, *v;
-  char const *s = str;
+
   assert (*s == '"');
   s++;
   u = v = xmalloc (strlen (s));
@@ -1447,11 +1448,6 @@ parse_c_string (char const *str, char const **endp)
 	  default:
 	    goto fail;
 	}
-      if (c == '\n')
-	{
-	  int qlen = ckd_add (&qlen, s - str, 0) ? -1 : qlen;
-	  fatal ("quoted string %.*s...\" contains newline", qlen, str);
-	}
       *v++ = c;
     }
 
@@ -1777,12 +1773,20 @@ make_tempfile (struct outfile *out, char letter, char const *real_name,
 
   if (real_name && ! dry_run)
     {
-      idx_t namelen = strlen (real_name);
-      template = ximalloc (namelen + sizeof ".cXXXXXX");
-      sprintf (mempcpy (template, real_name, namelen), ".%cXXXXXX", letter);
+      /* Use the real name sans any newlines in the last component,
+	 followed by ".", LETTER, and 6 random chars.  */
+      char const *last = last_component (real_name);
+      idx_t realdirlen = last - real_name;
+      template = ximalloc (realdirlen + strlen (last) + sizeof ".cXXXXXX");
+      char *t = mempcpy (template, real_name, realdirlen);
+      for (char const *p = last; *p; p++)
+	t += (*t = *p) != '\n';
+      sprintf (t, ".%cXXXXXX", letter);
     }
   else
     {
+      /* In the temp dir, use the name "p", LETTER, and 6 random chars.  */
+
       static char const *tmpdir;
       static idx_t tmpdirlen;
 
@@ -1796,7 +1800,7 @@ make_tempfile (struct outfile *out, char letter, char const *real_name,
 	  for (int i = 0; i < ARRAY_SIZE (envnames); i++)
 	    {
 	      char const *val = getenv (envnames[i]);
-	      if (val && ! strchr (val, '\n'))
+	      if (val)
 		{
 		  tmpdir = val;
 		  break;
diff --git a/tests/bad-filenames b/tests/bad-filenames
index 01c5ca9..b663dad 100644
--- a/tests/bad-filenames
+++ b/tests/bad-filenames
@@ -126,23 +126,43 @@ check 'patch -f -p1 --dry-run < d.diff || echo status: $?' <<EOF
 checking file g
 EOF
 
+fixup='s/ file ab\.[^ ]*/ file ab.*/'
+check_ignoring_temp_file_name() {
+  pat=\'$1\'
+  unset arg1; eval ${2+"arg1=\\''$2'\\'"}
+  unset arg2; eval ${3+"arg2=\\''$3'\\'"}
+  check '
+    emit_patch '"$pat"' | patch '"$arg1"' '"$arg2"' >patch.out 2>&1
+    status=$?
+    sed "$fixup" patch.out
+    echo status: $status
+  '
+}
+
 nl='
 '
 nlname="a${nl}b"
 quoted_nlname='"a\nb"'
 
-check 'emit_patch "$quoted_nlname" | patch; echo status: $?' <<EOF
-$PATCH: **** quoted string "a\\n..." contains newline
+check_ignoring_temp_file_name "$quoted_nlname" <<EOF
+patching file 'a
+b'
+$PATCH: **** Can't rename file ab.* to 'a
+b' : Invalid byte sequence
 status: 2
 EOF
 
-check 'emit_patch foo | patch "$nlname"; echo status: $?' <<EOF
-$PATCH: **** input/output file name contains newline
+check_ignoring_temp_file_name foo "$nlname" <<EOF
+patching file 'a
+b'
+$PATCH: **** Can't rename file ab.* to 'a
+b' : Invalid byte sequence
 status: 2
 EOF
 
-check 'emit_patch foo | patch -o "$nlname"; echo status: $?' <<EOF
-$PATCH: **** output file name contains newline
+check_ignoring_temp_file_name foo -o "$nlname" <<EOF
+$PATCH: **** Can't create file 'a
+b' : Invalid byte sequence
 status: 2
 EOF
 
-- 
2.45.2

Reply via email to