Module Name:    src
Committed By:   kre
Date:           Sun Dec 11 08:23:11 UTC 2022

Modified Files:
        src/bin/sh: miscbltin.c sh.1

Log Message:
It appears that POSIX intends to add a -d X option to the read command
in its next version, so it can be used as -d '' (to specify a \0 end
character for the record read, rather than the default \n) to accompany
find -print0 and xargs -0 options (also likely to be added).

Add support for -d now.   While here, fix a bug where escaped nul
chars (\ \0) in non-raw mode were not being dropped, as they are
when not escaped (when not dropped, they were still not used in any
useful way; they just ended the value at that point).


To generate a diff of this commit:
cvs rdiff -u -r1.52 -r1.53 src/bin/sh/miscbltin.c
cvs rdiff -u -r1.251 -r1.252 src/bin/sh/sh.1

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/bin/sh/miscbltin.c
diff -u src/bin/sh/miscbltin.c:1.52 src/bin/sh/miscbltin.c:1.53
--- src/bin/sh/miscbltin.c:1.52	Fri Aug 19 12:52:31 2022
+++ src/bin/sh/miscbltin.c	Sun Dec 11 08:23:10 2022
@@ -1,4 +1,4 @@
-/*	$NetBSD: miscbltin.c,v 1.52 2022/08/19 12:52:31 kre Exp $	*/
+/*	$NetBSD: miscbltin.c,v 1.53 2022/12/11 08:23:10 kre Exp $	*/
 
 /*-
  * Copyright (c) 1991, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)miscbltin.c	8.4 (Berkeley) 5/4/95";
 #else
-__RCSID("$NetBSD: miscbltin.c,v 1.52 2022/08/19 12:52:31 kre Exp $");
+__RCSID("$NetBSD: miscbltin.c,v 1.53 2022/12/11 08:23:10 kre Exp $");
 #endif
 #endif /* not lint */
 
@@ -91,6 +91,7 @@ readcmd(int argc, char **argv)
 {
 	char **ap;
 	char c;
+	char end;
 	int rflag;
 	char *prompt;
 	const char *ifs;
@@ -102,13 +103,21 @@ readcmd(int argc, char **argv)
 	int saveall = 0;
 	ptrdiff_t wordlen = 0;
 
+	end = '\n';				/* record delimiter */
 	rflag = 0;
 	prompt = NULL;
-	while ((i = nextopt("p:r")) != '\0') {
-		if (i == 'p')
+	while ((i = nextopt("d:p:r")) != '\0') {
+		switch (i) {
+		case 'd':
+			end = *optionarg;	/* even if '\0' */
+			break;
+		case 'p':
 			prompt = optionarg;
-		else
+			break;
+		case 'r':
 			rflag = 1;
+			break;
+		}
 	}
 
 	if (*(ap = argptr) == NULL)
@@ -131,19 +140,19 @@ readcmd(int argc, char **argv)
 			status = 1;
 			break;
 		}
-		if (c == '\0')
-			continue;
-		if (c == '\\' && !rflag) {
+		if (c == '\\' && c != end && !rflag) {
 			if (read(0, &c, 1) != 1) {
 				status = 1;
 				break;
 			}
-			if (c != '\n')
+			if (c != '\n')	/* \ \n is always just removed */
 				goto wdch;
 			continue;
 		}
-		if (c == '\n')
+		if (c == end)
 			break;
+		if (c == '\0')
+			continue;
 		if (strchr(ifs, c))
 			is_ifs = strchr(" \t\n", c) ? 1 : 2;
 		else
@@ -167,6 +176,8 @@ readcmd(int argc, char **argv)
 
 		if (is_ifs == 0) {
   wdch:;
+			if (c == '\0')	/* always ignore attempts to input \0 */
+				continue;
 			/* append this character to the current variable */
 			startword = 0;
 			if (saveall)

Index: src/bin/sh/sh.1
diff -u src/bin/sh/sh.1:1.251 src/bin/sh/sh.1:1.252
--- src/bin/sh/sh.1:1.251	Sun Oct 30 01:19:08 2022
+++ src/bin/sh/sh.1	Sun Dec 11 08:23:10 2022
@@ -1,4 +1,4 @@
-.\"	$NetBSD: sh.1,v 1.251 2022/10/30 01:19:08 kre Exp $
+.\"	$NetBSD: sh.1,v 1.252 2022/12/11 08:23:10 kre Exp $
 .\" Copyright (c) 1991, 1993
 .\"	The Regents of the University of California.  All rights reserved.
 .\"
@@ -31,8 +31,7 @@
 .\"
 .\"	@(#)sh.1	8.6 (Berkeley) 5/4/95
 .\"
-.\" RIP Noi, October 6, 1959 --
-.Dd August 26, 2022
+.Dd December 9, 2022
 .Dt SH 1
 .\" everything except c o and s (keep them ordered)
 .ds flags abCEeFfhIiLlmnpquVvXx
@@ -3583,15 +3582,21 @@ the program will use
 and the built-in uses a separately cached value.
 .\"
 .Pp
-.It Ic read Oo Fl p Ar prompt Oc Oo Fl r Oc Ar variable Op Ar ...
+.It Ic read Oo Fl d Ar delim Oc Oo Fl p Ar prompt Oc Oo Fl r Oc Ar variable Op Ar ...
 The
 .Ar prompt
 is printed on standard error if the
 .Fl p
 option is specified and the standard input is a terminal.
-Then a line is read from the standard input.
-The trailing newline is deleted from the
-line and the line is split as described in the field splitting section of the
+Then a record, terminated by the
+first character of
+.Ar delim
+if the
+.Fl d
+option was given, or a newline character otherwise,
+is read from the standard input.
+The ending delimiter is deleted from the
+record which is then split as described in the field splitting section of the
 .Sx Word Expansions
 section above.
 The pieces are assigned to the
@@ -3614,17 +3619,41 @@ which case failure is returned.
 By default, unless the
 .Fl r
 option is specified, the backslash
-.Dq \e
-acts as an escape character, causing the following character to be treated
-literally.
+.Pq Ql \e
+acts as an escape character,
+causing the following character,
+when that character is the escape character, or end delimiter character,
+to be treated literally when reading the record.
 This is the only form of quoting that applies.
 If an unescaped backslash is followed by a newline,
 the backslash and the newline will be deleted,
-and replaced by the contents of the following line,
+and replaced by the contents from the following line,
 which is processed as if it had been part of the original line.
 This includes reading yet more input if necessary,
-until a line is read that is not terminated by
-an unescaped backslash immediately before the newline.
+until a line is read that contains or ends with an unescaped
+copy of the delimiter character.
+If the end delimiter (when it is not a newline) is escaped,
+it is treated as a normal character, and
+.Ic read
+continues looking for an unescaped end delimiter character.
+No other escape sequences are meaningful; the escape character
+is simply ignored.
+This happens as the record is read,
+before field splitting occurs.
+When
+.Fl r
+is used,
+no escaping occurs,
+no line joining happens,
+and any input backslash is simply an input character.
+.Pp
+Note that if
+.Ar delim
+is given as an empty string, the nul character
+.Pq Ql \e0
+is used as the delimiter.
+Other than this use, any nul characters in the input
+stream are silently deleted.
 .\"
 .Pp
 .It Ic readonly Ar name Ns Oo =value Oc ...

Reply via email to