Here's a new diff with the issues addressed.
But instead of a check I fixed the starting offset, which also caused
issues with y///.

OK?

Index: compile.c
===================================================================
RCS file: /cvs/src/usr.bin/sed/compile.c,v
retrieving revision 1.43
diff -u -p -u -r1.43 compile.c
--- compile.c   8 Dec 2017 18:41:59 -0000       1.43
+++ compile.c   11 Dec 2017 09:05:56 -0000
@@ -59,7 +59,7 @@ static struct labhash {
 
 static char     *compile_addr(char *, struct s_addr *);
 static char     *compile_ccl(char **, char *);
-static char     *compile_delimited(char *, char *, int);
+static char     *compile_delimited(char *, char *);
 static char     *compile_flags(char *, struct s_subst *);
 static char     *compile_re(char *, regex_t **);
 static char     *compile_subst(char *, struct s_subst *);
@@ -351,7 +351,7 @@ nonsel:             /* Now parse the command */
  * with the processed string.
  */
 static char *
-compile_delimited(char *p, char *d, int is_tr)
+compile_delimited(char *p, char *d)
 {
        char c;
 
@@ -376,10 +376,7 @@ compile_delimited(char *p, char *d, int 
                        p += 2;
                        continue;
                } else if (*p == '\\' && p[1] == '\\') {
-                       if (is_tr)
-                               p++;
-                       else
-                               *d++ = *p++;
+                       *d++ = *p++;
                } else if (*p == c) {
                        *d = '\0';
                        return (p + 1);
@@ -436,7 +433,7 @@ compile_re(char *p, regex_t **repp)
        char *re;
 
        re = xmalloc(strlen(p) + 1); /* strlen(re) <= strlen(p) */
-       p = compile_delimited(p, re, 0);
+       p = compile_delimited(p, re);
        if (p && strlen(re) == 0) {
                *repp = NULL;
                free(re);
@@ -603,46 +600,63 @@ compile_flags(char *p, struct s_subst *s
  * Compile a translation set of strings into a lookup table.
  */
 static char *
-compile_tr(char *p, char **transtab)
+compile_tr(char *old, char **transtab)
 {
        int i;
-       char *lt, *op, *np;
-       char *old = NULL, *new = NULL;
+       char delimiter, check[UCHAR_MAX];
+       char *new, *end;
+
+       memset(check, 0, sizeof(check));
+       delimiter = *old;
+       if (delimiter == '\\')
+               error(COMPILE, "\\ can not be used as a string delimiter");
+       else if (delimiter == '\n' || delimiter == '\0')
+               error(COMPILE, "newline can not be used as a string delimiter");
+
+       new = old++;
+       do {
+               if ((new = strchr(new + 1, delimiter)) == NULL)
+                       error(COMPILE, "unterminated transform source string");
+       } while (*(new - 1) == '\\' && *(new -2) != '\\');
+       *new = '\0';
+       end = new++;
+       do {
+               if ((end = strchr(end + 1, delimiter)) == NULL)
+                       error(COMPILE, "unterminated transform target string");
+       } while (*(end -1) == '\\' && *(end -2) != '\\');
+       *end = '\0';
 
-       if (*p == '\0' || *p == '\\')
-               error(COMPILE,
-"transform pattern can not be delimited by newline or backslash");
-       old = xmalloc(strlen(p) + 1);
-       p = compile_delimited(p, old, 1);
-       if (p == NULL) {
-               error(COMPILE, "unterminated transform source string");
-               goto bad;
-       }
-       new = xmalloc(strlen(p) + 1);
-       p = compile_delimited(--p, new, 1);
-       if (p == NULL) {
-               error(COMPILE, "unterminated transform target string");
-               goto bad;
-       }
-       EATSPACE();
-       if (strlen(new) != strlen(old)) {
-               error(COMPILE, "transform strings are not the same length");
-               goto bad;
-       }
        /* We assume characters are 8 bits */
-       lt = xmalloc(UCHAR_MAX + 1);
+       *transtab = xmalloc(UCHAR_MAX + 1);
        for (i = 0; i <= UCHAR_MAX; i++)
-               lt[i] = (char)i;
-       for (op = old, np = new; *op; op++, np++)
-               lt[(u_char)*op] = *np;
-       *transtab = lt;
-       free(old);
-       free(new);
-       return (p);
-bad:
-       free(old);
-       free(new);
-       return (NULL);
+               (*transtab)[i] = (char)i;
+
+       while (*old != '\0' && *new != '\0') {
+               if (*old == '\\') {
+                       old++;
+                       if (*old == 'n')
+                               *old = '\n';
+                       else if (*old != delimiter && *old != '\\')
+                               error(COMPILE, "Unexpected character after "
+                                   "backslash");
+                       
+               }
+               if (*new == '\\') {
+                       new++;
+                       if (*new == 'n')
+                               *new = '\n';
+                       else if (*new != delimiter && *new != '\\')
+                               error(COMPILE, "Unexpected character after "
+                                   "backslash");
+               }
+               if (check[*old] == 1)
+                       error(COMPILE, "Repeated character in source string");
+               check[*old] = 1;
+               (*transtab)[*old++] = *new++;
+       }
+       if (*old != '\0' || *new != '\0')
+               error(COMPILE, "transform strings are not the same length");
+       return end + 1;
 }
 
 /*
Index: sed.1
===================================================================
RCS file: /cvs/src/usr.bin/sed/sed.1,v
retrieving revision 1.52
diff -u -p -u -r1.52 sed.1
--- sed.1       8 Dec 2017 18:41:59 -0000       1.52
+++ sed.1       11 Dec 2017 09:05:56 -0000
@@ -482,14 +482,29 @@ in the pattern space with the correspond
 .Ar string2 .
 Any character other than a backslash or newline can be used instead of
 a slash to delimit the strings.
+.Pp
 Within
 .Ar string1
 and
 .Ar string2 ,
-a backslash followed by any character other than a newline is that literal
-character, and a backslash followed by an
+a backslash followed by another backslash
+is replaced by a single backslash,
+a backslash followed by an
 .Sq n
-is replaced by a newline character.
+is replaced by a newline character,
+and a backslash followed by the delimiting character
+is replaced by that character,
+causing it to be treated literally,
+with the exception of the
+.Sq n
+character,
+which will still be treated like a newline character.
+It is an error for a backslash to not be followed by another backslash,
+.Sq n ,
+or the delimiting character,
+or for
+.Ar string1
+to contain repeating characters,
 .It [0addr] Ns Ic \&: Ns Ar label
 This function does nothing; it bears a
 .Ar label

Reply via email to