Author: ache
Date: Mon Jul 11 21:23:50 2016
New Revision: 302594
URL: https://svnweb.freebsd.org/changeset/base/302594

Log:
  1) Following r302512 (remove collation support for [a-z]-ranges in libc)
  remove collation support for a-z ranges here too.
  It was implemented for single byte locales only in any case.
  
  2) Reduce the [Cc]flag loop bound to WCHAR_MAX; WINT_MAX here includes
  WEOF, which is not a character.
  
  3) Optimize [Cc]flag case: don't repeatedly add the last character of
  string2 to squeeze cset when string2 reaches its EOS state.
  
  4) Reflect in the manpage that [=equiv=] is implemented for single
  byte locales only.

Modified:
  head/usr.bin/tr/str.c
  head/usr.bin/tr/tr.1
  head/usr.bin/tr/tr.c

Modified: head/usr.bin/tr/str.c
==============================================================================
--- head/usr.bin/tr/str.c       Mon Jul 11 20:15:46 2016        (r302593)
+++ head/usr.bin/tr/str.c       Mon Jul 11 21:23:50 2016        (r302594)
@@ -53,7 +53,7 @@ static int      backslash(STR *, int *);
 static int     bracket(STR *);
 static void    genclass(STR *);
 static void    genequiv(STR *);
-static int      genrange(STR *, int);
+static int     genrange(STR *);
 static void    genseq(STR *);
 
 wint_t
@@ -93,7 +93,7 @@ next(STR *s)
                }
 
                /* We can start a range at any time. */
-               if (s->str[0] == '-' && genrange(s, is_octal))
+               if (s->str[0] == '-' && genrange(s))
                        return (next(s));
                return (1);
        case RANGE:
@@ -237,18 +237,16 @@ genequiv(STR *s)
 }
 
 static int
-genrange(STR *s, int was_octal)
+genrange(STR *s)
 {
-       int stopval, octal;
+       int stopval;
        char *savestart;
-       int n, cnt, *p;
        size_t clen;
        wchar_t wc;
 
-       octal = 0;
        savestart = s->str;
        if (*++s->str == '\\')
-               stopval = backslash(s, &octal);
+               stopval = backslash(s, NULL);
        else {
                clen = mbrtowc(&wc, s->str, MB_LEN_MAX, NULL);
                if (clen == (size_t)-1 || clen == (size_t)-2)
@@ -256,37 +254,13 @@ genrange(STR *s, int was_octal)
                stopval = wc;
                s->str += clen;
        }
-       /*
-        * XXX Characters are not ordered according to collating sequence in
-        * multibyte locales.
-        */
-       if (octal || was_octal || MB_CUR_MAX > 1) {
-               if (stopval < s->lastch) {
-                       s->str = savestart;
-                       return (0);
-               }
-               s->cnt = stopval - s->lastch + 1;
-               s->state = RANGE;
-               --s->lastch;
-               return (1);
-       }
-       if (charcoll((const void *)&stopval, (const void *)&(s->lastch)) < 0) {
+       if (stopval < s->lastch) {
                s->str = savestart;
                return (0);
        }
-       if ((s->set = p = malloc((NCHARS_SB + 1) * sizeof(int))) == NULL)
-               err(1, "genrange() malloc");
-       for (cnt = 0; cnt < NCHARS_SB; cnt++)
-               if (charcoll((const void *)&cnt, (const void *)&(s->lastch)) >= 0 &&
-                   charcoll((const void *)&cnt, (const void *)&stopval) <= 0)
-                       *p++ = cnt;
-       *p = OOBCH;
-       n = p - s->set;
-
-       s->cnt = 0;
-       s->state = SET;
-       if (n > 1)
-               mergesort(s->set, n, sizeof(*(s->set)), charcoll);
+       s->cnt = stopval - s->lastch + 1;
+       s->state = RANGE;
+       --s->lastch;
        return (1);
 }
 

Modified: head/usr.bin/tr/tr.1
==============================================================================
--- head/usr.bin/tr/tr.1        Mon Jul 11 20:15:46 2016        (r302593)
+++ head/usr.bin/tr/tr.1        Mon Jul 11 21:23:50 2016        (r302594)
@@ -164,14 +164,6 @@ as defined by the collation sequence.
 If either or both of the range endpoints are octal sequences, it
 represents the range of specific coded values between the
 range endpoints, inclusive.
-.Pp
-.Bf Em
-See the
-.Sx COMPATIBILITY
-section below for an important note regarding
-differences in the way the current
-implementation interprets range expressions differently from
-previous implementations.
 .Ef
 .It [:class:]
 Represents all characters belonging to the defined character class.
@@ -307,22 +299,16 @@ Remove diacritical marks from all accent
 .Pp
 .Dl "tr \*q[=e=]\*q \*qe\*q"
 .Sh COMPATIBILITY
-Previous
 .Fx
 implementations of
 .Nm
 did not order characters in range expressions according to the current
-locale's collation order, making it possible to convert unaccented Latin
-characters (esp.\& as found in English text) from upper to lower case using
+locale's collation order, making it possible to convert accented Latin
+characters from upper to lower case using
 the traditional
 .Ux
 idiom of
 .Dq Li "tr A-Z a-z" .
-Since
-.Nm
-now obeys the locale's collation order, this idiom may not produce
-correct results when there is not a 1:1 mapping between lower and
-upper case, or when the order of characters within the two cases differs.
 As noted in the
 .Sx EXAMPLES
 section above, the character class expressions
@@ -334,6 +320,9 @@ should be used instead of explicit chara
 and
 .Dq Li A-Z .
 .Pp
+.Dq Li [=equiv=]
+expression is implemented for single byte locales only.
+.Pp
 System V has historically implemented character ranges using the syntax
 .Dq Li [c-c]
 instead of the

Modified: head/usr.bin/tr/tr.c
==============================================================================
--- head/usr.bin/tr/tr.c        Mon Jul 11 20:15:46 2016        (r302593)
+++ head/usr.bin/tr/tr.c        Mon Jul 11 21:23:50 2016        (r302594)
@@ -68,10 +68,8 @@ static void usage(void);
 int
 main(int argc, char **argv)
 {
-       static int carray[NCHARS_SB];
        struct cmap *map;
        struct cset *delete, *squeeze;
-       int n, *p;
        int Cflag, cflag, dflag, sflag, isstring2;
        wint_t ch, cnt, lastch;
 
@@ -254,7 +252,7 @@ main(int argc, char **argv)
                (void)next(&s2);
        }
 endloop:
-       if (cflag || (Cflag && MB_CUR_MAX > 1)) {
+       if (cflag || Cflag) {
                /*
                 * This is somewhat tricky: since the character set is
                 * potentially huge, we need to avoid allocating a map
@@ -268,14 +266,15 @@ endloop:
                 */
                s2.str = argv[1];
                s2.state = NORMAL;
-               for (cnt = 0; cnt < WINT_MAX; cnt++) {
+               for (cnt = 0; cnt <= WCHAR_MAX; cnt++) {
                        if (Cflag && !iswrune(cnt))
                                continue;
                        if (cmap_lookup(map, cnt) == OOBCH) {
-                               if (next(&s2))
+                               if (next(&s2)) {
                                        cmap_add(map, cnt, s2.lastch);
-                               if (sflag)
-                                       cset_add(squeeze, s2.lastch);
+                                       if (sflag)
+                                               cset_add(squeeze, s2.lastch);
+                               }
                        } else
                                cmap_add(map, cnt, cnt);
                        if ((s2.state == EOS || s2.state == INFINITE) &&
@@ -283,30 +282,6 @@ endloop:
                                break;
                }
                cmap_default(map, s2.lastch);
-       } else if (Cflag) {
-               for (p = carray, cnt = 0; cnt < NCHARS_SB; cnt++) {
-                       if (cmap_lookup(map, cnt) == OOBCH && iswrune(cnt))
-                               *p++ = cnt;
-                       else
-                               cmap_add(map, cnt, cnt);
-               }
-               n = p - carray;
-               if (Cflag && n > 1)
-                       (void)mergesort(carray, n, sizeof(*carray), charcoll);
-
-               s2.str = argv[1];
-               s2.state = NORMAL;
-               for (cnt = 0; cnt < n; cnt++) {
-                       (void)next(&s2);
-                       cmap_add(map, carray[cnt], s2.lastch);
-                       /*
-                        * Chars taken from s2 can be different this time
-                        * due to lack of complex upper/lower processing,
-                        * so fill string2 again to not miss some.
-                        */
-                       if (sflag)
-                               cset_add(squeeze, s2.lastch);
-               }
        }
 
        cset_cache(squeeze);
@@ -351,16 +326,6 @@ setup(char *arg, STR *str, int cflag, in
        return (cs);
 }
 
-int
-charcoll(const void *a, const void *b)
-{
-       static char sa[2], sb[2];
-
-       sa[0] = *(const int *)a;
-       sb[0] = *(const int *)b;
-       return (strcoll(sa, sb));
-}
-
 static void
 usage(void)
 {
_______________________________________________
svn-src-head@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"

Reply via email to