The new sort was written to behave like GNU sort even when that
means violating POSIX.  Since our old sort never had the GNU behavior,
we should restore the more standard behavior.

There are a couple of places where GNU and other sorts disagree.

1) Handling of the -b flag

    POSIX says -b only has an effect when -k is specified
    GNU ignores leading blanks with -b even if no -k

    POSIX says -b is only global when it precedes -k
    GNU uses -b in all -k flags regardless of order

2) Handling of the -g flag

    Other sorts treat non-floating point keys as 0, similar to -n.
    GNU does not treat non-floating point keys as 0

   This means that "sort -nu" and "sort -gu" work differently, which
   seems like a bug.

With the following diff, sort passes all regress tests except for one
where the standard was (is?) unclear.

I've used #ifdef GNUSORT_COMPATIBILITY for now so that we can test
both behaviors.  After further testing this will probably go away.

 - todd

Index: usr.bin/sort/coll.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/coll.c,v
retrieving revision 1.8
diff -u -p -u -r1.8 coll.c
--- usr.bin/sort/coll.c 2 Apr 2015 22:14:51 -0000       1.8
+++ usr.bin/sort/coll.c 3 Apr 2015 15:41:05 -0000
@@ -406,11 +406,13 @@ preproc(struct bwstring *s, struct keys_
                struct bwstring *ret = NULL;
                struct sort_mods *sm = default_sort_mods;
 
+#ifdef GNUSORT_COMPATIBILITY
                if (sm->bflag) {
                        if (ret == NULL)
                                ret = bwsdup(s);
                        ret = ignore_leading_blanks(ret);
                }
+#endif
                if (sm->dflag) {
                        if (ret == NULL)
                                ret = bwsdup(s);
@@ -1099,9 +1101,10 @@ gnumcoll(struct key_value *kv1, struct k
                d1 = bwstod(kv1->k, &empty1);
                err1 = errno;
 
-               if (empty1)
+               if (empty1) {
                        kv1->hint->v.gh.notnum = true;
-               else if (err1 == 0) {
+                       kv1->hint->status = HS_INITIALIZED;
+               } else if (err1 == 0) {
                        kv1->hint->v.gh.d = d1;
                        kv1->hint->v.gh.nan = is_nan(d1);
                        kv1->hint->status = HS_INITIALIZED;
@@ -1116,9 +1119,10 @@ gnumcoll(struct key_value *kv1, struct k
                d2 = bwstod(kv2->k, &empty2);
                err2 = errno;
 
-               if (empty2)
+               if (empty2) {
                        kv2->hint->v.gh.notnum = true;
-               else if (err2 == 0) {
+                       kv2->hint->status = HS_INITIALIZED;
+               } else if (err2 == 0) {
                        kv2->hint->v.gh.d = d2;
                        kv2->hint->v.gh.nan = is_nan(d2);
                        kv2->hint->status = HS_INITIALIZED;
@@ -1130,10 +1134,15 @@ gnumcoll(struct key_value *kv1, struct k
 
        if (kv1->hint->status == HS_INITIALIZED &&
            kv2->hint->status == HS_INITIALIZED) {
+#ifdef GNUSORT_COMPATIBILITY
                if (kv1->hint->v.gh.notnum)
                        return kv2->hint->v.gh.notnum ? 0 : -1;
                else if (kv2->hint->v.gh.notnum)
                        return 1;
+#else
+               if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum)
+                       return 0;
+#endif
 
                if (kv1->hint->v.gh.nan)
                        return kv2->hint->v.gh.nan ?
@@ -1164,11 +1173,16 @@ gnumcoll(struct key_value *kv1, struct k
                err2 = errno;
        }
 
-       /* Non-value case: */
+       /* Non-value case */
+#ifdef GNUSORT_COMPATIBILITY
        if (empty1)
                return empty2 ? 0 : -1;
        else if (empty2)
                return 1;
+#else
+       if (empty1 && empty2)
+               return 0;
+#endif
 
        /* NAN case */
        if (is_nan(d1))
Index: usr.bin/sort/sort.1
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.1,v
retrieving revision 1.51
diff -u -p -u -r1.51 sort.1
--- usr.bin/sort/sort.1 1 Apr 2015 19:56:01 -0000       1.51
+++ usr.bin/sort/sort.1 3 Apr 2015 17:08:58 -0000
@@ -137,8 +137,6 @@ appear after
 .Fl k
 or results may be unexpected.
 .Bl -tag -width indent
-.It Fl b, Fl Fl ignore-leading-blanks
-Ignore leading blank characters when comparing lines.
 .It Fl d , Fl Fl dictionary-order
 Consider only blank spaces and alphanumeric characters in comparisons.
 .It Fl f , Fl Fl ignore-case
@@ -235,6 +233,11 @@ Otherwise,
 can be attached independently to each
 .Ar field
 argument of the key specifications.
+Note that
+.Fl b
+should not appear after
+.Fl k ,
+and that it has no effect unless key fields are specified.
 .It Xo
 .Fl k Ar field1 Ns Op , Ns Ar field2 ,
 .Fl Fl key Ns = Ns Ar field1 Ns Op , Ns Ar field2
@@ -582,6 +585,26 @@ Some are provided for compatibility with
 .Nm ,
 others are specific to this implementation.
 .Pp
+Some implementations of
+.Nm
+honor the
+.Fl b
+option even when no key fields are specified.
+This implementation follows historic practice and
+.St -p1003.1-2008
+in only honoring the
+.Fl b
+when
+.Fl k
+is also specified.
+.Pp
+The historic practice of allowing the
+.Fl o
+option to appear after the
+.Ar file
+is supported for compatibility with older versions of
+.Nm .
+.Pp
 The historic key notations
 .Cm \(pl Ns Ar pos1
 and
@@ -609,7 +632,7 @@ The fastest sort is with the C locale, o
 .Fl s .
 In general, the C locale is the fastest, followed by single-byte
 locales with multi-byte locales being the slowest.
-The correct collation order respected in all cases.
+Correct collation order is respected in all cases.
 For the key specification, the simpler to process the
 lines the faster the search will be.
 .Pp
Index: usr.bin/sort/sort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.c,v
retrieving revision 1.77
diff -u -p -u -r1.77 sort.c
--- usr.bin/sort/sort.c 3 Apr 2015 12:52:48 -0000       1.77
+++ usr.bin/sort/sort.c 3 Apr 2015 17:00:18 -0000
@@ -53,7 +53,12 @@
 #include "file.h"
 #include "sort.h"
 
-#define        OPTIONS "bCcdfgHhik:Mmno:RrS:st:T:uVz"
+#ifdef GNUSORT_COMPATIBILITY
+# define PERMUTE       ""
+#else
+# define PERMUTE       "+"
+#endif
+#define        OPTIONS PERMUTE"bCcdfgHhik:Mmno:RrS:st:T:uVz"
 
 static bool need_random;
 static const char *random_source;
@@ -859,6 +864,7 @@ main(int argc, char *argv[])
        char *outfile, *real_outfile, *sflag;
        int c;
        size_t i;
+       struct sort_mods *sm = &default_sort_mods_object;
        bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
            { false, false, false, false, false, false };
 
@@ -866,8 +872,6 @@ main(int argc, char *argv[])
        real_outfile = NULL;
        sflag = NULL;
 
-       struct sort_mods *sm = &default_sort_mods_object;
-
        init_tmp_files();
 
        set_signal_handler();
@@ -909,13 +913,16 @@ main(int argc, char *argv[])
                                sort_opts_vals.complex_sort = true;
                                sort_opts_vals.kflag = true;
 
-                               keys_num++;
-                               keys = sort_reallocarray(keys, keys_num,
+                               keys = sort_reallocarray(keys, keys_num + 1,
                                    sizeof(struct key_specs));
-                               memset(&(keys[keys_num - 1]), 0,
+                               memset(&(keys[keys_num]), 0,
                                    sizeof(struct key_specs));
+#ifndef GNUSORT_COMPATIBILITY
+                               keys[keys_num].pos1b = default_sort_mods->bflag;
+                               keys[keys_num].pos2b = default_sort_mods->bflag;
+#endif
 
-                               if (parse_k(optarg, &(keys[keys_num - 1])) < 0)
+                               if (parse_k(optarg, &(keys[keys_num++])) < 0)
                                        errc(2, EINVAL, "-k %s", optarg);
 
                                break;
@@ -1038,10 +1045,16 @@ main(int argc, char *argv[])
                        }
                }
        }
-
        argc -= optind;
        argv += optind;
 
+#ifndef GNUSORT_COMPATIBILITY
+       if (argc > 2 && strcmp(argv[argc - 2], "-o") == 0) {
+               outfile = argv[argc - 1];
+               argc -= 2;
+       }
+#endif
+
        if (sort_opts_vals.cflag && argc > 1)
                errx(2, "only one input file is allowed with the -%c flag",
                    sort_opts_vals.csilentflag ? 'C' : 'c');
@@ -1054,10 +1067,11 @@ main(int argc, char *argv[])
                keys = sort_reallocarray(keys, 1, sizeof(struct key_specs));
                memset(&(keys[0]), 0, sizeof(struct key_specs));
                keys[0].c1 = 1;
-               keys[0].pos1b = default_sort_mods->bflag;
-               keys[0].pos2b = default_sort_mods->bflag;
-               memcpy(&(keys[0].sm), default_sort_mods,
-                   sizeof(struct sort_mods));
+#ifdef GNUSORT_COMPATIBILITY
+               keys[0].pos1b = sm->bflag;
+               keys[0].pos2b = sm->bflag;
+#endif
+               memcpy(&(keys[0].sm), sm, sizeof(struct sort_mods));
        }
 
        for (i = 0; i < keys_num; i++) {
@@ -1067,8 +1081,10 @@ main(int argc, char *argv[])
 
                if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
                    !(ks->pos2b)) {
+#ifdef GNUSORT_COMPATIBILITY
                        ks->pos1b = sm->bflag;
                        ks->pos2b = sm->bflag;
+#endif
                        memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
                }
 

Reply via email to