The new sort was written to behave like GNU sort even when that
means violating POSIX. Since our old sort never had the GNU behavior
we should restore the more standard behavior.
There are a couple of places where GNU and other sorts disagree.
1) Handling of the -b flag
POSIX says -b only has an effect when -k is specified.
GNU ignores leading blanks with -b even if no -k is given.
POSIX says -b is only global when it precedes -k.
GNU applies -b to all -k flags regardless of order.
2) Handling of the -g flag
Other sorts treat non-floating-point keys as 0, similar to -n.
GNU does not treat non-floating-point keys as 0.
This means that "sort -nu" and "sort -gu" work differently, which
seems like a bug.
With the following diff, sort passes all regress tests except for one
where the standard was (is?) unclear.
I've used #ifdef GNUSORT_COMPATIBILITY for now so that we can test
both behaviors. After further testing this will probably go away.
- todd
Index: usr.bin/sort/coll.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/coll.c,v
retrieving revision 1.8
diff -u -p -u -r1.8 coll.c
--- usr.bin/sort/coll.c 2 Apr 2015 22:14:51 -0000 1.8
+++ usr.bin/sort/coll.c 3 Apr 2015 15:41:05 -0000
@@ -406,11 +406,13 @@ preproc(struct bwstring *s, struct keys_
struct bwstring *ret = NULL;
struct sort_mods *sm = default_sort_mods;
+#ifdef GNUSORT_COMPATIBILITY
if (sm->bflag) {
if (ret == NULL)
ret = bwsdup(s);
ret = ignore_leading_blanks(ret);
}
+#endif
if (sm->dflag) {
if (ret == NULL)
ret = bwsdup(s);
@@ -1099,9 +1101,10 @@ gnumcoll(struct key_value *kv1, struct k
d1 = bwstod(kv1->k, &empty1);
err1 = errno;
- if (empty1)
+ if (empty1) {
kv1->hint->v.gh.notnum = true;
- else if (err1 == 0) {
+ kv1->hint->status = HS_INITIALIZED;
+ } else if (err1 == 0) {
kv1->hint->v.gh.d = d1;
kv1->hint->v.gh.nan = is_nan(d1);
kv1->hint->status = HS_INITIALIZED;
@@ -1116,9 +1119,10 @@ gnumcoll(struct key_value *kv1, struct k
d2 = bwstod(kv2->k, &empty2);
err2 = errno;
- if (empty2)
+ if (empty2) {
kv2->hint->v.gh.notnum = true;
- else if (err2 == 0) {
+ kv2->hint->status = HS_INITIALIZED;
+ } else if (err2 == 0) {
kv2->hint->v.gh.d = d2;
kv2->hint->v.gh.nan = is_nan(d2);
kv2->hint->status = HS_INITIALIZED;
@@ -1130,10 +1134,15 @@ gnumcoll(struct key_value *kv1, struct k
if (kv1->hint->status == HS_INITIALIZED &&
kv2->hint->status == HS_INITIALIZED) {
+#ifdef GNUSORT_COMPATIBILITY
if (kv1->hint->v.gh.notnum)
return kv2->hint->v.gh.notnum ? 0 : -1;
else if (kv2->hint->v.gh.notnum)
return 1;
+#else
+ if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum)
+ return 0;
+#endif
if (kv1->hint->v.gh.nan)
return kv2->hint->v.gh.nan ?
@@ -1164,11 +1173,16 @@ gnumcoll(struct key_value *kv1, struct k
err2 = errno;
}
- /* Non-value case: */
+ /* Non-value case */
+#ifdef GNUSORT_COMPATIBILITY
if (empty1)
return empty2 ? 0 : -1;
else if (empty2)
return 1;
+#else
+ if (empty1 && empty2)
+ return 0;
+#endif
/* NAN case */
if (is_nan(d1))
Index: usr.bin/sort/sort.1
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.1,v
retrieving revision 1.51
diff -u -p -u -r1.51 sort.1
--- usr.bin/sort/sort.1 1 Apr 2015 19:56:01 -0000 1.51
+++ usr.bin/sort/sort.1 3 Apr 2015 17:08:58 -0000
@@ -137,8 +137,6 @@ appear after
.Fl k
or results may be unexpected.
.Bl -tag -width indent
-.It Fl b, Fl Fl ignore-leading-blanks
-Ignore leading blank characters when comparing lines.
.It Fl d , Fl Fl dictionary-order
Consider only blank spaces and alphanumeric characters in comparisons.
.It Fl f , Fl Fl ignore-case
@@ -235,6 +233,11 @@ Otherwise,
can be attached independently to each
.Ar field
argument of the key specifications.
+Note that
+.Fl b
+should not appear after
+.Fl k ,
+and that it has no effect unless key fields are specified.
.It Xo
.Fl k Ar field1 Ns Op , Ns Ar field2 ,
.Fl Fl key Ns = Ns Ar field1 Ns Op , Ns Ar field2
@@ -582,6 +585,26 @@ Some are provided for compatibility with
.Nm ,
others are specific to this implementation.
.Pp
+Some implementations of
+.Nm
+honor the
+.Fl b
+option even when no key fields are specified.
+This implementation follows historic practice and
+.St -p1003.1-2008
+in only honoring the
+.Fl b
+when
+.Fl k
+is also specified.
+.Pp
+The historic practice of allowing the
+.Fl o
+option to appear after the
+.Ar file
+is supported for compatibility with older versions of
+.Nm .
+.Pp
The historic key notations
.Cm \(pl Ns Ar pos1
and
@@ -609,7 +632,7 @@ The fastest sort is with the C locale, o
.Fl s .
In general, the C locale is the fastest, followed by single-byte
locales with multi-byte locales being the slowest.
-The correct collation order respected in all cases.
+Correct collation order is respected in all cases.
For the key specification, the simpler to process the
lines the faster the search will be.
.Pp
Index: usr.bin/sort/sort.c
===================================================================
RCS file: /cvs/src/usr.bin/sort/sort.c,v
retrieving revision 1.77
diff -u -p -u -r1.77 sort.c
--- usr.bin/sort/sort.c 3 Apr 2015 12:52:48 -0000 1.77
+++ usr.bin/sort/sort.c 3 Apr 2015 17:00:18 -0000
@@ -53,7 +53,12 @@
#include "file.h"
#include "sort.h"
-#define OPTIONS "bCcdfgHhik:Mmno:RrS:st:T:uVz"
+#ifdef GNUSORT_COMPATIBILITY
+# define PERMUTE ""
+#else
+# define PERMUTE "+"
+#endif
+#define OPTIONS PERMUTE"bCcdfgHhik:Mmno:RrS:st:T:uVz"
static bool need_random;
static const char *random_source;
@@ -859,6 +864,7 @@ main(int argc, char *argv[])
char *outfile, *real_outfile, *sflag;
int c;
size_t i;
+ struct sort_mods *sm = &default_sort_mods_object;
bool mef_flags[NUMBER_OF_MUTUALLY_EXCLUSIVE_FLAGS] =
{ false, false, false, false, false, false };
@@ -866,8 +872,6 @@ main(int argc, char *argv[])
real_outfile = NULL;
sflag = NULL;
- struct sort_mods *sm = &default_sort_mods_object;
-
init_tmp_files();
set_signal_handler();
@@ -909,13 +913,16 @@ main(int argc, char *argv[])
sort_opts_vals.complex_sort = true;
sort_opts_vals.kflag = true;
- keys_num++;
- keys = sort_reallocarray(keys, keys_num,
+ keys = sort_reallocarray(keys, keys_num + 1,
sizeof(struct key_specs));
- memset(&(keys[keys_num - 1]), 0,
+ memset(&(keys[keys_num]), 0,
sizeof(struct key_specs));
+#ifndef GNUSORT_COMPATIBILITY
+ keys[keys_num].pos1b = default_sort_mods->bflag;
+ keys[keys_num].pos2b = default_sort_mods->bflag;
+#endif
- if (parse_k(optarg, &(keys[keys_num - 1])) < 0)
+ if (parse_k(optarg, &(keys[keys_num++])) < 0)
errc(2, EINVAL, "-k %s", optarg);
break;
@@ -1038,10 +1045,16 @@ main(int argc, char *argv[])
}
}
}
-
argc -= optind;
argv += optind;
+#ifndef GNUSORT_COMPATIBILITY
+ if (argc > 2 && strcmp(argv[argc - 2], "-o") == 0) {
+ outfile = argv[argc - 1];
+ argc -= 2;
+ }
+#endif
+
if (sort_opts_vals.cflag && argc > 1)
errx(2, "only one input file is allowed with the -%c flag",
sort_opts_vals.csilentflag ? 'C' : 'c');
@@ -1054,10 +1067,11 @@ main(int argc, char *argv[])
keys = sort_reallocarray(keys, 1, sizeof(struct key_specs));
memset(&(keys[0]), 0, sizeof(struct key_specs));
keys[0].c1 = 1;
- keys[0].pos1b = default_sort_mods->bflag;
- keys[0].pos2b = default_sort_mods->bflag;
- memcpy(&(keys[0].sm), default_sort_mods,
- sizeof(struct sort_mods));
+#ifdef GNUSORT_COMPATIBILITY
+ keys[0].pos1b = sm->bflag;
+ keys[0].pos2b = sm->bflag;
+#endif
+ memcpy(&(keys[0].sm), sm, sizeof(struct sort_mods));
}
for (i = 0; i < keys_num; i++) {
@@ -1067,8 +1081,10 @@ main(int argc, char *argv[])
if (sort_modifier_empty(&(ks->sm)) && !(ks->pos1b) &&
!(ks->pos2b)) {
+#ifdef GNUSORT_COMPATIBILITY
ks->pos1b = sm->bflag;
ks->pos2b = sm->bflag;
+#endif
memcpy(&(ks->sm), sm, sizeof(struct sort_mods));
}