Module Name: src
Committed By: kre
Date: Wed Jun 1 02:37:55 UTC 2016
Modified Files:
src/usr.bin/sort: msort.c sort.1 sort.c sort.h
Log Message:
Add the posix -C option (-c but quieter). Fix -R to work properly when
setting \n as the record delimiter using a numeric value rather than literal
\n - and to not incorrectly turn \n into a field separator if -R is used to
make some other char the record separator (\n becomes a field separator in
that case as long as the field separator remains "white space" but should not
be in any other case - unless set explicitly of course.)
Plus more cosmetic changes - the man page and usage are updated to make it
more clear that the 2 (or 1) params to -k are not fields (field1 and field2)
but specifiers of the beginning and end of one key field. There was an
unused 'x' option in the GETOPTS string. The usage message is reformatted
to display properly on both 80 col and > 80 col displays (on < 80 it will
still probably look pretty ugly ... perhaps not quite so bad though), and
is also updated to show the different usage for the -c case (and -C) from the
others (only 1 file permitted) - the man page synopsis has a similar update.
Using more than one of -c -C or -m generates a usage message rather than
just ignoring the -m as it did before (there was no -C before of course).
Aside from the bug fix to the interaction between -R and -t, there are no
changes that affect the way anything is sorted (or read, or written).
Discussed on tech-userlevel earlier this week.
To generate a diff of this commit:
cvs rdiff -u -r1.30 -r1.31 src/usr.bin/sort/msort.c
cvs rdiff -u -r1.34 -r1.35 src/usr.bin/sort/sort.1
cvs rdiff -u -r1.61 -r1.62 src/usr.bin/sort/sort.c
cvs rdiff -u -r1.35 -r1.36 src/usr.bin/sort/sort.h
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
Modified files:
Index: src/usr.bin/sort/msort.c
diff -u src/usr.bin/sort/msort.c:1.30 src/usr.bin/sort/msort.c:1.31
--- src/usr.bin/sort/msort.c:1.30 Fri Feb 5 21:58:42 2010
+++ src/usr.bin/sort/msort.c Wed Jun 1 02:37:55 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: msort.c,v 1.30 2010/02/05 21:58:42 enami Exp $ */
+/* $NetBSD: msort.c,v 1.31 2016/06/01 02:37:55 kre Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -64,7 +64,7 @@
#include "sort.h"
#include "fsort.h"
-__RCSID("$NetBSD: msort.c,v 1.30 2010/02/05 21:58:42 enami Exp $");
+__RCSID("$NetBSD: msort.c,v 1.31 2016/06/01 02:37:55 kre Exp $");
#include <stdlib.h>
#include <string.h>
@@ -365,7 +365,7 @@ insert(struct mfile **flist, struct mfil
* check order on one file
*/
void
-order(struct filelist *filelist, struct field *ftbl)
+order(struct filelist *filelist, struct field *ftbl, int quiet)
{
get_func_t get = SINGL_FLD ? makeline : makekey;
RECHEADER *crec, *prec, *trec;
@@ -387,10 +387,14 @@ order(struct filelist *filelist, struct
exit(0);
while (get(fp, crec, crec_end, ftbl) == 0) {
if (0 < (c = cmp(prec, crec))) {
+ if (quiet)
+ exit(1);
crec->data[crec->length-1] = 0;
errx(1, "found disorder: %s", crec->data+crec->offset);
}
if (UNIQUE && !c) {
+ if (quiet)
+ exit(1);
crec->data[crec->length-1] = 0;
errx(1, "found non-uniqueness: %s",
crec->data+crec->offset);
Index: src/usr.bin/sort/sort.1
diff -u src/usr.bin/sort/sort.1:1.34 src/usr.bin/sort/sort.1:1.35
--- src/usr.bin/sort/sort.1:1.34 Wed May 29 15:00:35 2013
+++ src/usr.bin/sort/sort.1 Wed Jun 1 02:37:55 2016
@@ -1,4 +1,4 @@
-.\" $NetBSD: sort.1,v 1.34 2013/05/29 15:00:35 wiz Exp $
+.\" $NetBSD: sort.1,v 1.35 2016/06/01 02:37:55 kre Exp $
.\"
.\" Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
.\" All rights reserved.
@@ -67,16 +67,26 @@
.Nd sort or merge text files
.Sh SYNOPSIS
.Nm
-.Op Fl bcdfHilmnrSsu
+.Op Fl bdfHilmnrSsu
.Oo
.Fl k
-.Ar field1 Ns Op Li \&, Ns Ar field2
+.Ar kstart Ns Op Li \&, Ns Ar kend
.Oc
.Op Fl o Ar output
.Op Fl R Ar char
.Op Fl T Ar dir
.Op Fl t Ar char
.Op Ar
+.Nm
+.Fl c|C
+.Op Fl bdfilnru
+.Oo
+.Fl k
+.Ar kstart Ns Op Li \&, Ns Ar kend
+.Op Fl t Ar char
+.Oc
+.Op Fl R Ar char
+.Op Ar file
.Sh DESCRIPTION
The
.Nm
@@ -101,6 +111,10 @@ returns 0.
produces no output.
See also
.Fl u .
+.It Fl C
+Identical to
+.Fl c
+without the error messages in the case of unsorted input.
.It Fl H
Ignored for compatibility with earlier versions of
.Nm .
@@ -137,9 +151,13 @@ If used with the
option, check that there are no lines with duplicate keys.
.El
.Pp
-The following options override the default ordering rules.
-When ordering options appear independent of key field
-specifications, the requested field ordering rules are
+The following options,
+which should be given before any
+.Fl k
+options, override the default ordering rules.
+When ordering options appear independent of,
+and before, key field specifications,
+the requested field ordering rules are
applied globally to all sort keys.
When attached to a specific key (see
.Fl k ) ,
@@ -224,12 +242,21 @@ is used as the record separator characte
This should be used with discretion;
.Fl R Aq Ar alphanumeric
usually produces undesirable results.
+If char is not a single character, then it
+specifies the value of the desired record
+separator as an integer specified in any
+of the normal NNN, 0ooo, or 0xXXX ways,
+or as an octal value preceded by \e.
+Caution: do not attempt to specify Ctl-A
+as
+.Dq -R 1
+which will not do what was intended at all!
The default record separator is newline.
-.It Fl k Ar field1 Ns Op Li \&, Ns Ar field2
+.It Fl k Ar kstart Ns Op Li \&, Ns Ar kend
Designates the starting position,
-.Ar field1 ,
+.Ar kstart ,
and optional ending position,
-.Ar field2 ,
+.Ar kend ,
of a key field.
The
.Fl k
@@ -265,16 +292,16 @@ first field.
Fields are specified
by the
.Fl k
-.Ar field1 Ns Op \&, Ns Ar field2
+.Ar kstart Ns Op \&, Ns Ar kend
argument.
A missing
-.Ar field2
+.Ar kend
argument defaults to the end of a line.
.Pp
The arguments
-.Ar field1
+.Ar kstart
and
-.Ar field2
+.Ar kend
have the form
.Ar m Ns Li \&. Ns Ar n
and can be followed by one or more of the letters
@@ -284,7 +311,7 @@ and
.Cm r ,
which correspond to the options discussed above.
A
-.Ar field1
+.Ar kstart
position specified by
.Ar m Ns Li \&. Ns Ar n
.Pq Ar m , n No \*[Gt] 0
@@ -296,7 +323,7 @@ field.
A missing
.Li \&. Ns Ar n
in
-.Ar field1
+.Ar kstart
means
.Ql \&.1 ,
indicating the first character of the
@@ -314,7 +341,7 @@ refers to the first non-blank character
field.
.Pp
A
-.Ar field2
+.Ar kend
position specified by
.Ar m Ns Li \&. Ns Ar n
is interpreted as
@@ -364,6 +391,38 @@ option is still supported, except for
which has no
.Fl k
equivalent.
+.Pp
+.Nm
+compares records by comparing the key fields selected by
+.Fl k
+arguments,
+from first given to last,
+until discovering a difference.
+If there are no
+.Fl k
+arguments, the whole record is treated as a single key.
+After exhausting the
+.Fl k
+arguments, if no difference has been found,
+then the result depends upon the
+.Fl u
+and
+.Fl S
+option settings.
+With
+.Fl u
+the records are considered identical, and one is suppressed.
+Otherwise with
+.Fl s
+set (default) the records are left in their original order,
+or with
+.Fl S
+(posix mode) the whole record is considered as a tie breaker.
+.\"
+.\" If you fail to understand why it doesn't matter which order
+.\" the records are output when they are wholly identical, there
+.\" is nothing that this man page can say that will help!
+.\"
.Sh ENVIRONMENT
If the following environment variable exists, it is used by
.Nm .
@@ -398,6 +457,8 @@ Normal behavior.
.It 1
On disorder (or non-uniqueness) with the
.Fl c
+(or
+.Fl C )
option
.It 2
An error occurred.
@@ -451,7 +512,7 @@ and merge sorts and did not.)
Thus performance depends highly on efficient choice of sort keys, and the
.Fl b
option and the
-.Ar field2
+.Ar kend
argument of the
.Fl k
option should be used whenever possible.
Index: src/usr.bin/sort/sort.c
diff -u src/usr.bin/sort/sort.c:1.61 src/usr.bin/sort/sort.c:1.62
--- src/usr.bin/sort/sort.c:1.61 Fri Sep 16 15:39:29 2011
+++ src/usr.bin/sort/sort.c Wed Jun 1 02:37:55 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: sort.c,v 1.61 2011/09/16 15:39:29 joerg Exp $ */
+/* $NetBSD: sort.c,v 1.62 2016/06/01 02:37:55 kre Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -76,7 +76,7 @@ __COPYRIGHT("@(#) Copyright (c) 1993\
The Regents of the University of California. All rights reserved.");
#endif /* not lint */
-__RCSID("$NetBSD: sort.c,v 1.61 2011/09/16 15:39:29 joerg Exp $");
+__RCSID("$NetBSD: sort.c,v 1.62 2016/06/01 02:37:55 kre Exp $");
#include <sys/types.h>
#include <sys/time.h>
@@ -117,7 +117,7 @@ int
main(int argc, char *argv[])
{
int ch, i, stdinflag = 0;
- char cflag = 0, mflag = 0;
+ char mode = 0;
char *outfile, *outpath = 0;
struct field *fldtab;
size_t fldtab_sz, fld_cnt;
@@ -145,9 +145,9 @@ main(int argc, char *argv[])
fldtab = emalloc(fldtab_sz * sizeof(*fldtab));
memset(fldtab, 0, fldtab_sz * sizeof(*fldtab));
-#define SORT_OPTS "bcdD:fHik:lmno:rR:sSt:T:ux"
+#define SORT_OPTS "bcCdD:fHik:lmno:rR:sSt:T:u"
- /* Convert "+field" args to -f format */
+ /* Convert "+field" args to -k format */
fixit(&argc, argv, SORT_OPTS);
if (!(tmpdir = getenv("TMPDIR")))
@@ -158,8 +158,10 @@ main(int argc, char *argv[])
case 'b':
fldtab[0].flags |= BI | BT;
break;
- case 'c':
- cflag = 1;
+ case 'c': case 'C': case 'm':
+ if (mode)
+ usage("Incompatible operation modes");
+ mode = ch;
break;
case 'D': /* Debug flags */
for (i = 0; optarg[i]; i++)
@@ -179,15 +181,33 @@ main(int argc, char *argv[])
setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags);
break;
- case 'm':
- mflag = 1;
- break;
case 'o':
outpath = optarg;
break;
case 'r':
REVERSE = 1;
break;
+ case 'R':
+ if (REC_D != '\n')
+ usage("multiple record delimiters");
+ REC_D = *optarg;
+ if (optarg[1] != '\0') {
+ char *ep;
+ int t = 0;
+
+ if (optarg[0] == '\\')
+ optarg++, t = 8;
+ REC_D = (int)strtol(optarg, &ep, t);
+ if (*ep != '\0' || REC_D < 0 ||
+ REC_D >= (int)__arraycount(d_mask))
+ errx(2, "invalid record delimiter %s",
+ optarg);
+ }
+ if (REC_D == '\n')
+ break;
+ d_mask['\n'] = d_mask[' '];
+ d_mask[REC_D] = REC_D_F;
+ break;
case 's':
/*
* Nominally 'stable sort', keep lines with equal keys
@@ -213,30 +233,11 @@ main(int argc, char *argv[])
SEP_FLAG = 1;
d_mask[' '] &= ~FLD_D;
d_mask['\t'] &= ~FLD_D;
+ d_mask['\n'] &= ~FLD_D;
d_mask[(u_char)*optarg] |= FLD_D;
if (d_mask[(u_char)*optarg] & REC_D_F)
errx(2, "record/field delimiter clash");
break;
- case 'R':
- if (REC_D != '\n')
- usage("multiple record delimiters");
- REC_D = *optarg;
- if (REC_D == '\n')
- break;
- if (optarg[1] != '\0') {
- char *ep;
- int t = 0;
- if (optarg[0] == '\\')
- optarg++, t = 8;
- REC_D = (int)strtol(optarg, &ep, t);
- if (*ep != '\0' || REC_D < 0 ||
- REC_D >= (int)__arraycount(d_mask))
- errx(2, "invalid record delimiter %s",
- optarg);
- }
- d_mask['\n'] = d_mask[' '];
- d_mask[REC_D] = REC_D_F;
- break;
case 'T':
/* -T tmpdir */
tmpdir = optarg;
@@ -254,13 +255,13 @@ main(int argc, char *argv[])
/* Don't sort on raw record if keys match */
posix_sort = 0;
- if (cflag && argc > optind+1)
+ if ((mode == 'c' || mode == 'C') && argc > optind+1)
errx(2, "too many input files for -c option");
if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) {
outpath = argv[argc-1];
argc -= 2;
}
- if (mflag && argc - optind > (MAXFCT - (16+1))*16)
+ if (mode == 'm' && argc - optind > (MAXFCT - (16+1))*16)
errx(2, "too many input files for -m option");
for (i = optind; i < argc; i++) {
@@ -309,8 +310,8 @@ main(int argc, char *argv[])
num_input_files = argc - optind;
}
- if (cflag) {
- order(&filelist, fldtab);
+ if (mode == 'c' || mode == 'C') {
+ order(&filelist, fldtab, mode == 'C');
/* NOT REACHED */
}
@@ -348,7 +349,7 @@ main(int argc, char *argv[])
err(2, "output file %s", outfile);
}
- if (mflag)
+ if (mode == 'm')
fmerge(&filelist, num_input_files, outfp, fldtab);
else
fsort(&filelist, num_input_files, outfp, fldtab);
@@ -393,13 +394,20 @@ cleanup(void)
static void
usage(const char *msg)
{
+ const char *pn = getprogname();
+
if (msg != NULL)
(void)fprintf(stderr, "%s: %s\n", getprogname(), msg);
(void)fprintf(stderr,
- "usage: %s [-bcdfHilmnrSsu] [-k field1[,field2]] [-o output]"
- " [-R char] [-T dir]", getprogname());
+ "usage: %s [-bdfHilmnrSsu] [-k kstart[,kend]] [-o output]"
+ " [-R char] [-T dir]\n", pn);
(void)fprintf(stderr,
" [-t char] [file ...]\n");
+ (void)fprintf(stderr,
+ " or: %s -[cC] [-bdfilnru] [-k kstart[,kend]] [-o output]"
+ " [-R char]\n", pn);
+ (void)fprintf(stderr,
+ " [-t char] [file]\n");
exit(2);
}
Index: src/usr.bin/sort/sort.h
diff -u src/usr.bin/sort/sort.h:1.35 src/usr.bin/sort/sort.h:1.36
--- src/usr.bin/sort/sort.h:1.35 Wed Aug 5 07:10:03 2015
+++ src/usr.bin/sort/sort.h Wed Jun 1 02:37:55 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: sort.h,v 1.35 2015/08/05 07:10:03 mrg Exp $ */
+/* $NetBSD: sort.h,v 1.36 2016/06/01 02:37:55 kre Exp $ */
/*-
* Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -191,7 +191,7 @@ int makekey(FILE *, RECHEADER *, u_char
int makeline(FILE *, RECHEADER *, u_char *, struct field *);
void makeline_copydown(RECHEADER *);
int optval(int, int);
-__dead void order(struct filelist *, struct field *);
+__dead void order(struct filelist *, struct field *, int);
void putline(const RECHEADER *, FILE *);
void putrec(const RECHEADER *, FILE *);
void putkeydump(const RECHEADER *, FILE *);