Module Name: src Committed By: dsl Date: Sat Sep 5 09:16:18 UTC 2009
Modified Files: src/usr.bin/sort: Makefile init.c sort.c sort.h Added Files: src/usr.bin/sort: radix_sort.c Log Message: Include a local copy of the sradixsort() code from libc. Currently unchanged apart from the deletion of the 'unstable' version and other unneeded code. Use fldtab[0]. not fldtab-> when we are referring to the global info in the 0th entry to emphasise that this entry is different. fldtab[0].weights is only needed in the SINGL_FLD case - so set it there. Re-indent a big 'if' is setfield() so that the line breaks match the logic - which looks dubious now! To generate a diff of this commit: cvs rdiff -u -r1.6 -r1.7 src/usr.bin/sort/Makefile cvs rdiff -u -r1.21 -r1.22 src/usr.bin/sort/init.c cvs rdiff -u -r0 -r1.1 src/usr.bin/sort/radix_sort.c cvs rdiff -u -r1.53 -r1.54 src/usr.bin/sort/sort.c cvs rdiff -u -r1.25 -r1.26 src/usr.bin/sort/sort.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/usr.bin/sort/Makefile diff -u src/usr.bin/sort/Makefile:1.6 src/usr.bin/sort/Makefile:1.7 --- src/usr.bin/sort/Makefile:1.6 Tue Apr 14 22:15:26 2009 +++ src/usr.bin/sort/Makefile Sat Sep 5 09:16:18 2009 @@ -1,7 +1,8 @@ -# $NetBSD: Makefile,v 1.6 2009/04/14 22:15:26 lukem Exp $ +# $NetBSD: Makefile,v 1.7 2009/09/05 09:16:18 dsl Exp $ # from: @(#)Makefile 8.1 (Berkeley) 6/6/93 PROG= sort SRCS= append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c +SRCS+= radix_sort.c .include <bsd.prog.mk> Index: src/usr.bin/sort/init.c diff -u src/usr.bin/sort/init.c:1.21 src/usr.bin/sort/init.c:1.22 --- src/usr.bin/sort/init.c:1.21 Sat Aug 22 21:50:32 2009 +++ src/usr.bin/sort/init.c Sat Sep 5 09:16:18 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: init.c,v 1.21 2009/08/22 21:50:32 dsl Exp $ */ +/* $NetBSD: init.c,v 1.22 2009/09/05 09:16:18 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -64,7 +64,7 @@ #include "sort.h" #ifndef lint -__RCSID("$NetBSD: init.c,v 1.21 2009/08/22 21:50:32 dsl Exp $"); +__RCSID("$NetBSD: init.c,v 1.22 2009/09/05 09:16:18 dsl Exp $"); __SCCSID("@(#)init.c 8.1 (Berkeley) 6/6/93"); #endif /* not lint */ @@ -210,10 +210,12 @@ if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */ cur_fld->flags &= ~BT; - if (cur_fld->tcol.num && !(!(cur_fld->flags & BI) - && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num - && cur_fld->tcol.indent != 0 /* == 0 -> end of field, i.e. okay */ - && cur_fld->tcol.indent < cur_fld->icol.indent)) + if (cur_fld->tcol.num + && !(!(cur_fld->flags & BI) && cur_fld->flags & BT) + && (cur_fld->tcol.num <= cur_fld->icol.num + /* indent if 0 -> end of field, i.e. okay */ + && cur_fld->tcol.indent != 0 + && cur_fld->tcol.indent < cur_fld->icol.indent)) errx(2, "fields out of order"); insertcol(cur_fld); return (cur_fld->tcol.num); @@ -333,7 +335,7 @@ * and -d (only sort blank and alphanumerics). */ void -settables(int gflags) +settables(void) { int i; int next_weight = SINGL_FLD ? 1 : 2; Index: src/usr.bin/sort/sort.c diff -u src/usr.bin/sort/sort.c:1.53 src/usr.bin/sort/sort.c:1.54 --- src/usr.bin/sort/sort.c:1.53 Sat Aug 22 21:43:53 2009 +++ src/usr.bin/sort/sort.c Sat Sep 5 09:16:18 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: sort.c,v 1.53 2009/08/22 21:43:53 dsl Exp $ */ +/* $NetBSD: sort.c,v 1.54 2009/09/05 09:16:18 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -76,7 +76,7 @@ #endif /* not lint */ #ifndef lint -__RCSID("$NetBSD: sort.c,v 1.53 2009/08/22 21:43:53 dsl Exp $"); +__RCSID("$NetBSD: sort.c,v 1.54 2009/09/05 09:16:18 dsl Exp $"); __SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); #endif /* not lint */ @@ -103,11 +103,6 @@ u_char unweighted[NBINS]; int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; -/* - * Default to stable sort. - */ -int (*radix_sort)(const u_char **, int, const u_char *, u_int) = sradixsort; - unsigned int debug_flags = 0; static char toutpath[MAXPATHLEN]; @@ -124,11 +119,11 @@ main(int argc, char *argv[]) { get_func_t get; - int ch, i, stdinflag = 0, tmp = 0; + int ch, i, stdinflag = 0; char cflag = 0, mflag = 0; char *outfile, *outpath = 0; struct field *fldtab, *p; - size_t fldtab_sz = 3, fidx = 0; + size_t fldtab_sz, fld_cnt; struct filelist filelist; int num_input_files; FILE *outfp = NULL; @@ -147,6 +142,9 @@ d_mask[REC_D = '\n'] = REC_D_F; d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; + /* fldtab[0] is the global options. */ + fldtab_sz = 3; + fld_cnt = 0; fldtab = malloc(fldtab_sz * sizeof(*fldtab)); memset(fldtab, 0, fldtab_sz * sizeof(*fldtab)); @@ -159,7 +157,7 @@ while ((ch = getopt(argc, argv, "bcdD:fik:mHno:rR:sSt:T:ux")) != -1) { switch (ch) { case 'b': - fldtab->flags |= BI | BT; + fldtab[0].flags |= BI | BT; break; case 'c': cflag = 1; @@ -169,9 +167,7 @@ debug_flags |= 1 << (optarg[i] & 31); break; case 'd': case 'f': case 'i': case 'n': case 'r': - tmp |= optval(ch, 0); - fldtab->weights = weight_tables[tmp & (R | F)]; - fldtab->flags |= tmp; + fldtab[0].flags |= optval(ch, 0); break; case 'H': /* -H was ; use merge sort for blocks of large files' */ @@ -182,11 +178,10 @@ if (!p) err(1, "realloc"); fldtab = p; - memset(&fldtab[fldtab_sz], 0, - sizeof(fldtab[fldtab_sz])); + memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0])); fldtab_sz++; - setfield(optarg, &fldtab[++fidx], fldtab->flags); + setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags); break; case 'm': mflag = 1; @@ -195,11 +190,21 @@ outpath = optarg; break; case 's': - /* for GNU sort compatibility (this is our default) */ - radix_sort = sradixsort; + /* + * Nominally 'stable sort', keep lines with equal keys + * in input file order. (Default for NetBSD) + * (-s for GNU sort compatibility.) + */ break; case 'S': - radix_sort = radixsort; + /* + * Reverse of -s! + * This needs to enforce a POSIX sort where records + * with equal keys are then sorted by the raw data. + * Currently not implemented! + * (using libc radixsort() v sradixsort() doesn't + * have the desired effect.) + */ break; case 't': if (SEP_FLAG) @@ -272,19 +277,18 @@ err(2, "%s", argv[i]); } - if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) { + if (!(fldtab[0].flags & (I|D|N) || fldtab[1].icol.num)) { SINGL_FLD = 1; fldtab[0].icol.num = 1; + fldtab[0].weights = weight_tables[fldtab->flags & (R | F)]; } else { if (!fldtab[1].icol.num) { fldtab[0].flags &= ~(BI|BT); - setfield("1", &fldtab[++fidx], fldtab->flags); + setfield("1", &fldtab[++fld_cnt], fldtab->flags); } fldreset(fldtab); - // fldtab[0].flags &= ~F; } - settables(fldtab[0].flags); - fldtab->weights = weight_tables[fldtab->flags & (R | F)]; + settables(); if (optind == argc) { static const char * const names[] = { _PATH_STDIN, NULL }; Index: src/usr.bin/sort/sort.h diff -u src/usr.bin/sort/sort.h:1.25 src/usr.bin/sort/sort.h:1.26 --- src/usr.bin/sort/sort.h:1.25 Sat Aug 22 10:53:28 2009 +++ src/usr.bin/sort/sort.h Sat Sep 5 09:16:18 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: sort.h,v 1.25 2009/08/22 10:53:28 dsl Exp $ */ +/* $NetBSD: sort.h,v 1.26 2009/09/05 09:16:18 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -170,7 +170,6 @@ extern int SINGL_FLD, SEP_FLAG, UNIQUE; extern int REC_D; extern const char *tmpdir; -extern int (*radix_sort)(const u_char **, int, const u_char *, u_int); extern u_char unweighted[NBINS]; extern struct coldesc *clist; extern int ncols; @@ -200,5 +199,6 @@ void putrec(const RECHEADER *, FILE *); void putkeydump(const RECHEADER *, FILE *); void rd_append(int, int, int, FILE *, u_char *, u_char *); +int radix_sort(const u_char **, int, const u_char *, u_int); int setfield(const char *, struct field *, int); -void settables(int); +void settables(void); Added files: Index: src/usr.bin/sort/radix_sort.c diff -u /dev/null src/usr.bin/sort/radix_sort.c:1.1 --- /dev/null Sat Sep 5 09:16:18 2009 +++ src/usr.bin/sort/radix_sort.c Sat Sep 5 09:16:18 2009 @@ -0,0 +1,182 @@ +/* $NetBSD: radix_sort.c,v 1.1 2009/09/05 09:16:18 dsl Exp $ */ + +/*- + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Peter McIlroy and by Dan Bernstein at New York University, + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 +static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95"; +#else +__RCSID("$NetBSD: radix_sort.c,v 1.1 2009/09/05 09:16:18 dsl Exp $"); +#endif +#endif /* LIBC_SCCS and not lint */ + +/* + * 'stable' radix sort initially from libc/stdlib/radixsort.c + */ + +#include <sys/types.h> + +#include <assert.h> +#include <errno.h> +#include "sort.h" + +typedef struct { + const u_char **sa; + int sn, si; +} stack; + +static inline void simplesort(const u_char **, int, int, const u_char *, u_int); +static void r_sort_b(const u_char **, + const u_char **, int, int, const u_char *, u_int); + +#define THRESHOLD 20 /* Divert to simplesort(). */ +#define SIZE 512 /* Default stack size. */ + +int +radix_sort(const u_char **a, int n, const u_char *tab, u_int endch) +{ + const u_char **ta; + + endch = tab[endch]; + if (n < THRESHOLD) + simplesort(a, n, 0, tab, endch); + else { + if ((ta = malloc(n * sizeof(a))) == NULL) + return (-1); + r_sort_b(a, ta, n, 0, tab, endch); + free(ta); + } + return (0); +} + +#define empty(s) (s >= sp) +#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si +#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i +#define swap(a, b, t) t = a, a = b, b = t + +/* Stable sort, requiring additional memory. */ +static void +r_sort_b(const u_char **a, const u_char **ta, int n, int i, const u_char *tr, + u_int endch) +{ + static u_int count[256], nc, bmin; + u_int c; + const u_char **ak, **ai; + stack s[512], *sp, *sp0, *sp1, temp; + const u_char **top[256]; + u_int *cp, bigc; + + _DIAGASSERT(a != NULL); + _DIAGASSERT(ta != NULL); + _DIAGASSERT(tr != NULL); + + sp = s; + push(a, n, i); + while (!empty(s)) { + pop(a, n, i); + if (n < THRESHOLD) { + simplesort(a, n, i, tr, endch); + continue; + } + + if (nc == 0) { + bmin = 255; + for (ak = a + n; --ak >= a;) { + c = tr[(*ak)[i]]; + if (++count[c] == 1 && c != endch) { + if (c < bmin) + bmin = c; + nc++; + } + } + if (sp + nc > s + SIZE) { + r_sort_b(a, ta, n, i, tr, endch); + continue; + } + } + + sp0 = sp1 = sp; + bigc = 2; + if (endch == 0) { + top[0] = ak = a + count[0]; + count[0] = 0; + } else { + ak = a; + top[255] = a + n; + count[255] = 0; + } + for (cp = count + bmin; nc > 0; cp++) { + while (*cp == 0) + cp++; + if ((c = *cp) > 1) { + if (c > bigc) { + bigc = c; + sp1 = sp; + } + push(ak, c, i+1); + } + top[cp-count] = ak += c; + *cp = 0; /* Reset count[]. */ + nc--; + } + swap(*sp0, *sp1, temp); + + for (ak = ta + n, ai = a+n; ak > ta;) /* Copy to temp. */ + *--ak = *--ai; + for (ak = ta+n; --ak >= ta;) /* Deal to piles. */ + *--top[tr[(*ak)[i]]] = *ak; + } +} + +/* insertion sort */ +static inline void +simplesort(const u_char **a, int n, int b, const u_char *tr, u_int endch) +{ + u_char ch; + const u_char **ak, **ai, *s, *t; + + _DIAGASSERT(a != NULL); + _DIAGASSERT(tr != NULL); + + for (ak = a+1; --n >= 1; ak++) + for (ai = ak; ai > a; ai--) { + for (s = ai[0] + b, t = ai[-1] + b; + (ch = tr[*s]) != endch; s++, t++) + if (ch != tr[*t]) + break; + if (ch >= tr[*t]) + break; + swap(ai[0], ai[-1], s); + } +}