Module Name: src Committed By: dsl Date: Sat Sep 5 12:00:26 UTC 2009
Modified Files: src/usr.bin/sort: append.c files.c fsort.c fsort.h msort.c radix_sort.c sort.h Log Message: Now we have our own radix_sort() change the interface so that we pass an array of 'RECHEADER *' and remove all the crappy stuff that backed up by REC_DATA_OFFSET (etc). Also change radix_sort() to return the number of elements, soon to be used to drop duplicate keys (for sort -u). To generate a diff of this commit: cvs rdiff -u -r1.20 -r1.21 src/usr.bin/sort/append.c cvs rdiff -u -r1.35 -r1.36 src/usr.bin/sort/files.c cvs rdiff -u -r1.39 -r1.40 src/usr.bin/sort/fsort.c cvs rdiff -u -r1.15 -r1.16 src/usr.bin/sort/fsort.h cvs rdiff -u -r1.24 -r1.25 src/usr.bin/sort/msort.c cvs rdiff -u -r1.1 -r1.2 src/usr.bin/sort/radix_sort.c cvs rdiff -u -r1.26 -r1.27 src/usr.bin/sort/sort.h Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/usr.bin/sort/append.c diff -u src/usr.bin/sort/append.c:1.20 src/usr.bin/sort/append.c:1.21 --- src/usr.bin/sort/append.c:1.20 Sat Aug 22 10:53:28 2009 +++ src/usr.bin/sort/append.c Sat Sep 5 12:00:25 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: append.c,v 1.20 2009/08/22 10:53:28 dsl Exp $ */ +/* $NetBSD: append.c,v 1.21 2009/09/05 12:00:25 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -64,7 +64,7 @@ #include "sort.h" #ifndef lint -__RCSID("$NetBSD: append.c,v 1.20 2009/08/22 10:53:28 dsl Exp $"); +__RCSID("$NetBSD: append.c,v 1.21 2009/09/05 12:00:25 dsl Exp $"); __SCCSID("@(#)append.c 8.1 (Berkeley) 6/6/93"); #endif /* not lint */ @@ -88,16 +88,16 @@ * copy sorted lines to output; check for uniqueness */ void -append(const u_char **keylist, int nelem, FILE *fp, put_func_t put, u_char *wts) +append(const RECHEADER **keylist, int nelem, FILE *fp, put_func_t put, u_char *wts) { - const u_char **cpos, **lastkey; - const struct recheader *crec, *prec; + const RECHEADER **cpos, **lastkey; + const RECHEADER *crec, *prec; size_t plen; lastkey = keylist + nelem; if (!UNIQUE || wts == NULL) { for (cpos = keylist; cpos < lastkey; cpos++) - put((const RECHEADER *)(*cpos - REC_DATA_OFFSET), fp); + put(*cpos, fp); return; } @@ -105,13 +105,13 @@ return; cpos = keylist; - prec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET); + prec = *cpos; if (!SINGL_FLD) { /* Key for each line is already in adjacent bytes */ plen = prec->offset; for (cpos = &keylist[1]; cpos < lastkey; cpos++) { - crec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET); + crec = *cpos; if (crec->offset == plen && memcmp(crec->data, prec->data, plen) == 0) { /* Duplicate key */ @@ -130,7 +130,7 @@ /* Key for each line is already in adjacent bytes */ plen = prec->length; for (cpos = &keylist[1]; cpos < lastkey; cpos++) { - crec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET); + crec = *cpos; if (crec->length == plen && wt_cmp(crec->data, prec->data, plen, wts) == 0) { /* Duplicate key */ Index: src/usr.bin/sort/files.c diff -u src/usr.bin/sort/files.c:1.35 src/usr.bin/sort/files.c:1.36 --- src/usr.bin/sort/files.c:1.35 Sat Aug 22 10:53:28 2009 +++ src/usr.bin/sort/files.c Sat Sep 5 12:00:25 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $ */ +/* $NetBSD: files.c,v 1.36 2009/09/05 12:00:25 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -65,7 +65,7 @@ #include "fsort.h" #ifndef lint -__RCSID("$NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $"); +__RCSID("$NetBSD: files.c,v 1.36 2009/09/05 12:00:25 dsl Exp $"); __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); #endif /* not lint */ @@ -271,7 +271,7 @@ void putrec(const RECHEADER *rec, FILE *fp) { - EWRITE(rec, 1, rec->length + REC_DATA_OFFSET, fp); + EWRITE(rec, 1, offsetof(RECHEADER, data) + rec->length, fp); } /* @@ -289,7 +289,7 @@ void putkeydump(const RECHEADER *rec, FILE *fp) { - EWRITE(rec, 1, rec->offset + REC_DATA_OFFSET, fp); + EWRITE(rec, 1, offsetof(RECHEADER, data) + rec->offset, fp); } /* @@ -303,15 +303,15 @@ FILE *fp; fp = fstack[flno].fp; - if ((u_char *) rec > end - REC_DATA_OFFSET) + if ((u_char *)(rec + 1) > end) return (BUFFEND); - if (!fread(rec, 1, REC_DATA_OFFSET, fp)) { + if (!fread(rec, 1, offsetof(RECHEADER, data), fp)) { fclose(fp); fstack[flno].fp = 0; return (EOF); } if (end - rec->data < (ptrdiff_t)rec->length) { - for (i = REC_DATA_OFFSET - 1; i >= 0; i--) + for (i = offsetof(RECHEADER, data) - 1; i >= 0; i--) ungetc(*((char *) rec + i), fp); return (BUFFEND); } Index: src/usr.bin/sort/fsort.c diff -u src/usr.bin/sort/fsort.c:1.39 src/usr.bin/sort/fsort.c:1.40 --- src/usr.bin/sort/fsort.c:1.39 Sat Aug 22 10:53:28 2009 +++ src/usr.bin/sort/fsort.c Sat Sep 5 12:00:25 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: fsort.c,v 1.39 2009/08/22 10:53:28 dsl Exp $ */ +/* $NetBSD: fsort.c,v 1.40 2009/09/05 12:00:25 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -72,17 +72,13 @@ #include "fsort.h" #ifndef lint -__RCSID("$NetBSD: fsort.c,v 1.39 2009/08/22 10:53:28 dsl Exp $"); +__RCSID("$NetBSD: fsort.c,v 1.40 2009/09/05 12:00:25 dsl Exp $"); __SCCSID("@(#)fsort.c 8.1 (Berkeley) 6/6/93"); #endif /* not lint */ #include <stdlib.h> #include <string.h> -static const u_char **keylist = 0; -u_char *buffer = 0; -size_t bufsize = DEFBUFSIZE; - struct tempfile fstack[MAXFCT]; #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1)) @@ -90,21 +86,24 @@ void fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl) { - const u_char **keypos, **keyp; + const RECHEADER **keylist; + const RECHEADER **keypos, **keyp; + RECHEADER *buffer; + size_t bufsize = DEFBUFSIZE; u_char *bufend; int mfct = 0; int c, nelem; get_func_t get; - struct recheader *crec; - u_char *nbuffer; + RECHEADER *crec; + RECHEADER *nbuffer; FILE *fp; - if (!buffer) { - buffer = malloc(bufsize); - keylist = malloc(MAXNUM * sizeof(u_char *)); - memset(keylist, 0, MAXNUM * sizeof(u_char *)); - } - bufend = buffer + bufsize; + buffer = malloc(bufsize); + bufend = (u_char *)buffer + bufsize; + /* Allocate double length keymap for radix_sort */ + keylist = malloc(2 * MAXNUM * sizeof(*keylist)); + if (buffer == NULL || keylist == NULL) + err(2, "failed to malloc initial buffer or keylist"); if (SINGL_FLD) /* Key and data are one! */ @@ -118,7 +117,7 @@ for (;;) { keypos = keylist; nelem = 0; - crec = (RECHEADER *) buffer; + crec = buffer; /* Loop reading records */ for (;;) { @@ -126,13 +125,12 @@ /* 'c' is 0, EOF or BUFFEND */ if (c == 0) { /* Save start of key in input buffer */ - *keypos++ = crec->data; + *keypos++ = crec; if (++nelem == MAXNUM) { c = BUFFEND; break; } - crec = (RECHEADER *)((char *) crec + - SALIGN(crec->length) + REC_DATA_OFFSET); + crec = (RECHEADER *)(crec->data + SALIGN(crec->length)); continue; } if (c == EOF) @@ -154,18 +152,18 @@ for (keyp = &keypos[-1]; keyp >= keylist; keyp--) *keyp = nbuffer + (*keyp - buffer); - crec = (RECHEADER *) (nbuffer + ((u_char *)crec - buffer)); + crec = nbuffer + (crec - buffer); buffer = nbuffer; - bufend = buffer + bufsize; + bufend = (u_char *)buffer + bufsize; } /* Sort this set of records */ if (SINGL_FLD) { - if (radix_sort(keylist, nelem, ftbl[0].weights, REC_D)) - err(2, "single field radix_sort"); + nelem = radix_sort(keylist, keylist + MAXNUM, nelem, + ftbl[0].weights, REC_D); } else { - if (radix_sort(keylist, nelem, unweighted, 0)) - err(2, "unweighted radix_sort"); + nelem = radix_sort(keylist, keylist + MAXNUM, nelem, + unweighted, 0); } if (c == EOF && mfct == 0) { Index: src/usr.bin/sort/fsort.h diff -u src/usr.bin/sort/fsort.h:1.15 src/usr.bin/sort/fsort.h:1.16 --- src/usr.bin/sort/fsort.h:1.15 Thu Aug 20 06:36:25 2009 +++ src/usr.bin/sort/fsort.h Sat Sep 5 12:00:25 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: fsort.h,v 1.15 2009/08/20 06:36:25 dsl Exp $ */ +/* $NetBSD: fsort.h,v 1.16 2009/09/05 12:00:25 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -84,9 +84,6 @@ */ #define MERGE_FNUM 16 -extern u_char *buffer; -extern size_t bufsize; - /* Temporary files contian data (with record headers) in sorted order */ struct tempfile { FILE *fp; Index: src/usr.bin/sort/msort.c diff -u src/usr.bin/sort/msort.c:1.24 src/usr.bin/sort/msort.c:1.25 --- src/usr.bin/sort/msort.c:1.24 Sat Aug 22 15:16:50 2009 +++ src/usr.bin/sort/msort.c Sat Sep 5 12:00:25 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: msort.c,v 1.24 2009/08/22 15:16:50 dsl Exp $ */ +/* $NetBSD: msort.c,v 1.25 2009/09/05 12:00:25 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -65,7 +65,7 @@ #include "fsort.h" #ifndef lint -__RCSID("$NetBSD: msort.c,v 1.24 2009/08/22 15:16:50 dsl Exp $"); +__RCSID("$NetBSD: msort.c,v 1.25 2009/09/05 12:00:25 dsl Exp $"); __SCCSID("@(#)msort.c 8.1 (Berkeley) 6/6/93"); #endif /* not lint */ @@ -79,7 +79,7 @@ typedef struct mfile { u_char *end; short flno; - struct recheader rec[1]; + RECHEADER rec[1]; } MFILE; static u_char *wts; @@ -324,15 +324,14 @@ void order(struct filelist *filelist, get_func_t get, struct field *ftbl) { + RECHEADER *crec, *prec, *trec; u_char *crec_end, *prec_end, *trec_end; int c; - RECHEADER *crec, *prec, *trec; - buffer = malloc(2 * (DEFLLEN + REC_DATA_OFFSET)); - crec = (RECHEADER *) buffer; - crec_end = buffer + DEFLLEN + REC_DATA_OFFSET; - prec = (RECHEADER *) (buffer + DEFLLEN + REC_DATA_OFFSET); - prec_end = buffer + 2*(DEFLLEN + REC_DATA_OFFSET); + crec = malloc(offsetof(RECHEADER, data[DEFLLEN])); + crec_end = crec->data + DEFLLEN; + prec = malloc(offsetof(RECHEADER, data[DEFLLEN])); + prec_end = prec->data + DEFLLEN; wts = ftbl->weights; /* XXX this does exit(0) for overlong lines */ Index: src/usr.bin/sort/radix_sort.c diff -u src/usr.bin/sort/radix_sort.c:1.1 src/usr.bin/sort/radix_sort.c:1.2 --- src/usr.bin/sort/radix_sort.c:1.1 Sat Sep 5 09:16:18 2009 +++ src/usr.bin/sort/radix_sort.c Sat Sep 5 12:00:25 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: radix_sort.c,v 1.1 2009/09/05 09:16:18 dsl Exp $ */ +/* $NetBSD: radix_sort.c,v 1.2 2009/09/05 12:00:25 dsl Exp $ */ /*- * Copyright (c) 1990, 1993 @@ -37,7 +37,7 @@ #if 0 static char sccsid[] = "@(#)radixsort.c 8.2 (Berkeley) 4/28/95"; #else -__RCSID("$NetBSD: radix_sort.c,v 1.1 2009/09/05 09:16:18 dsl Exp $"); +__RCSID("$NetBSD: radix_sort.c,v 1.2 2009/09/05 12:00:25 dsl Exp $"); #endif #endif /* LIBC_SCCS and not lint */ @@ -52,60 +52,49 @@ #include "sort.h" typedef struct { - const u_char **sa; - int sn, si; + const RECHEADER **sa; /* Base of saved area */ + int sn; /* Number of entries */ + int si; /* index into data for compare */ } stack; -static inline void simplesort(const u_char **, int, int, const u_char *, u_int); -static void r_sort_b(const u_char **, - const u_char **, int, int, const u_char *, u_int); +static inline int simplesort(const RECHEADER **, int, int, const u_char *, u_int); +static int r_sort_b(const RECHEADER **, + const RECHEADER **, int, int, const u_char *, u_int); #define THRESHOLD 20 /* Divert to simplesort(). */ #define SIZE 512 /* Default stack size. */ +#define empty(s) (s >= sp) +#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si +#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i +#define swap(a, b, t) t = a, a = b, b = t + int -radix_sort(const u_char **a, int n, const u_char *tab, u_int endch) +radix_sort(const RECHEADER **a, const RECHEADER **ta, int n, const u_char *tab, u_int endch) { - const u_char **ta; - endch = tab[endch]; - if (n < THRESHOLD) - simplesort(a, n, 0, tab, endch); - else { - if ((ta = malloc(n * sizeof(a))) == NULL) - return (-1); - r_sort_b(a, ta, n, 0, tab, endch); - free(ta); + if (n < THRESHOLD && !DEBUG('r')) { + return simplesort(a, n, 0, tab, endch); } - return (0); + return r_sort_b(a, ta, n, 0, tab, endch); } -#define empty(s) (s >= sp) -#define pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si -#define push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i -#define swap(a, b, t) t = a, a = b, b = t - -/* Stable sort, requiring additional memory. */ -static void -r_sort_b(const u_char **a, const u_char **ta, int n, int i, const u_char *tr, - u_int endch) +static int +r_sort_b(const RECHEADER **a, const RECHEADER **ta, int n, int i, const u_char *tr, u_int endch) { static u_int count[256], nc, bmin; u_int c; - const u_char **ak, **ai; + const RECHEADER **ak, **ai; stack s[512], *sp, *sp0, *sp1, temp; - const u_char **top[256]; + const RECHEADER **top[256]; u_int *cp, bigc; - - _DIAGASSERT(a != NULL); - _DIAGASSERT(ta != NULL); - _DIAGASSERT(tr != NULL); + int nrec = n; sp = s; push(a, n, i); while (!empty(s)) { pop(a, n, i); - if (n < THRESHOLD) { + if (n < THRESHOLD && !DEBUG('r')) { simplesort(a, n, i, tr, endch); continue; } @@ -113,7 +102,7 @@ if (nc == 0) { bmin = 255; for (ak = a + n; --ak >= a;) { - c = tr[(*ak)[i]]; + c = tr[(*ak)->data[i]]; if (++count[c] == 1 && c != endch) { if (c < bmin) bmin = c; @@ -155,28 +144,37 @@ for (ak = ta + n, ai = a+n; ak > ta;) /* Copy to temp. */ *--ak = *--ai; for (ak = ta+n; --ak >= ta;) /* Deal to piles. */ - *--top[tr[(*ak)[i]]] = *ak; + *--top[tr[(*ak)->data[i]]] = *ak; } + + return nrec; } /* insertion sort */ -static inline void -simplesort(const u_char **a, int n, int b, const u_char *tr, u_int endch) +static inline int +simplesort(const RECHEADER **a, int n, int b, const u_char *tr, u_int endch) { + const RECHEADER **ak, **ai, *tmp; + const RECHEADER **lim = a + n; + const u_char *s, *t; u_char ch; - const u_char **ak, **ai, *s, *t; - _DIAGASSERT(a != NULL); - _DIAGASSERT(tr != NULL); + if (n <= 1) + return n; - for (ak = a+1; --n >= 1; ak++) + for (ak = a+1; ak < lim; ak++) { for (ai = ak; ai > a; ai--) { - for (s = ai[0] + b, t = ai[-1] + b; + for (s = ai[0]->data + b, t = ai[-1]->data + b; (ch = tr[*s]) != endch; s++, t++) if (ch != tr[*t]) break; - if (ch >= tr[*t]) + if (ch >= tr[*t]) { + break; - swap(ai[0], ai[-1], s); + } + swap(ai[0], ai[-1], tmp); } + } + + return n; } Index: src/usr.bin/sort/sort.h diff -u src/usr.bin/sort/sort.h:1.26 src/usr.bin/sort/sort.h:1.27 --- src/usr.bin/sort/sort.h:1.26 Sat Sep 5 09:16:18 2009 +++ src/usr.bin/sort/sort.h Sat Sep 5 12:00:25 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: sort.h,v 1.26 2009/09/05 09:16:18 dsl Exp $ */ +/* $NetBSD: sort.h,v 1.27 2009/09/05 12:00:25 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -109,10 +109,6 @@ /* A record is a key/line pair starting at rec.data. It has a total length * and an offset to the start of the line half of the pair. - * In order to use (s)radixsort, the array of pointers often points - * to the data field (and sometimes not the first byte even!). - * This means the code has to 'back up' by the correct number of bytes - * in order to get the actual header. */ typedef struct recheader { length_t length; @@ -120,8 +116,6 @@ u_char data[1]; } RECHEADER; -#define REC_DATA_OFFSET offsetof(RECHEADER, data) - /* This is the column as seen by struct field. It is used by enterfield. * They are matched with corresponding coldescs during initialization. */ @@ -162,7 +156,7 @@ typedef int (*get_func_t)(int, int, struct filelist *, int, RECHEADER *, u_char *, struct field *); -typedef void (*put_func_t)(const struct recheader *, FILE *); +typedef void (*put_func_t)(const RECHEADER *, FILE *); extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS]; extern u_char *const weight_tables[4]; /* ascii, Rascii, Ftable, RFtable */ @@ -177,7 +171,7 @@ #define DEBUG(ch) (debug_flags & (1 << ((ch) & 31))) extern unsigned int debug_flags; -void append(const u_char **, int, FILE *, +void append(const RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *), u_char *); void concat(FILE *, FILE *); length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *); @@ -199,6 +193,6 @@ void putrec(const RECHEADER *, FILE *); void putkeydump(const RECHEADER *, FILE *); void rd_append(int, int, int, FILE *, u_char *, u_char *); -int radix_sort(const u_char **, int, const u_char *, u_int); +int radix_sort(const RECHEADER **, const RECHEADER **, int, const u_char *, u_int); int setfield(const char *, struct field *, int); void settables(void);