Module Name:    src
Committed By:   dsl
Date:           Sat Sep  5 12:00:26 UTC 2009

Modified Files:
        src/usr.bin/sort: append.c files.c fsort.c fsort.h msort.c radix_sort.c
            sort.h

Log Message:
Now we have our own radix_sort() change the interface so that we pass
an array of 'RECHEADER *' and remove all the crappy stuff that backed up
by REC_DATA_OFFSET (etc).
Also change radix_sort() to return the number of elements, soon to be used
to drop duplicate keys (for sort -u).


To generate a diff of this commit:
cvs rdiff -u -r1.20 -r1.21 src/usr.bin/sort/append.c
cvs rdiff -u -r1.35 -r1.36 src/usr.bin/sort/files.c
cvs rdiff -u -r1.39 -r1.40 src/usr.bin/sort/fsort.c
cvs rdiff -u -r1.15 -r1.16 src/usr.bin/sort/fsort.h
cvs rdiff -u -r1.24 -r1.25 src/usr.bin/sort/msort.c
cvs rdiff -u -r1.1 -r1.2 src/usr.bin/sort/radix_sort.c
cvs rdiff -u -r1.26 -r1.27 src/usr.bin/sort/sort.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/sort/append.c
diff -u src/usr.bin/sort/append.c:1.20 src/usr.bin/sort/append.c:1.21
--- src/usr.bin/sort/append.c:1.20	Sat Aug 22 10:53:28 2009
+++ src/usr.bin/sort/append.c	Sat Sep  5 12:00:25 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: append.c,v 1.20 2009/08/22 10:53:28 dsl Exp $	*/
+/*	$NetBSD: append.c,v 1.21 2009/09/05 12:00:25 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -64,7 +64,7 @@
 #include "sort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: append.c,v 1.20 2009/08/22 10:53:28 dsl Exp $");
+__RCSID("$NetBSD: append.c,v 1.21 2009/09/05 12:00:25 dsl Exp $");
 __SCCSID("@(#)append.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -88,16 +88,16 @@
  * copy sorted lines to output; check for uniqueness
  */
 void
-append(const u_char **keylist, int nelem, FILE *fp, put_func_t put, u_char *wts)
+append(const RECHEADER **keylist, int nelem, FILE *fp, put_func_t put, u_char *wts)
 {
-	const u_char **cpos, **lastkey;
-	const struct recheader *crec, *prec;
+	const RECHEADER **cpos, **lastkey;
+	const RECHEADER *crec, *prec;
 	size_t plen;
 
 	lastkey = keylist + nelem;
 	if (!UNIQUE || wts == NULL) {
 		for (cpos = keylist; cpos < lastkey; cpos++)
-			put((const RECHEADER *)(*cpos - REC_DATA_OFFSET), fp);
+			put(*cpos, fp);
 		return;
 	}
 
@@ -105,13 +105,13 @@
 		return;
 
 	cpos = keylist;
-	prec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
+	prec = *cpos;
 
 	if (!SINGL_FLD) {
 		/* Key for each line is already in adjacent bytes */
 		plen = prec->offset;
 		for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
-			crec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
+			crec = *cpos;
 			if (crec->offset == plen
 			    && memcmp(crec->data, prec->data, plen) == 0) {
 				/* Duplicate key */
@@ -130,7 +130,7 @@
 	/* Key for each line is already in adjacent bytes */
 	plen = prec->length;
 	for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
-		crec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
+		crec = *cpos;
 		if (crec->length == plen
 		    && wt_cmp(crec->data, prec->data, plen, wts) == 0) {
 			/* Duplicate key */

Index: src/usr.bin/sort/files.c
diff -u src/usr.bin/sort/files.c:1.35 src/usr.bin/sort/files.c:1.36
--- src/usr.bin/sort/files.c:1.35	Sat Aug 22 10:53:28 2009
+++ src/usr.bin/sort/files.c	Sat Sep  5 12:00:25 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $	*/
+/*	$NetBSD: files.c,v 1.36 2009/09/05 12:00:25 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $");
+__RCSID("$NetBSD: files.c,v 1.36 2009/09/05 12:00:25 dsl Exp $");
 __SCCSID("@(#)files.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -271,7 +271,7 @@
 void
 putrec(const RECHEADER *rec, FILE *fp)
 {
-	EWRITE(rec, 1, rec->length + REC_DATA_OFFSET, fp);
+	EWRITE(rec, 1, offsetof(RECHEADER, data) + rec->length, fp);
 }
 
 /*
@@ -289,7 +289,7 @@
 void
 putkeydump(const RECHEADER *rec, FILE *fp)
 {
-	EWRITE(rec, 1, rec->offset + REC_DATA_OFFSET, fp);
+	EWRITE(rec, 1, offsetof(RECHEADER, data) + rec->offset, fp);
 }
 
 /*
@@ -303,15 +303,15 @@
 	FILE *fp;
 
 	fp = fstack[flno].fp;
-	if ((u_char *) rec > end - REC_DATA_OFFSET)
+	if ((u_char *)(rec + 1) > end)
 		return (BUFFEND);
-	if (!fread(rec, 1, REC_DATA_OFFSET, fp)) {
+	if (!fread(rec, 1, offsetof(RECHEADER, data), fp)) {
 		fclose(fp);
 		fstack[flno].fp = 0;
 		return (EOF);
 	}
 	if (end - rec->data < (ptrdiff_t)rec->length) {
-		for (i = REC_DATA_OFFSET - 1; i >= 0;  i--)
+		for (i = offsetof(RECHEADER, data) - 1; i >= 0;  i--)
 			ungetc(*((char *) rec + i), fp);
 		return (BUFFEND);
 	}

Index: src/usr.bin/sort/fsort.c
diff -u src/usr.bin/sort/fsort.c:1.39 src/usr.bin/sort/fsort.c:1.40
--- src/usr.bin/sort/fsort.c:1.39	Sat Aug 22 10:53:28 2009
+++ src/usr.bin/sort/fsort.c	Sat Sep  5 12:00:25 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: fsort.c,v 1.39 2009/08/22 10:53:28 dsl Exp $	*/
+/*	$NetBSD: fsort.c,v 1.40 2009/09/05 12:00:25 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -72,17 +72,13 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: fsort.c,v 1.39 2009/08/22 10:53:28 dsl Exp $");
+__RCSID("$NetBSD: fsort.c,v 1.40 2009/09/05 12:00:25 dsl Exp $");
 __SCCSID("@(#)fsort.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
 #include <stdlib.h>
 #include <string.h>
 
-static const u_char **keylist = 0;
-u_char *buffer = 0;
-size_t bufsize = DEFBUFSIZE;
-
 struct tempfile fstack[MAXFCT];
 
 #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
@@ -90,21 +86,24 @@
 void
 fsort(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
 {
-	const u_char **keypos, **keyp;
+	const RECHEADER **keylist;
+	const RECHEADER **keypos, **keyp;
+	RECHEADER *buffer;
+	size_t bufsize = DEFBUFSIZE;
 	u_char *bufend;
 	int mfct = 0;
 	int c, nelem;
 	get_func_t get;
-	struct recheader *crec;
-	u_char *nbuffer;
+	RECHEADER *crec;
+	RECHEADER *nbuffer;
 	FILE *fp;
 
-	if (!buffer) {
-		buffer = malloc(bufsize);
-		keylist = malloc(MAXNUM * sizeof(u_char *));
-		memset(keylist, 0, MAXNUM * sizeof(u_char *));
-	}
-	bufend = buffer + bufsize;
+	buffer = malloc(bufsize);
+	bufend = (u_char *)buffer + bufsize;
+	/* Allocate double length keymap for radix_sort */
+	keylist = malloc(2 * MAXNUM * sizeof(*keylist));
+	if (buffer == NULL || keylist == NULL)
+		err(2, "failed to malloc initial buffer or keylist");
 
 	if (SINGL_FLD)
 		/* Key and data are one! */
@@ -118,7 +117,7 @@
 	for (;;) {
 		keypos = keylist;
 		nelem = 0;
-		crec = (RECHEADER *) buffer;
+		crec = buffer;
 
 		/* Loop reading records */
 		for (;;) {
@@ -126,13 +125,12 @@
 			/* 'c' is 0, EOF or BUFFEND */
 			if (c == 0) {
 				/* Save start of key in input buffer */
-				*keypos++ = crec->data;
+				*keypos++ = crec;
 				if (++nelem == MAXNUM) {
 					c = BUFFEND;
 					break;
 				}
-				crec = (RECHEADER *)((char *) crec +
-				    SALIGN(crec->length) + REC_DATA_OFFSET);
+				crec = (RECHEADER *)(crec->data + SALIGN(crec->length));
 				continue;
 			}
 			if (c == EOF)
@@ -154,18 +152,18 @@
 			for (keyp = &keypos[-1]; keyp >= keylist; keyp--)
 				*keyp = nbuffer + (*keyp - buffer);
 
-			crec = (RECHEADER *) (nbuffer + ((u_char *)crec - buffer));
+			crec = nbuffer + (crec - buffer);
 			buffer = nbuffer;
-			bufend = buffer + bufsize;
+			bufend = (u_char *)buffer + bufsize;
 		}
 
 		/* Sort this set of records */
 		if (SINGL_FLD) {
-			if (radix_sort(keylist, nelem, ftbl[0].weights, REC_D))
-				err(2, "single field radix_sort");
+			nelem = radix_sort(keylist, keylist + MAXNUM, nelem,
+			    ftbl[0].weights, REC_D);
 		} else {
-			if (radix_sort(keylist, nelem, unweighted, 0))
-				err(2, "unweighted radix_sort");
+			nelem = radix_sort(keylist, keylist + MAXNUM, nelem,
+			    unweighted, 0);
 		}
 
 		if (c == EOF && mfct == 0) {

Index: src/usr.bin/sort/fsort.h
diff -u src/usr.bin/sort/fsort.h:1.15 src/usr.bin/sort/fsort.h:1.16
--- src/usr.bin/sort/fsort.h:1.15	Thu Aug 20 06:36:25 2009
+++ src/usr.bin/sort/fsort.h	Sat Sep  5 12:00:25 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: fsort.h,v 1.15 2009/08/20 06:36:25 dsl Exp $	*/
+/*	$NetBSD: fsort.h,v 1.16 2009/09/05 12:00:25 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -84,9 +84,6 @@
  */
 #define MERGE_FNUM	16
 
-extern u_char *buffer;
-extern size_t bufsize;
-
 /* Temporary files contian data (with record headers) in sorted order */
 struct tempfile {
 	FILE *fp;

Index: src/usr.bin/sort/msort.c
diff -u src/usr.bin/sort/msort.c:1.24 src/usr.bin/sort/msort.c:1.25
--- src/usr.bin/sort/msort.c:1.24	Sat Aug 22 15:16:50 2009
+++ src/usr.bin/sort/msort.c	Sat Sep  5 12:00:25 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: msort.c,v 1.24 2009/08/22 15:16:50 dsl Exp $	*/
+/*	$NetBSD: msort.c,v 1.25 2009/09/05 12:00:25 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: msort.c,v 1.24 2009/08/22 15:16:50 dsl Exp $");
+__RCSID("$NetBSD: msort.c,v 1.25 2009/09/05 12:00:25 dsl Exp $");
 __SCCSID("@(#)msort.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -79,7 +79,7 @@
 typedef struct mfile {
 	u_char *end;
 	short flno;
-	struct recheader rec[1];
+	RECHEADER rec[1];
 } MFILE;
 
 static u_char *wts;
@@ -324,15 +324,14 @@
 void
 order(struct filelist *filelist, get_func_t get, struct field *ftbl)
 {
+	RECHEADER *crec, *prec, *trec;
 	u_char *crec_end, *prec_end, *trec_end;
 	int c;
-	RECHEADER *crec, *prec, *trec;
 
-	buffer = malloc(2 * (DEFLLEN + REC_DATA_OFFSET));
-	crec = (RECHEADER *) buffer;
-	crec_end = buffer + DEFLLEN + REC_DATA_OFFSET;
-	prec = (RECHEADER *) (buffer + DEFLLEN + REC_DATA_OFFSET);
-	prec_end = buffer + 2*(DEFLLEN + REC_DATA_OFFSET);
+	crec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
+	crec_end = crec->data + DEFLLEN;
+	prec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
+	prec_end = prec->data + DEFLLEN;
 	wts = ftbl->weights;
 
 	/* XXX this does exit(0) for overlong lines */

Index: src/usr.bin/sort/radix_sort.c
diff -u src/usr.bin/sort/radix_sort.c:1.1 src/usr.bin/sort/radix_sort.c:1.2
--- src/usr.bin/sort/radix_sort.c:1.1	Sat Sep  5 09:16:18 2009
+++ src/usr.bin/sort/radix_sort.c	Sat Sep  5 12:00:25 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: radix_sort.c,v 1.1 2009/09/05 09:16:18 dsl Exp $	*/
+/*	$NetBSD: radix_sort.c,v 1.2 2009/09/05 12:00:25 dsl Exp $	*/
 
 /*-
  * Copyright (c) 1990, 1993
@@ -37,7 +37,7 @@
 #if 0
 static char sccsid[] = "@(#)radixsort.c	8.2 (Berkeley) 4/28/95";
 #else
-__RCSID("$NetBSD: radix_sort.c,v 1.1 2009/09/05 09:16:18 dsl Exp $");
+__RCSID("$NetBSD: radix_sort.c,v 1.2 2009/09/05 12:00:25 dsl Exp $");
 #endif
 #endif /* LIBC_SCCS and not lint */
 
@@ -52,60 +52,49 @@
 #include "sort.h"
 
 typedef struct {
-	const u_char **sa;
-	int sn, si;
+	const RECHEADER **sa;	/* Base of saved area */
+	int sn;				/* Number of entries */
+	int si;				/* index into data for compare */
 } stack;
 
-static inline void simplesort(const u_char **, int, int, const u_char *, u_int);
-static void r_sort_b(const u_char **,
-	    const u_char **, int, int, const u_char *, u_int);
+static inline int simplesort(const RECHEADER **, int, int, const u_char *, u_int);
+static int r_sort_b(const RECHEADER **,
+	    const RECHEADER **, int, int, const u_char *, u_int);
 
 #define	THRESHOLD	20		/* Divert to simplesort(). */
 #define	SIZE		512		/* Default stack size. */
 
+#define empty(s)	(s >= sp)
+#define pop(a, n, i)	a = (--sp)->sa, n = sp->sn, i = sp->si
+#define push(a, n, i)	sp->sa = a, sp->sn = n, (sp++)->si = i
+#define swap(a, b, t)	t = a, a = b, b = t
+
 int
-radix_sort(const u_char **a, int n, const u_char *tab, u_int endch)
+radix_sort(const RECHEADER **a, const RECHEADER **ta, int n, const u_char *tab, u_int endch)
 {
-	const u_char **ta;
-
 	endch = tab[endch];
-	if (n < THRESHOLD)
-		simplesort(a, n, 0, tab, endch);
-	else {
-		if ((ta = malloc(n * sizeof(a))) == NULL)
-			return (-1);
-		r_sort_b(a, ta, n, 0, tab, endch);
-		free(ta);
+	if (n < THRESHOLD && !DEBUG('r')) {
+		return simplesort(a, n, 0, tab, endch);
 	}
-	return (0);
+	return r_sort_b(a, ta, n, 0, tab, endch);
 }
 
-#define empty(s)	(s >= sp)
-#define pop(a, n, i)	a = (--sp)->sa, n = sp->sn, i = sp->si
-#define push(a, n, i)	sp->sa = a, sp->sn = n, (sp++)->si = i
-#define swap(a, b, t)	t = a, a = b, b = t
-
-/* Stable sort, requiring additional memory. */
-static void
-r_sort_b(const u_char **a, const u_char **ta, int n, int i, const u_char *tr,
-    u_int endch)
+static int
+r_sort_b(const RECHEADER **a, const RECHEADER **ta, int n, int i, const u_char *tr, u_int endch)
 {
 	static u_int count[256], nc, bmin;
 	u_int c;
-	const u_char **ak, **ai;
+	const RECHEADER **ak, **ai;
 	stack s[512], *sp, *sp0, *sp1, temp;
-	const u_char **top[256];
+	const RECHEADER **top[256];
 	u_int *cp, bigc;
-
-	_DIAGASSERT(a != NULL);
-	_DIAGASSERT(ta != NULL);
-	_DIAGASSERT(tr != NULL);
+	int nrec = n;
 
 	sp = s;
 	push(a, n, i);
 	while (!empty(s)) {
 		pop(a, n, i);
-		if (n < THRESHOLD) {
+		if (n < THRESHOLD && !DEBUG('r')) {
 			simplesort(a, n, i, tr, endch);
 			continue;
 		}
@@ -113,7 +102,7 @@
 		if (nc == 0) {
 			bmin = 255;
 			for (ak = a + n; --ak >= a;) {
-				c = tr[(*ak)[i]];
+				c = tr[(*ak)->data[i]];
 				if (++count[c] == 1 && c != endch) {
 					if (c < bmin)
 						bmin = c;
@@ -155,28 +144,37 @@
 		for (ak = ta + n, ai = a+n; ak > ta;)	/* Copy to temp. */
 			*--ak = *--ai;
 		for (ak = ta+n; --ak >= ta;)		/* Deal to piles. */
-			*--top[tr[(*ak)[i]]] = *ak;
+			*--top[tr[(*ak)->data[i]]] = *ak;
 	}
+
+	return nrec;
 }
 
 /* insertion sort */
-static inline void
-simplesort(const u_char **a, int n, int b, const u_char *tr, u_int endch)
+static inline int
+simplesort(const RECHEADER **a, int n, int b, const u_char *tr, u_int endch)
 {
+	const RECHEADER **ak, **ai, *tmp;
+	const RECHEADER **lim = a + n;
+	const u_char *s, *t;
 	u_char ch;
-	const u_char  **ak, **ai, *s, *t;
 
-	_DIAGASSERT(a != NULL);
-	_DIAGASSERT(tr != NULL);
+	if (n <= 1)
+		return n;
 
-	for (ak = a+1; --n >= 1; ak++)
+	for (ak = a+1; ak < lim; ak++) {
 		for (ai = ak; ai > a; ai--) {
-			for (s = ai[0] + b, t = ai[-1] + b;
+			for (s = ai[0]->data + b, t = ai[-1]->data + b;
 			    (ch = tr[*s]) != endch; s++, t++)
 				if (ch != tr[*t])
 					break;
-			if (ch >= tr[*t])
+			if (ch >= tr[*t]) {
+
 				break;
-			swap(ai[0], ai[-1], s);
+			}
+			swap(ai[0], ai[-1], tmp);
 		}
+	}
+
+	return n;
 }

Index: src/usr.bin/sort/sort.h
diff -u src/usr.bin/sort/sort.h:1.26 src/usr.bin/sort/sort.h:1.27
--- src/usr.bin/sort/sort.h:1.26	Sat Sep  5 09:16:18 2009
+++ src/usr.bin/sort/sort.h	Sat Sep  5 12:00:25 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: sort.h,v 1.26 2009/09/05 09:16:18 dsl Exp $	*/
+/*	$NetBSD: sort.h,v 1.27 2009/09/05 12:00:25 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -109,10 +109,6 @@
 
 /* A record is a key/line pair starting at rec.data. It has a total length
  * and an offset to the start of the line half of the pair.
- * In order to use (s)radixsort, the array of pointers often points
- * to the data field (and sometimes not the first byte even!).
- * This means the code has to 'back up' by the correct number of bytes
- * in order to get the actual header.
  */
 typedef struct recheader {
 	length_t length;
@@ -120,8 +116,6 @@
 	u_char data[1];
 } RECHEADER;
 
-#define REC_DATA_OFFSET offsetof(RECHEADER, data)
-
 /* This is the column as seen by struct field.  It is used by enterfield.
  * They are matched with corresponding coldescs during initialization.
  */
@@ -162,7 +156,7 @@
 
 typedef int (*get_func_t)(int, int, struct filelist *, int,
 		RECHEADER *, u_char *, struct field *);
-typedef void (*put_func_t)(const struct recheader *, FILE *);
+typedef void (*put_func_t)(const RECHEADER *, FILE *);
 
 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
 extern u_char *const weight_tables[4];   /* ascii, Rascii, Ftable, RFtable */
@@ -177,7 +171,7 @@
 #define DEBUG(ch) (debug_flags & (1 << ((ch) & 31)))
 extern unsigned int debug_flags;
 
-void	 append(const u_char **, int, FILE *,
+void	 append(const RECHEADER **, int, FILE *,
 	    void (*)(const RECHEADER *, FILE *), u_char *);
 void	 concat(FILE *, FILE *);
 length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *);
@@ -199,6 +193,6 @@
 void	 putrec(const RECHEADER *, FILE *);
 void	 putkeydump(const RECHEADER *, FILE *);
 void	 rd_append(int, int, int, FILE *, u_char *, u_char *);
-int	 radix_sort(const u_char **, int, const u_char *, u_int);
+int	 radix_sort(const RECHEADER **, const RECHEADER **, int, const u_char *, u_int);
 int	 setfield(const char *, struct field *, int);
 void	 settables(void);

Reply via email to