Module Name:    src
Committed By:   dsl
Date:           Sat Sep  5 09:16:18 UTC 2009

Modified Files:
        src/usr.bin/sort: Makefile init.c sort.c sort.h
Added Files:
        src/usr.bin/sort: radix_sort.c

Log Message:
Include a local copy of the sradixsort() code from libc.
Currently unchanged apart from the deletion of the 'unstable' version and
other unneeded code.
Use fldtab[0]. not fldtab-> when we are referring to the global info
in the 0th entry to emphasise that this entry is different.
fldtab[0].weights is only needed in the SINGL_FLD case - so set it there.
Re-indent a big 'if' is setfield() so that the line breaks match the
logic - which looks dubious now!


To generate a diff of this commit:
cvs rdiff -u -r1.6 -r1.7 src/usr.bin/sort/Makefile
cvs rdiff -u -r1.21 -r1.22 src/usr.bin/sort/init.c
cvs rdiff -u -r0 -r1.1 src/usr.bin/sort/radix_sort.c
cvs rdiff -u -r1.53 -r1.54 src/usr.bin/sort/sort.c
cvs rdiff -u -r1.25 -r1.26 src/usr.bin/sort/sort.h

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/sort/Makefile
diff -u src/usr.bin/sort/Makefile:1.6 src/usr.bin/sort/Makefile:1.7
--- src/usr.bin/sort/Makefile:1.6	Tue Apr 14 22:15:26 2009
+++ src/usr.bin/sort/Makefile	Sat Sep  5 09:16:18 2009
@@ -1,7 +1,8 @@
-#	$NetBSD: Makefile,v 1.6 2009/04/14 22:15:26 lukem Exp $
+#	$NetBSD: Makefile,v 1.7 2009/09/05 09:16:18 dsl Exp $
 #	from: @(#)Makefile	8.1 (Berkeley) 6/6/93
 
 PROG=	sort
 SRCS=	append.c fields.c files.c fsort.c init.c msort.c sort.c tmp.c
+SRCS+=	radix_sort.c
 
 .include <bsd.prog.mk>

Index: src/usr.bin/sort/init.c
diff -u src/usr.bin/sort/init.c:1.21 src/usr.bin/sort/init.c:1.22
--- src/usr.bin/sort/init.c:1.21	Sat Aug 22 21:50:32 2009
+++ src/usr.bin/sort/init.c	Sat Sep  5 09:16:18 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: init.c,v 1.21 2009/08/22 21:50:32 dsl Exp $	*/
+/*	$NetBSD: init.c,v 1.22 2009/09/05 09:16:18 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -64,7 +64,7 @@
 #include "sort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: init.c,v 1.21 2009/08/22 21:50:32 dsl Exp $");
+__RCSID("$NetBSD: init.c,v 1.22 2009/09/05 09:16:18 dsl Exp $");
 __SCCSID("@(#)init.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -210,10 +210,12 @@
 	if (!cur_fld->tcol.indent)	/* BT has no meaning at end of field */
 		cur_fld->flags &= ~BT;
 
-	if (cur_fld->tcol.num && !(!(cur_fld->flags & BI)
-	    && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num
-	    && cur_fld->tcol.indent != 0 /* == 0 -> end of field, i.e. okay */
-	    && cur_fld->tcol.indent < cur_fld->icol.indent))
+	if (cur_fld->tcol.num
+	    && !(!(cur_fld->flags & BI) && cur_fld->flags & BT)
+	    && (cur_fld->tcol.num <= cur_fld->icol.num
+		    /* indent if 0 -> end of field, i.e. okay */
+		    && cur_fld->tcol.indent != 0
+		    && cur_fld->tcol.indent < cur_fld->icol.indent))
 		errx(2, "fields out of order");
 	insertcol(cur_fld);
 	return (cur_fld->tcol.num);
@@ -333,7 +335,7 @@
  * and -d (only sort blank and alphanumerics).
  */
 void
-settables(int gflags)
+settables(void)
 {
 	int i;
 	int next_weight = SINGL_FLD ? 1 : 2;

Index: src/usr.bin/sort/sort.c
diff -u src/usr.bin/sort/sort.c:1.53 src/usr.bin/sort/sort.c:1.54
--- src/usr.bin/sort/sort.c:1.53	Sat Aug 22 21:43:53 2009
+++ src/usr.bin/sort/sort.c	Sat Sep  5 09:16:18 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: sort.c,v 1.53 2009/08/22 21:43:53 dsl Exp $	*/
+/*	$NetBSD: sort.c,v 1.54 2009/09/05 09:16:18 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -76,7 +76,7 @@
 #endif /* not lint */
 
 #ifndef lint
-__RCSID("$NetBSD: sort.c,v 1.53 2009/08/22 21:43:53 dsl Exp $");
+__RCSID("$NetBSD: sort.c,v 1.54 2009/09/05 09:16:18 dsl Exp $");
 __SCCSID("@(#)sort.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -103,11 +103,6 @@
 u_char unweighted[NBINS];
 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
 
-/*
- * Default to stable sort.
- */
-int (*radix_sort)(const u_char **, int, const u_char *, u_int) = sradixsort;
-
 unsigned int debug_flags = 0;
 
 static char toutpath[MAXPATHLEN];
@@ -124,11 +119,11 @@
 main(int argc, char *argv[])
 {
 	get_func_t get;
-	int ch, i, stdinflag = 0, tmp = 0;
+	int ch, i, stdinflag = 0;
 	char cflag = 0, mflag = 0;
 	char *outfile, *outpath = 0;
 	struct field *fldtab, *p;
-	size_t fldtab_sz = 3, fidx = 0;
+	size_t fldtab_sz, fld_cnt;
 	struct filelist filelist;
 	int num_input_files;
 	FILE *outfp = NULL;
@@ -147,6 +142,9 @@
 	d_mask[REC_D = '\n'] = REC_D_F;
 	d_mask['\t'] = d_mask[' '] = BLANK | FLD_D;
 
+	/* fldtab[0] is the global options. */
+	fldtab_sz = 3;
+	fld_cnt = 0;
 	fldtab = malloc(fldtab_sz * sizeof(*fldtab));
 	memset(fldtab, 0, fldtab_sz * sizeof(*fldtab));
 
@@ -159,7 +157,7 @@
 	while ((ch = getopt(argc, argv, "bcdD:fik:mHno:rR:sSt:T:ux")) != -1) {
 		switch (ch) {
 		case 'b':
-			fldtab->flags |= BI | BT;
+			fldtab[0].flags |= BI | BT;
 			break;
 		case 'c':
 			cflag = 1;
@@ -169,9 +167,7 @@
 			    debug_flags |= 1 << (optarg[i] & 31);
 			break;
 		case 'd': case 'f': case 'i': case 'n': case 'r':
-			tmp |= optval(ch, 0);
-			fldtab->weights = weight_tables[tmp & (R | F)];
-			fldtab->flags |= tmp;
+			fldtab[0].flags |= optval(ch, 0);
 			break;
 		case 'H':
 			/* -H was ; use merge sort for blocks of large files' */
@@ -182,11 +178,10 @@
 			if (!p)
 				err(1, "realloc");
 			fldtab = p;
-			memset(&fldtab[fldtab_sz], 0,
-			    sizeof(fldtab[fldtab_sz]));
+			memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0]));
 			fldtab_sz++;
 
-			setfield(optarg, &fldtab[++fidx], fldtab->flags);
+			setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags);
 			break;
 		case 'm':
 			mflag = 1;
@@ -195,11 +190,21 @@
 			outpath = optarg;
 			break;
 		case 's':
-			/* for GNU sort compatibility (this is our default) */
-			radix_sort = sradixsort;
+			/*
+			 * Nominally 'stable sort', keep lines with equal keys
+			 * in input file order. (Default for NetBSD)
+			 * (-s for GNU sort compatibility.)
+			 */
 			break;
 		case 'S':
-			radix_sort = radixsort;
+			/*
+			 * Reverse of -s!
+			 * This needs to enforce a POSIX sort where records
+			 * with equal keys are then sorted by the raw data.
+			 * Currently not implemented!
+			 * (using libc radixsort() v sradixsort() doesn't
+			 * have the desired effect.)
+			 */
 			break;
 		case 't':
 			if (SEP_FLAG)
@@ -272,19 +277,18 @@
 			err(2, "%s", argv[i]);
 	}
 
-	if (!(fldtab->flags & (I|D|N) || fldtab[1].icol.num)) {
+	if (!(fldtab[0].flags & (I|D|N) || fldtab[1].icol.num)) {
 		SINGL_FLD = 1;
 		fldtab[0].icol.num = 1;
+		fldtab[0].weights = weight_tables[fldtab->flags & (R | F)];
 	} else {
 		if (!fldtab[1].icol.num) {
 			fldtab[0].flags &= ~(BI|BT);
-			setfield("1", &fldtab[++fidx], fldtab->flags);
+			setfield("1", &fldtab[++fld_cnt], fldtab->flags);
 		}
 		fldreset(fldtab);
-		// fldtab[0].flags &= ~F;
 	}
-	settables(fldtab[0].flags);
-	fldtab->weights = weight_tables[fldtab->flags & (R | F)];
+	settables();
 
 	if (optind == argc) {
 		static const char * const names[] = { _PATH_STDIN, NULL };

Index: src/usr.bin/sort/sort.h
diff -u src/usr.bin/sort/sort.h:1.25 src/usr.bin/sort/sort.h:1.26
--- src/usr.bin/sort/sort.h:1.25	Sat Aug 22 10:53:28 2009
+++ src/usr.bin/sort/sort.h	Sat Sep  5 09:16:18 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: sort.h,v 1.25 2009/08/22 10:53:28 dsl Exp $	*/
+/*	$NetBSD: sort.h,v 1.26 2009/09/05 09:16:18 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -170,7 +170,6 @@
 extern int SINGL_FLD, SEP_FLAG, UNIQUE;
 extern int REC_D;
 extern const char *tmpdir;
-extern int (*radix_sort)(const u_char **, int, const u_char *, u_int);
 extern u_char unweighted[NBINS];
 extern struct coldesc *clist;
 extern int ncols;
@@ -200,5 +199,6 @@
 void	 putrec(const RECHEADER *, FILE *);
 void	 putkeydump(const RECHEADER *, FILE *);
 void	 rd_append(int, int, int, FILE *, u_char *, u_char *);
+int	 radix_sort(const u_char **, int, const u_char *, u_int);
 int	 setfield(const char *, struct field *, int);
-void	 settables(int);
+void	 settables(void);

Added files:

Index: src/usr.bin/sort/radix_sort.c
diff -u /dev/null src/usr.bin/sort/radix_sort.c:1.1
--- /dev/null	Sat Sep  5 09:16:18 2009
+++ src/usr.bin/sort/radix_sort.c	Sat Sep  5 09:16:18 2009
@@ -0,0 +1,182 @@
+/*	$NetBSD: radix_sort.c,v 1.1 2009/09/05 09:16:18 dsl Exp $	*/
+
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Peter McIlroy and by Dan Bernstein at New York University, 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+static char sccsid[] = "@(#)radixsort.c	8.2 (Berkeley) 4/28/95";
+#else
+__RCSID("$NetBSD: radix_sort.c,v 1.1 2009/09/05 09:16:18 dsl Exp $");
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+/*
+ * 'stable' radix sort initially from libc/stdlib/radixsort.c
+ */
+
+#include <sys/types.h>
+
+#include <assert.h>
+#include <errno.h>
+#include "sort.h"
+
+typedef struct {
+	const u_char **sa;
+	int sn, si;
+} stack;
+
+static inline void simplesort(const u_char **, int, int, const u_char *, u_int);
+static void r_sort_b(const u_char **,
+	    const u_char **, int, int, const u_char *, u_int);
+
+#define	THRESHOLD	20		/* Divert to simplesort(). */
+#define	SIZE		512		/* Default stack size. */
+
+int
+radix_sort(const u_char **a, int n, const u_char *tab, u_int endch)
+{
+	const u_char **ta;
+
+	endch = tab[endch];
+	if (n < THRESHOLD)
+		simplesort(a, n, 0, tab, endch);
+	else {
+		if ((ta = malloc(n * sizeof(a))) == NULL)
+			return (-1);
+		r_sort_b(a, ta, n, 0, tab, endch);
+		free(ta);
+	}
+	return (0);
+}
+
+#define empty(s)	(s >= sp)
+#define pop(a, n, i)	a = (--sp)->sa, n = sp->sn, i = sp->si
+#define push(a, n, i)	sp->sa = a, sp->sn = n, (sp++)->si = i
+#define swap(a, b, t)	t = a, a = b, b = t
+
+/* Stable sort, requiring additional memory. */
+static void
+r_sort_b(const u_char **a, const u_char **ta, int n, int i, const u_char *tr,
+    u_int endch)
+{
+	static u_int count[256], nc, bmin;
+	u_int c;
+	const u_char **ak, **ai;
+	stack s[512], *sp, *sp0, *sp1, temp;
+	const u_char **top[256];
+	u_int *cp, bigc;
+
+	_DIAGASSERT(a != NULL);
+	_DIAGASSERT(ta != NULL);
+	_DIAGASSERT(tr != NULL);
+
+	sp = s;
+	push(a, n, i);
+	while (!empty(s)) {
+		pop(a, n, i);
+		if (n < THRESHOLD) {
+			simplesort(a, n, i, tr, endch);
+			continue;
+		}
+
+		if (nc == 0) {
+			bmin = 255;
+			for (ak = a + n; --ak >= a;) {
+				c = tr[(*ak)[i]];
+				if (++count[c] == 1 && c != endch) {
+					if (c < bmin)
+						bmin = c;
+					nc++;
+				}
+			}
+			if (sp + nc > s + SIZE) {
+				r_sort_b(a, ta, n, i, tr, endch);
+				continue;
+			}
+		}
+
+		sp0 = sp1 = sp;
+		bigc = 2;
+		if (endch == 0) {
+			top[0] = ak = a + count[0];
+			count[0] = 0;
+		} else {
+			ak = a;
+			top[255] = a + n;
+			count[255] = 0;
+		}
+		for (cp = count + bmin; nc > 0; cp++) {
+			while (*cp == 0)
+				cp++;
+			if ((c = *cp) > 1) {
+				if (c > bigc) {
+					bigc = c;
+					sp1 = sp;
+				}
+				push(ak, c, i+1);
+			}
+			top[cp-count] = ak += c;
+			*cp = 0;			/* Reset count[]. */
+			nc--;
+		}
+		swap(*sp0, *sp1, temp);
+
+		for (ak = ta + n, ai = a+n; ak > ta;)	/* Copy to temp. */
+			*--ak = *--ai;
+		for (ak = ta+n; --ak >= ta;)		/* Deal to piles. */
+			*--top[tr[(*ak)[i]]] = *ak;
+	}
+}
+
+/* insertion sort */
+static inline void
+simplesort(const u_char **a, int n, int b, const u_char *tr, u_int endch)
+{
+	u_char ch;
+	const u_char  **ak, **ai, *s, *t;
+
+	_DIAGASSERT(a != NULL);
+	_DIAGASSERT(tr != NULL);
+
+	for (ak = a+1; --n >= 1; ak++)
+		for (ai = ak; ai > a; ai--) {
+			for (s = ai[0] + b, t = ai[-1] + b;
+			    (ch = tr[*s]) != endch; s++, t++)
+				if (ch != tr[*t])
+					break;
+			if (ch >= tr[*t])
+				break;
+			swap(ai[0], ai[-1], s);
+		}
+}

Reply via email to