sort

David Laight Sat, 22 Aug 2009 03:53:38 -0700

Module Name:    src
Committed By:   dsl
Date:           Sat Aug 22 10:53:28 UTC 2009


Modified Files:
        src/usr.bin/sort: append.c fields.c files.c fsort.c init.c msort.c
            sort.c sort.h

Log Message:
Rework the way sort generates sort keys:
- If we generate a key, it is always sortable using memcmp()
- If we are sorting the whole record, then a weight-table must be used
  during compares.
- Major surgery to encoding of numbers to ensure unique keys for equal
  numeric values.  Reverse numerics are handled by inverting the sign.
- Case folding (-f) is handled when the sort keys are generated. No other
  code has to care at all.
- Key uniqueness (-u) is done during merge for large datasets. It only
  has to be done when writing the output file for small files.
  Since the file is in key order this is simple!
Probably fixes all of: PR/27257 PR/25551 PR/22182 PR/31095 PR/30504
PR/36816 PR/37860 PR/39308
Also PR/18614 should no longer die, but a little more work needs to be
done on the merging for very large files.


To generate a diff of this commit:
cvs rdiff -u -r1.19 -r1.20 src/usr.bin/sort/append.c src/usr.bin/sort/init.c
cvs rdiff -u -r1.24 -r1.25 src/usr.bin/sort/fields.c src/usr.bin/sort/sort.h
cvs rdiff -u -r1.34 -r1.35 src/usr.bin/sort/files.c
cvs rdiff -u -r1.38 -r1.39 src/usr.bin/sort/fsort.c
cvs rdiff -u -r1.22 -r1.23 src/usr.bin/sort/msort.c
cvs rdiff -u -r1.51 -r1.52 src/usr.bin/sort/sort.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/sort/append.c
diff -u src/usr.bin/sort/append.c:1.19 src/usr.bin/sort/append.c:1.20
--- src/usr.bin/sort/append.c:1.19	Thu Aug 20 06:36:25 2009
+++ src/usr.bin/sort/append.c	Sat Aug 22 10:53:28 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: append.c,v 1.19 2009/08/20 06:36:25 dsl Exp $	*/
+/*	$NetBSD: append.c,v 1.20 2009/08/22 10:53:28 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -64,118 +64,82 @@
 #include "sort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: append.c,v 1.19 2009/08/20 06:36:25 dsl Exp $");
+__RCSID("$NetBSD: append.c,v 1.20 2009/08/22 10:53:28 dsl Exp $");
 __SCCSID("@(#)append.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
 #include <stdlib.h>
 #include <string.h>
 
-#define OUTPUT {							\
-	if ((n = cpos - ppos) > 1) {					\
-		ppos -= n;						\
-		radix_sort(ppos, n, wts1, REC_D);			\
-		for (; ppos < cpos; ppos++) {				\
-			prec = (const RECHEADER *) (*ppos - REC_DATA_OFFSET);\
-			put(prec, fp);					\
-		}							\
-	} else put(prec, fp);						\
+static int
+wt_cmp(const u_char *a, const u_char *b, size_t len, u_char *wts)
+{
+    size_t i;
+
+    for (i = 0; i < len; i++) {
+	    if (wts[*a++] != wts[*b++])
+		return 1;
+    }
+
+    return 0;
 }
 
 /*
  * copy sorted lines to output; check for uniqueness
  */
 void
-append(const u_char **keylist, int nelem, FILE *fp, put_func_t put,
-    struct field *ftbl)
+append(const u_char **keylist, int nelem, FILE *fp, put_func_t put, u_char *wts)
 {
-	u_char *wts, *wts1;
-	int n;
-	const u_char **cpos, **ppos, **lastkey;
-	const u_char *cend, *pend, *start;
+	const u_char **cpos, **lastkey;
 	const struct recheader *crec, *prec;
+	size_t plen;
 
-	if (*keylist == '\0' && UNIQUE)
+	lastkey = keylist + nelem;
+	if (!UNIQUE || wts == NULL) {
+		for (cpos = keylist; cpos < lastkey; cpos++)
+			put((const RECHEADER *)(*cpos - REC_DATA_OFFSET), fp);
 		return;
-
-	wts1 = wts = ftbl[0].weights;
-	if ((!UNIQUE) && SINGL_FLD && ftbl[0].flags & F) {
-		/* Folding case */
-		if (ftbl[0].flags & R)
-			wts1 = Rascii;
-		else
-			wts1 = ascii;
 	}
 
-	lastkey = keylist + nelem;
-	if (SINGL_FLD && (UNIQUE || wts1 != wts)) {
-		ppos = keylist;
-		prec = (const RECHEADER *) (*ppos - REC_DATA_OFFSET);
-		if (UNIQUE)
-			put(prec, fp);
+	if (nelem == 0)
+		return;
+
+	cpos = keylist;
+	prec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
+
+	if (!SINGL_FLD) {
+		/* Key for each line is already in adjacent bytes */
+		plen = prec->offset;
 		for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
 			crec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
-			if (crec->length == prec->length) {
-				/*
-				 * Set pend and cend so that trailing NUL and
-				 * record separator is ignored.
-				 */
-				pend = (const u_char *) &prec->data + prec->length - 2;
-				cend = (const u_char *) &crec->data + crec->length - 2;
-				for (start = *cpos; cend >= start; cend--) {
-					if (wts[*cend] != wts[*pend])
-						break;
-					pend--;
-				}
-				if (pend + 1 != *ppos) {
-					if (!UNIQUE) {
-						OUTPUT;
-					} else
-						put(crec, fp);
-					ppos = cpos;
-					prec = crec;
-				}
-			} else {
-				if (!UNIQUE) {
-					OUTPUT;
-				} else
-					put(crec, fp);
-				ppos = cpos;
-				prec = crec;
+			if (crec->offset == plen
+			    && memcmp(crec->data, prec->data, plen) == 0) {
+				/* Duplicate key */
+				continue;
 			}
+			put(prec, fp);
+			prec = crec;
+			plen = prec->offset;
 		}
-		if (!UNIQUE)  { OUTPUT; }
-	} else if (UNIQUE) {
-		ppos = keylist;
-		prec = (const RECHEADER *) (*ppos - REC_DATA_OFFSET);
 		put(prec, fp);
-		for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
-			crec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
-			if (crec->offset == prec->offset) {
-				/*
-				 * Set pend and cend so that trailing NUL and
-				 * record separator is ignored.
-				 */
-				pend = (const u_char *) &prec->data + prec->offset - 2;
-				cend = (const u_char *) &crec->data + crec->offset - 2;
-				for (start = *cpos; cend >= start; cend--) {
-					if (wts[*cend] != wts[*pend])
-						break;
-					pend--;
-				}
-				if (pend + 1 != *ppos) {
-					ppos = cpos;
-					prec = crec;
-					put(prec, fp);
-				}
-			} else {
-				ppos = cpos;
-				prec = crec;
-				put(prec, fp);
-			}
-		}
-	} else for (cpos = keylist; cpos < lastkey; cpos++) {
+		return;
+	}
+
+	/* We have to compare the raw data - which means applying weight */
+
+	/* Key for each line is already in adjacent bytes */
+	plen = prec->length;
+	for (cpos = &keylist[1]; cpos < lastkey; cpos++) {
 		crec = (const RECHEADER *) (*cpos - REC_DATA_OFFSET);
-		put(crec, fp);
+		if (crec->length == plen
+		    && wt_cmp(crec->data, prec->data, plen, wts) == 0) {
+			/* Duplicate key */
+			continue;
+		}
+		put(prec, fp);
+		prec = crec;
+		plen = prec->length;
 	}
+	put(prec, fp);
+	return;
 }
Index: src/usr.bin/sort/init.c
diff -u src/usr.bin/sort/init.c:1.19 src/usr.bin/sort/init.c:1.20
--- src/usr.bin/sort/init.c:1.19	Sat Aug 15 09:48:46 2009
+++ src/usr.bin/sort/init.c	Sat Aug 22 10:53:28 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: init.c,v 1.19 2009/08/15 09:48:46 dsl Exp $	*/
+/*	$NetBSD: init.c,v 1.20 2009/08/22 10:53:28 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -64,7 +64,7 @@
 #include "sort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: init.c,v 1.19 2009/08/15 09:48:46 dsl Exp $");
+__RCSID("$NetBSD: init.c,v 1.20 2009/08/22 10:53:28 dsl Exp $");
 __SCCSID("@(#)init.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -74,12 +74,10 @@
 static void insertcol(struct field *);
 static const char *setcolumn(const char *, struct field *, int);
 
-u_char gweights[NBINS];
-
 /*
- * masks of ignored characters.  Alltable is 256 ones.
+ * masks of ignored characters.
  */
-static u_char alltable[NBINS], dtable[NBINS], itable[NBINS];
+static u_char dtable[NBINS], itable[NBINS];
 
 /*
  * parsed key options
@@ -187,8 +185,7 @@
 {
 	int tmp;
 
-	cur_fld->weights = ascii;
-	cur_fld->mask = alltable;
+	cur_fld->mask = NULL;
 
 	pos = setcolumn(pos, cur_fld, gflag);
 	if (*pos == '\0')			/* key extends to EOL. */
@@ -202,18 +199,8 @@
 		cur_fld->flags = gflag;
 	tmp = cur_fld->flags;
 
-	/*
-	 * Assign appropriate mask table and weight table.
-	 * If the global weights are reversed, the local field
-	 * must be "re-reversed".
-	 */
-	if (((tmp & R) ^ (gflag & R)) && (tmp & F))
-		cur_fld->weights = RFtable;
-	else if (tmp & F)
-		cur_fld->weights = Ftable;
-	else if ((tmp & R) ^ (gflag & R))
-		cur_fld->weights = Rascii;
-
+	/* Assign appropriate mask table and weight table. */
+	cur_fld->weights = weight_tables[tmp & (R | F)];
 	if (tmp & I)
 		cur_fld->mask = itable;
 	else if (tmp & D)
@@ -325,71 +312,64 @@
 
 /*
  * ascii, Rascii, Ftable, and RFtable map
- * REC_D -> REC_D;  {not REC_D} -> {not REC_D}.
- * gweights maps REC_D -> (0 or 255); {not REC_D} -> {not gweights[REC_D]}.
- * Note: when sorting in forward order, to encode character zero in a key,
- * use \001\001; character 1 becomes \001\002.  In this case, character 0
- * is reserved for the field delimiter.  Analagously for -r (fld_d = 255).
+ *
+ * Sorting 'weight' tables.
+ * Convert 'ascii' characters into their sort order.
+ * The 'F' variants fold lower case to upper equivalent
+ * The 'R' variants are for reverse sorting.
+ * The record separator (REC_D) always maps to 0.
+ * One is reserved for field separators (added when key is generated)
+ * The field separator (from -t<ch>) maps to 1 or 255 (unless SINGL_FLD)
+ * All other bytes map to the appropriate value for the sort order.
+ * Numeric sorts don't need any tables, they are reversed by negation.
+ *
  * Note: this is only good for ASCII sorting.  For different LC 's,
- * all bets are off.  See also num_init in number.c
+ * all bets are off.
+ *
+ * If SINGL_FLD then the weights have to be applied during the actual sort.
+ * Otherwise they are applied when the key bytes are built.
+ *
+ * itable[] and dtable[] are the masks for -i (ignore non-printables)
+ * and -d (only sort blank and alphanumerics).
  */
 void
 settables(int gflags)
 {
-	u_char *wts;
-	int i, incr;
-	for (i=0; i < 256; i++) {
-		ascii[i] = i;
-		if (i > REC_D && i < 255 - REC_D+1)
-			Rascii[i] = 255 - i + 1;
-		else
-			Rascii[i] = 255 - i;
-		if (islower(i)) {
-			Ftable[i] = Ftable[toupper(i)];
-			RFtable[i] = RFtable[toupper(i)];
-		} else if (REC_D>= 'A' && REC_D < 'Z' && i < 'a' && i > REC_D) {
-			Ftable[i] = i + 1;
-			RFtable[i] = Rascii[i] - 1;
-		} else {
-			Ftable[i] = i;
-			RFtable[i] = Rascii[i];
+	int i;
+	int next_weight = SINGL_FLD ? 1 : 2;
+	int rev_weight = SINGL_FLD ? 255 : 254;
+	int had_field_sep = 0;
+
+	for (i = 0; i < 256; i++) {
+		unweighted[i] = i;
+		if (d_mask[i] & REC_D_F)
+			continue;
+		if (d_mask[i] & FLD_D && !SINGL_FLD) {
+			ascii[i] = 1;
+			Rascii[i] = 255;
+			if (had_field_sep) {
+				/* avoid confusion in key dumps */
+				next_weight++;
+				rev_weight--;
+			}
+			had_field_sep = 1;
+			continue;
+		}
+		ascii[i] = next_weight;
+		Rascii[i] = rev_weight;
+		if (Ftable[i] == 0) {
+			Ftable[i] = next_weight;
+			RFtable[i] = rev_weight;
+			Ftable[tolower(i)] = next_weight;
+			RFtable[tolower(i)] = rev_weight;
 		}
-		alltable[i] = 1;
+		next_weight++;
+		rev_weight--;
 
 		if (i == '\n' || isprint(i))
 			itable[i] = 1;
-		else
-			itable[i] = 0;
 
 		if (i == '\n' || i == '\t' || i == ' ' || isalnum(i))
 			dtable[i] = 1;
-		else
-			dtable[i] = 0;
-	}
-
-	Rascii[REC_D] = RFtable[REC_D] = REC_D;
-	if (isupper(REC_D))
-		Ftable[tolower(REC_D)]++;
-
-	if ((gflags & R) && !((gflags & F) && SINGL_FLD))
-		wts = Rascii;
-	else if (!((gflags & F) && SINGL_FLD))
-		wts = ascii;
-	else if (gflags & R)
-		wts = RFtable;
-	else
-		wts = Ftable;
-
-	memmove(gweights, wts, sizeof(gweights));
-	incr = (gflags & R) ? -1 : 1;
-	for (i = 0; i < REC_D; i++)
-		gweights[i] += incr;
-	gweights[REC_D] = ((gflags & R) ? 255 : 0);
-	if (SINGL_FLD && (gflags & F)) {
-		for (i = 0; i < REC_D; i++) {
-			ascii[i] += incr;
-			Rascii[i] += incr;
-		}
-		ascii[REC_D] = Rascii[REC_D] = gweights[REC_D];
 	}
 }

Index: src/usr.bin/sort/fields.c
diff -u src/usr.bin/sort/fields.c:1.24 src/usr.bin/sort/fields.c:1.25
--- src/usr.bin/sort/fields.c:1.24	Thu Aug 20 06:36:25 2009
+++ src/usr.bin/sort/fields.c	Sat Aug 22 10:53:28 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: fields.c,v 1.24 2009/08/20 06:36:25 dsl Exp $	*/
+/*	$NetBSD: fields.c,v 1.25 2009/08/22 10:53:28 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -66,7 +66,7 @@
 #include "sort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: fields.c,v 1.24 2009/08/20 06:36:25 dsl Exp $");
+__RCSID("$NetBSD: fields.c,v 1.25 2009/08/22 10:53:28 dsl Exp $");
 __SCCSID("@(#)fields.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -84,13 +84,7 @@
 static u_char *enterfield(u_char *, const u_char *, struct field *, int);
 static u_char *number(u_char *, const u_char *, u_char *, u_char *, int);
 
-#define DECIMAL '.'
-#define OFFSET 128
-
-u_char TENS[10];	/* TENS[0] = REC_D <= 128 ? 130 - '0' : 2 -'0'... */
-u_char NEGTENS[10];	/* NEGTENS[0] = REC_D <= 128 ? 126 + '0' : 252 +'0' */
-u_char *OFF_TENS, *OFF_NTENS;	/* TENS - '0', NEGTENS - '0' */
-u_char fnum[NBINS], rnum[NBINS];
+#define DECIMAL_POINT '.'
 
 /*
  * constructs sort key with leading recheader, followed by the key,
@@ -142,9 +136,10 @@
 	 * original line data (for output) as the 'keybuf' data.
 	 * keybuf->length is the number of key bytes + data bytes.
 	 * keybuf->offset is the number of key bytes.
-	 * We add a record separator (usually \n) after the key in case
+	 * We add a record separator weight after the key in case
 	 * (as is usual) we need to preserve the order of equal lines,
 	 * and for 'sort -u'.
+	 * The key itself will have had the correct weight applied.
 	 */
 	keypos = keybuf->data;
 	endkey = keybuf_end - line_size - 1;
@@ -157,7 +152,7 @@
 		    fieldtable->flags)) == NULL)
 			return (1);
 	}
-	*keypos++ = REC_D;
+	*keypos++ = 0;
 
 	keybuf->offset = keypos - keybuf->data;
 	keybuf->length = keybuf->offset + line_size;
@@ -176,7 +171,6 @@
 	u_char *start, *end, *lineend, *mask, *lweight;
 	struct column icol, tcol;
 	u_int flags;
-	u_int Rflag;
 
 	icol = cur_fld->icol;
 	tcol = cur_fld->tcol;
@@ -201,162 +195,174 @@
 			end = tcol.p->end;
 	}
 
-	if (flags & N) {
-		Rflag = (gflags & R ) ^ (flags & R) ? 1 : 0;
-		return number(tablepos, endkey, start, end, Rflag);
-	}
+	if (flags & N)
+		return number(tablepos, endkey, start, end, flags);
+
+	/* Bound check space - assuming nothing is skipped */
+	if (tablepos + (end - start) + 1 >= endkey)
+		return NULL;
 
 	mask = cur_fld->mask;
 	lweight = cur_fld->weights;	
-	for (; start < end; start++)
-		if (mask[*start]) {
-			if (*start <= 1) {
-				if (tablepos+2 >= endkey)
-					return (NULL);
-				*tablepos++ = lweight[1];
-				*tablepos++ = lweight[*start ? 2 : 1];
-			} else {
-				if (tablepos+1 >= endkey)
-					return (NULL);
-				*tablepos++ = lweight[*start];
-			}
+	for (; start < end; start++) {
+		if (mask && mask[*start]) {
+			*tablepos++ = lweight[*start];
 		}
-	*tablepos++ = lweight[0];
-	return (tablepos == endkey ? NULL : tablepos);
+	}
+	/* Add extra byte to sort short keys correctly */
+	*tablepos++ = flags & R ? 255 : 1;
+	return tablepos;
 }
 
-/* Uses the first bin to assign sign, expsign, 0, and the first
- * 61 out of the exponent ( (254 - 3 origins - 4 over/underflows)/4 = 61 ).
- *   When sorting in forward order:
- * use (0-99) -> (130->240) for sorting the mantissa if REC_D <=128;
- * else use (0-99)->(2-102).
- * If the exponent is >=61, use another byte for each additional 253
- * in the exponent. Cutoff is at 567.
- * To avoid confusing the exponent and the mantissa, use a field delimiter
- * if the exponent is exactly 61, 61+252, etc--this is ok, since it's the
- * only time a field delimiter can come in that position.
- * Reverse order is done analagously.
+/*
+ * Numbers are converted to a floating point format (exponent & mantissa)
+ * so that they compare correctly as sequence of unsigned bytes.
+ * The output cannot contain a 0x00 byte (the record separator).
+ * Bytes 0x01 and 0xff are used to terminate positive and negative numbers
+ * to ensure that 0.123 sorts after 0.12 and -0.123 sorts before -0.12.
+ *
+ * The first byte contains the overall sign, exponent sign and some of the
+ * exponent. These have to be ordered (-ve value, decreasing exponent),
+ * zero, (+ve value, increasing exponent).
+ * After excluding 0, 1, 0xff and 0x80 (used for zero) there are 61
+ * exponent values available, this isn't quite enough and the highest
+ * values are used to encode large exponents in multiple bytes.
+ *
+ * An exponent of zero has value 0xc0 for +ve numbers and 0x40 for -ves.
+ *
+ * The mantissa is stored 2 digits per byte offset by 0x40, for negative
+ * numbers the order must be reversed (they are subtracted from 0x100).
+ *
+ * Reverse sorts are done by inverting the sign of the number.
+ *
+ * We don't have to worry about REC_D, the key is terminated by 0x00.
  */
 
+#define SIGNED(reverse, value) ((reverse) ? 0x100 - (value) : (value))
+
+/* Large exponents are encoded EXP_ENC_BITS per byte */
+#define EXP_ENC_BITS 7
+#define EXP_ENC_VAL  (1 << EXP_ENC_BITS)
+#define EXP_ENC_MASK (EXP_ENC_VAL - 1)
+#define MAX_EXP_ENC  ((int)(sizeof(int) * 8 + (EXP_ENC_BITS-1))/EXP_ENC_BITS)
+
 static u_char *
-number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend, int Rflag)
+number(u_char *pos, const u_char *bufend, u_char *line, u_char *lineend,
+    int reverse)
 {
-	int or_sign, parity = 0;
-	int expincr = 1, exponent = -1;
-	int bite, expsign = 1, sign = 1, zeroskip = 0;
-	u_char lastvalue='0', *nonzero=NULL, *tline, *C_TENS;
-	u_char *nweights;
-
-	if (Rflag)
-		nweights = rnum;
-	else
-		nweights = fnum;
-	if (pos > bufend - 8)
+	int exponent = -1;
+	int had_dp = 0;
+	u_char *tline;
+	char ch;
+	unsigned int val;
+	u_char *last_nz_pos;
+
+	reverse &= R;
+
+	/* Give ourselves space for the key terminator */
+	bufend--;
+
+	/* Ensure we have enough space for the exponent */
+	if (pos + 1 + MAX_EXP_ENC > bufend)
 		return (NULL);
-	/*
-	 * or_sign sets the sort direction:
-	 *	(-r: +/-)(sign: +/-)(expsign: +/-)
-	 */
-	or_sign = sign ^ expsign ^ Rflag;
+
 	SKIP_BLANKS(line);
 	if (*line == '-') {	/* set the sign */
-		or_sign ^= 1;
-		sign = 0;
+		reverse ^= R;
 		line++;
 	}
 	/* eat initial zeroes */
 	for (; *line == '0' && line < lineend; line++)
-		zeroskip = 1;
-	/* calculate exponents < 0 */
-	if (*line == DECIMAL) {
-		exponent = 1;
+		continue;
+
+	/* calculate exponents */
+	if (*line == DECIMAL_POINT) {
+		/* Decimal fraction */
+		had_dp = 1;
 		while (*++line == '0' && line < lineend)
+			exponent--;
+	} else {
+		/* Large (absolute) value, count digits */
+		for (tline = line; *tline >= '0' && 
+		    *tline <= '9' && tline < lineend; tline++)
 			exponent++;
-		expincr = 0;
-		expsign = 0;
 	}
-	/* next character better be a digit */
+
+	/* If the first/next character isn't a digit, value is zero */
 	if (*line < '1' || *line > '9' || line >= lineend) {
-		/* only exit if we didn't skip any zero number */
-		if (!zeroskip) {
-			*pos++ = nweights[127];
-			return (pos);
+		/* This may be "0", "0.00", "000" or "fubar" but sorts as 0 */
+		/* XXX what about NaN, NAN, inf and INF */
+		*pos++ = 0x80;
+		return pos;
+	}
+
+	/* Maybe here we should allow for e+12 (etc) */
+
+	/* exponent 0 is 0xc0 for +ve numbers and 0x40 for -ve ones */
+	exponent += 0xc0;
+
+	if (exponent > 0x80 + MAX_EXP_ENC && exponent < 0x100 - MAX_EXP_ENC) {
+		/* Value ok for simple encoding */
+		*pos++ = SIGNED(reverse, exponent);
+	} else {
+		/* Out of range for a single byte */
+		int c, t;
+		exponent -= 0xc0;
+		t = exponent > 0 ? exponent : -exponent;
+		/* Count how many 7-bit blocks are needed */
+		for (c = 0; ; c++) {
+			t /= EXP_ENC_VAL;
+			if (t == 0)
+				break;
+		}
+		/* 'c' better be 0..4 here - but probably 0..2 */
+		t = c;
+		/* Offset just outside valid range */
+		t += 0x40 - MAX_EXP_ENC;
+		if (exponent < 0)
+			t = -t;
+		t += 0xc0;
+		*pos++ = SIGNED(reverse, t);
+		/* now add each 7-bit block (offset 0x40..0xbf) */
+		for (; c >= 0; c--) {
+			t = exponent >> (c * EXP_ENC_BITS);
+			t = (t & EXP_ENC_MASK) + 0x40;
+			*pos++ = SIGNED(reverse, t);
 		}
 	}
-	if (expincr) {
-		for (tline = line-1; *++tline >= '0' && 
-		    *tline <= '9' && tline < lineend;)
-			exponent++;
-	}
-	if (exponent > 567) {
-		*pos++ = nweights[sign ? (expsign ? 254 : 128)
-					: (expsign ? 0 : 126)];
-		warnx("exponent out of bounds");
-		return (pos);
-	}
-	bite = min(exponent, 61);
-	*pos++ = nweights[(sign) ? (expsign ? 189+bite : 189-bite)
-				: (expsign ? 64-bite : 64+bite)];
-	if (bite >= 61) {
-		do {
-			exponent -= bite;
-			bite = min(exponent, 254);
-			*pos++ = nweights[or_sign ? 254-bite : bite];
-		} while (bite == 254);
-	}
-	C_TENS = or_sign ? OFF_NTENS : OFF_TENS;
-	for (; line < lineend; line++) {
-		if (*line >= '0' && *line <= '9') {
-			if (parity) {
-				*pos++ = C_TENS[lastvalue] + (or_sign ? - *line
-						: *line);
-				if (pos == bufend)
-					return (NULL);
-				if (*line != '0' || lastvalue != '0')
-					nonzero = pos;	
-			} else
-				lastvalue = *line;
-			parity ^= 1;
-		} else if (*line == DECIMAL) {
-			if (!expincr)	/* a decimal already occurred once */
-				break;
-			expincr = 0;
-		} else
+
+	/* Now add mantissa, 2 digits per byte */
+	for (last_nz_pos = pos; line < lineend; ) {
+		if (pos >= bufend)
+			return NULL;
+		ch = *line;
+		val = (ch - '0') * 10;
+		if (val > 90) {
+			if (ch == DECIMAL_POINT && !had_dp) {
+				had_dp = 1;
+				continue;
+			}
+			break;
+		}
+		while (line < lineend) {
+			ch = *line++;
+			if (ch == DECIMAL_POINT && !had_dp) {
+				had_dp = 1;
+				continue;
+			}
+			if (ch < '0' || ch > '9')
+				line = lineend;
+			else
+				val += ch - '0';
 			break;
+		}
+		*pos++ = SIGNED(reverse, val + 0x40);
+		if (val != 0)
+			last_nz_pos = pos;
 	}
-	if ((parity && lastvalue != '0') || !nonzero) {
-		*pos++ = or_sign ? OFF_NTENS[lastvalue] - '0' :
-					OFF_TENS[lastvalue] + '0';
-	} else
-		pos = nonzero;	
-	if (pos > bufend-1)
-		return (NULL);
-	*pos++ = or_sign ? nweights[254] : nweights[0];
-	return (pos);
-}
 
-/* This forces a gap around the record delimiter
- * Thus fnum has values over (0,254) -> ((0,REC_D-1),(REC_D+1,255));
- * rnum over (0,254) -> (255,REC_D+1),(REC_D-1,0))
- */
-void
-num_init(void)
-{
-	int i;
-	TENS[0] = REC_D <=128 ? 130 - '0' : 2 - '0';
-	NEGTENS[0] = REC_D <=128 ? 126 + '0' : 254 + '0';
-	OFF_TENS = TENS - '0';
-	OFF_NTENS = NEGTENS - '0';
-	for (i = 1; i < 10; i++) {
-		TENS[i] = TENS[i - 1] + 10;
-		NEGTENS[i] = NEGTENS[i - 1] - 10;
-	}
-	for (i = 0; i < REC_D; i++) {
-		fnum[i] = i;
-		rnum[255 - i] = i;
-	}
-	for (i = REC_D; i <255; i++) {
-		fnum[i] = i + 1;
-		rnum[255 - i] = i - 1;
-	}
+	/* Add key terminator, deleting any trailing "00" */
+	*last_nz_pos++ = SIGNED(reverse, 1);
+
+	return (last_nz_pos);
 }
Index: src/usr.bin/sort/sort.h
diff -u src/usr.bin/sort/sort.h:1.24 src/usr.bin/sort/sort.h:1.25
--- src/usr.bin/sort/sort.h:1.24	Thu Aug 20 06:36:25 2009
+++ src/usr.bin/sort/sort.h	Sat Aug 22 10:53:28 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: sort.h,v 1.24 2009/08/20 06:36:25 dsl Exp $	*/
+/*	$NetBSD: sort.h,v 1.25 2009/08/22 10:53:28 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -77,18 +77,19 @@
 #define NBINS		256
 
 /* values for masks, weights, and other flags. */
-#define I 1		/* mask out non-printable characters */
-#define D 2		/* sort alphanumeric characters only */
-#define N 4		/* Field is a number */
-#define F 8		/* weight lower and upper case the same */
-#define R 16		/* Field is reversed with respect to the global weight */
+/* R and F get used to index weight_tables[] */
+#define R 1		/* Field is reversed */
+#define F 2		/* weight lower and upper case the same */
+#define I 4		/* mask out non-printable characters */
+#define D 8		/* sort alphanumeric characters only */
+#define N 16		/* Field is a number */
 #define BI 32		/* ignore blanks in icol */
 #define BT 64		/* ignore blanks in tcol */
 
 /* masks for delimiters: blanks, fields, and termination. */
-#define BLANK 1		/* ' ', '\t'; '\n' if -T is invoked */
+#define BLANK 1		/* ' ', '\t'; '\n' if -R is invoked */
 #define FLD_D 2		/* ' ', '\t' default; from -t otherwise */
-#define REC_D_F 4	/* '\n' default; from -T otherwise */
+#define REC_D_F 4	/* '\n' default; from -R otherwise */
 
 #define min(a, b) ((a) < (b) ? (a) : (b))
 #define max(a, b) ((a) > (b) ? (a) : (b))
@@ -143,6 +144,9 @@
  * implies the end of the line.  Flags regulate omission of blanks and
  * numerical sorts; mask determines which characters are ignored (from -i, -d);
  * weights determines the sort weights of a character (from -f, -r).
+ *
+ * The first field contains the global flags etc.
+ * The list terminates when icol = 0.
  */
 struct field {
 	struct column icol;
@@ -161,18 +165,21 @@
 typedef void (*put_func_t)(const struct recheader *, FILE *);
 
 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
+extern u_char *const weight_tables[4];   /* ascii, Rascii, Ftable, RFtable */
 extern u_char d_mask[NBINS];
 extern int SINGL_FLD, SEP_FLAG, UNIQUE;
 extern int REC_D;
 extern const char *tmpdir;
-extern int stable_sort;
 extern int (*radix_sort)(const u_char **, int, const u_char *, u_int);
-extern u_char gweights[NBINS];
+extern u_char unweighted[NBINS];
 extern struct coldesc *clist;
 extern int ncols;
 
+#define DEBUG(ch) (debug_flags & (1 << ((ch) & 31)))
+extern unsigned int debug_flags;
+
 void	 append(const u_char **, int, FILE *,
-	    void (*)(const RECHEADER *, FILE *), struct field *);
+	    void (*)(const RECHEADER *, FILE *), u_char *);
 void	 concat(FILE *, FILE *);
 length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *);
 void	 fixit(int *, char **);
@@ -187,11 +194,11 @@
 	    int, RECHEADER *, u_char *, struct field *);
 int	 makeline(int, int, struct filelist *,
 	    int, RECHEADER *, u_char *, struct field *);
-void	 num_init(void);
 int	 optval(int, int);
 void	 order(struct filelist *, get_func_t, struct field *);
 void	 putline(const RECHEADER *, FILE *);
 void	 putrec(const RECHEADER *, FILE *);
+void	 putkeydump(const RECHEADER *, FILE *);
 void	 rd_append(int, int, int, FILE *, u_char *, u_char *);
 int	 setfield(const char *, struct field *, int);
 void	 settables(int);

Index: src/usr.bin/sort/files.c
diff -u src/usr.bin/sort/files.c:1.34 src/usr.bin/sort/files.c:1.35
--- src/usr.bin/sort/files.c:1.34	Tue Aug 18 18:00:28 2009
+++ src/usr.bin/sort/files.c	Sat Aug 22 10:53:28 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: files.c,v 1.34 2009/08/18 18:00:28 dsl Exp $	*/
+/*	$NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: files.c,v 1.34 2009/08/18 18:00:28 dsl Exp $");
+__RCSID("$NetBSD: files.c,v 1.35 2009/08/22 10:53:28 dsl Exp $");
 __SCCSID("@(#)files.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -284,6 +284,15 @@
 }
 
 /*
+ * write dump of key to output (for -Dk)
+ */
+void
+putkeydump(const RECHEADER *rec, FILE *fp)
+{
+	EWRITE(rec, 1, rec->offset + REC_DATA_OFFSET, fp);
+}
+
+/*
  * get a record from a temporary file. (Used by merge sort.)
  */
 int

Index: src/usr.bin/sort/fsort.c
diff -u src/usr.bin/sort/fsort.c:1.38 src/usr.bin/sort/fsort.c:1.39
--- src/usr.bin/sort/fsort.c:1.38	Thu Aug 20 06:36:25 2009
+++ src/usr.bin/sort/fsort.c	Sat Aug 22 10:53:28 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: fsort.c,v 1.38 2009/08/20 06:36:25 dsl Exp $	*/
+/*	$NetBSD: fsort.c,v 1.39 2009/08/22 10:53:28 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -72,7 +72,7 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: fsort.c,v 1.38 2009/08/20 06:36:25 dsl Exp $");
+__RCSID("$NetBSD: fsort.c,v 1.39 2009/08/22 10:53:28 dsl Exp $");
 __SCCSID("@(#)fsort.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -97,6 +97,7 @@
 	get_func_t get;
 	struct recheader *crec;
 	u_char *nbuffer;
+	FILE *fp;
 
 	if (!buffer) {
 		buffer = malloc(bufsize);
@@ -159,30 +160,37 @@
 		}
 
 		/* Sort this set of records */
-		if (radix_sort(keylist, nelem, ftbl[0].weights, REC_D))
-			err(2, NULL);
+		if (SINGL_FLD) {
+			if (radix_sort(keylist, nelem, ftbl[0].weights, REC_D))
+				err(2, "single field radix_sort");
+		} else {
+			if (radix_sort(keylist, nelem, unweighted, 0))
+				err(2, "unweighted radix_sort");
+		}
 
 		if (c == EOF && mfct == 0) {
 			/* all the data is (sorted) in the buffer */
-			append(keylist, nelem, outfp, putline, ftbl);
+			append(keylist, nelem, outfp,
+			    DEBUG('k') ? putkeydump : putline, ftbl->weights);
 			break;
 		}
 
 		/* Save current data to a temporary file for a later merge */
-		fstack[mfct].fp = ftmp();
-		append(keylist, nelem, fstack[mfct].fp, putrec, ftbl);
+		fp = ftmp();
+		fstack[mfct].fp = fp;
+		append(keylist, nelem, fp, putrec, NULL);
 		mfct++;
 
 		if (c == EOF) {
 			/* merge to output file */
-			fmerge(0, filelist, mfct, geteasy, outfp, putline,
-			    ftbl);
+			fmerge(0, filelist, mfct, geteasy, outfp,
+			    DEBUG('k') ? putkeydump : putline, ftbl);
 			break;
 		}
 
 		if (mfct == MERGE_FNUM) {
 			/* Merge the files we have */
-			FILE *fp = ftmp();
+			fp = ftmp();
 			fmerge(0, filelist, mfct, geteasy, fp, putrec, ftbl);
 			mfct = 1;
 			fstack[0].fp = fp;

Index: src/usr.bin/sort/msort.c
diff -u src/usr.bin/sort/msort.c:1.22 src/usr.bin/sort/msort.c:1.23
--- src/usr.bin/sort/msort.c:1.22	Thu Aug 20 06:36:25 2009
+++ src/usr.bin/sort/msort.c	Sat Aug 22 10:53:28 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: msort.c,v 1.22 2009/08/20 06:36:25 dsl Exp $	*/
+/*	$NetBSD: msort.c,v 1.23 2009/08/22 10:53:28 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: msort.c,v 1.22 2009/08/20 06:36:25 dsl Exp $");
+__RCSID("$NetBSD: msort.c,v 1.23 2009/08/22 10:53:28 dsl Exp $");
 __SCCSID("@(#)msort.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -82,7 +82,7 @@
 	struct recheader rec[1];
 } MFILE;
 
-static u_char *wts, *wts1 = NULL;
+static u_char *wts;
 
 static int cmp(RECHEADER *, RECHEADER *);
 static int insert(struct mfile **, struct mfile **, int, int);
@@ -97,15 +97,6 @@
 	put_func_t put;
 
 	wts = ftbl->weights;
-	if (!UNIQUE && SINGL_FLD && ftbl->flags & F)
-		wts1 = (ftbl->flags & R) ? Rascii : ascii;
-
-	if (!buffer) {
-		buffer = malloc(bufsize);
-		if (!buffer)
-			err(2, "fmerge(): malloc");
-		memset(buffer, 0, bufsize);
-	}
 
 	while (nfiles) {
 		put = putrec;
@@ -153,12 +144,9 @@
 	static size_t bufs_sz[MERGE_FNUM + 1];
 
 	/*
-	 * We need nfiles + 1 buffers. One is 'buffer', the
-	 * rest needs to be allocated.
+	 * We need nfiles + 1 buffers.
 	 */
-	bufs[0] = buffer;
-	bufs_sz[0] = bufsize;
-	for (i = 1; i < nfiles + 1; i++) {
+	for (i = 0; i < nfiles + 1; i++) {
 		if (bufs[i])
 			continue;
 
@@ -169,25 +157,27 @@
 		bufs_sz[i] = DEFLLEN;
 	}
 
+	/* Read one record from each file (read again if a duplicate) */
 	for (i = j = 0; i < nfiles; i++, j++) {
 		cfile = (struct mfile *) bufs[j];
 		cfile->flno = infl0 + j;
 		cfile->end = (u_char *) bufs[j] + bufs_sz[j];
 		for (c = 1; c == 1;) {
-			if (EOF == (c = get(cfile->flno, 0, NULL, nfiles,
-			   cfile->rec, cfile->end, ftbl))) {
+			c = get(cfile->flno, 0, NULL, nfiles, cfile->rec,
+			    cfile->end, ftbl);
+			if (c == EOF) {
 				--i;
 				--nfiles;
 				break;
 			}
 
 			if (c == BUFFEND) {
+				bufs_sz[j] *= 2;
 				cfile = realloc(bufs[j], bufs_sz[j]);
 				if (!cfile)
 					err(2, "merge: realloc");
 
 				bufs[j] = (void *) cfile;
-				bufs_sz[j] *= 2;
 				cfile->end = (u_char *)cfile + bufs_sz[j];
 
 				c = 1;
@@ -206,8 +196,9 @@
 	cfile->end = (u_char *) cfile + bufs_sz[nf];
 	while (nfiles) {
 		for (c = 1; c == 1;) {
-			if (EOF == (c = get(cfile->flno, 0, NULL, nfiles,
-			   cfile->rec, cfile->end, ftbl))) {
+			c = get(cfile->flno, 0, NULL, nfiles, cfile->rec,
+			    cfile->end, ftbl);
+			if (c == EOF) {
 				put(flist[0]->rec, outfp);
 				if (--nfiles > 0) {
 					flist++;
@@ -236,15 +227,11 @@
 				continue;
 			}
 				
-			if (!(c = insert(flist, &cfile, nfiles, DELETE)))
+			c = insert(flist, &cfile, nfiles, DELETE);
+			if (c == 0)
 				put(cfile->rec, outfp);
 		}
 	}	
-
-	if (bufs_sz[0] > bufsize) {
-		buffer = bufs[0];
-		bufsize = bufs_sz[0];
-	}
 }
 
 /*
@@ -268,23 +255,19 @@
 			if (UNIQUE)
 				break;
 
-			if (stable_sort) {
-				/*
-				 * Apply sort by fileno, to give priority
-				 * to earlier specified files, hence providing
-				 * more stable sort.
-				 * If fileno is same, the new record should
-				 * be put _after_ the previous entry.
-				 */
-				cmpv = tmprec->flno - flist[mid]->flno;
-				if (cmpv >= 0)
-					bot = mid;
-				else /* cmpv == 0 */
-					bot = mid - 1;
-			} else {
-				/* non-stable sort */
+			/*
+			 * Apply sort by fileno, to give priority
+			 * to earlier specified files, hence providing
+			 * more stable sort.
+			 * If fileno is same, the new record should
+			 * be put _after_ the previous entry.
+			 */
+			/* XXX (dsl) this doesn't seem right */
+			cmpv = tmprec->flno - flist[mid]->flno;
+			if (cmpv >= 0)
+				bot = mid;
+			else
 				bot = mid - 1;
-			}
 
 			break;
 		}
@@ -336,12 +319,11 @@
 	prec = (RECHEADER *) (buffer + DEFLLEN + REC_DATA_OFFSET);
 	prec_end = buffer + 2*(DEFLLEN + REC_DATA_OFFSET);
 	wts = ftbl->weights;
-	if (SINGL_FLD && (ftbl->flags & F))
-		wts1 = (ftbl->flags & R) ? Rascii : ascii;
-	else
-		wts1 = NULL;
-	if (0 == get(-1, 0, filelist, 1, prec, prec_end, ftbl))
-	while (0 == get(-1, 0, filelist, 1, crec, crec_end, ftbl)) {
+
+	/* XXX this does exit(0) for overlong lines */
+	if (get(-1, 0, filelist, 1, prec, prec_end, ftbl) != 0)
+		exit(0);
+	while (get(-1, 0, filelist, 1, crec, crec_end, ftbl) == 0) {
 		if (0 < (c = cmp(prec, crec))) {
 			crec->data[crec->length-1] = 0;
 			errx(1, "found disorder: %s", crec->data+crec->offset);
@@ -370,20 +352,26 @@
 cmp(RECHEADER *rec1, RECHEADER *rec2)
 {
 	int r;
-	u_char *pos1, *pos2, *end;
+	size_t len, i;
+	u_char *pos1, *pos2;
 	u_char *cwts;
-	for (cwts = wts; cwts; cwts = (cwts == wts1 ? NULL : wts1)) {
-		pos1 = rec1->data;
-		pos2 = rec2->data;
-		if (!SINGL_FLD && (UNIQUE || stable_sort))
-			end = pos1 + min(rec1->offset, rec2->offset);
-		else
-			end = pos1 + min(rec1->length, rec2->length);
-
-		for (; pos1 < end; ) {
-			if ((r = cwts[*pos1++] - cwts[*pos2++]))
-				return (r);
-		}
+
+	if (!SINGL_FLD)
+		/* key is weights, and is 0x00 terminated */
+		return memcmp(rec1->data, rec2->data, rec1->offset);
+
+	/* We have to apply the weights ourselves */
+	cwts = wts;
+
+	pos1 = rec1->data;
+	pos2 = rec2->data;
+	len = rec1->length;
+
+	for (i = 0; i < len; i++) {
+		r = cwts[pos1[i]] - cwts[pos2[i]];
+		if (r)
+			return r;
 	}
+
 	return (0);
 }

Index: src/usr.bin/sort/sort.c
diff -u src/usr.bin/sort/sort.c:1.51 src/usr.bin/sort/sort.c:1.52
--- src/usr.bin/sort/sort.c:1.51	Thu Aug 20 06:36:25 2009
+++ src/usr.bin/sort/sort.c	Sat Aug 22 10:53:28 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: sort.c,v 1.51 2009/08/20 06:36:25 dsl Exp $	*/
+/*	$NetBSD: sort.c,v 1.52 2009/08/22 10:53:28 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -76,7 +76,7 @@
 #endif /* not lint */
 
 #ifndef lint
-__RCSID("$NetBSD: sort.c,v 1.51 2009/08/20 06:36:25 dsl Exp $");
+__RCSID("$NetBSD: sort.c,v 1.52 2009/08/22 10:53:28 dsl Exp $");
 __SCCSID("@(#)sort.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -98,15 +98,18 @@
  * weight tables.  Gweights is one of ascii, Rascii..
  * modified to weight rec_d = 0 (or 255)
  */
+u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable };
 u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS];
+u_char unweighted[NBINS];
 int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0;
 
 /*
  * Default to stable sort.
  */
-int stable_sort = 1;
 int (*radix_sort)(const u_char **, int, const u_char *, u_int) = sradixsort;
 
+unsigned int debug_flags = 0;
+
 static char toutpath[MAXPATHLEN];
 
 const char *tmpdir;	/* where temporary files should be put */
@@ -153,7 +156,7 @@
 	if (!(tmpdir = getenv("TMPDIR")))
 		tmpdir = _PATH_TMP;
 
-	while ((ch = getopt(argc, argv, "bcdfik:mHno:rR:sSt:T:ux")) != -1) {
+	while ((ch = getopt(argc, argv, "bcdD:fik:mHno:rR:sSt:T:ux")) != -1) {
 		switch (ch) {
 		case 'b':
 			fldtab->flags |= BI | BT;
@@ -161,14 +164,13 @@
 		case 'c':
 			cflag = 1;
 			break;
+		case 'D': /* Debug flags */
+			for (i = 0; optarg[i]; i++)
+			    debug_flags |= 1 << (optarg[i] & 31);
+			break;
 		case 'd': case 'f': case 'i': case 'n': case 'r':
 			tmp |= optval(ch, 0);
-			if ((tmp & R) && (tmp & F))
-				fldtab->weights = RFtable;
-			else if (tmp & F)
-				fldtab->weights = Ftable;
-			else if (tmp & R)
-				fldtab->weights = Rascii;
+			fldtab->weights = weight_tables[tmp & (R | F)];
 			fldtab->flags |= tmp;
 			break;
 		case 'H':
@@ -194,11 +196,9 @@
 			break;
 		case 's':
 			/* for GNU sort compatibility (this is our default) */
-			stable_sort = 1;
 			radix_sort = radixsort;
 			break;
 		case 'S':
-			stable_sort = 0;
 			radix_sort = sradixsort;
 			break;
 		case 't':
@@ -214,7 +214,8 @@
 		case 'R':
 			if (REC_D != '\n')
 				usage("multiple record delimiters");
-			if ('\n' == (REC_D = *optarg))
+			REC_D = *optarg;
+			if (REC_D == '\n')
 				break;
 			if (optarg[1] != '\0') {
 				char *ep;
@@ -280,11 +281,10 @@
 			setfield("1", &fldtab[++fidx], fldtab->flags);
 		}
 		fldreset(fldtab);
-		fldtab[0].flags &= ~F;
+		// fldtab[0].flags &= ~F;
 	}
 	settables(fldtab[0].flags);
-	num_init();
-	fldtab->weights = gweights;
+	fldtab->weights = weight_tables[fldtab->flags & (R | F)];
 
 	if (optind == argc) {
 		static const char * const names[] = { _PATH_STDIN, NULL };

CVS commit: src/usr.bin/sort

Reply via email to