Module Name:    src
Committed By:   dsl
Date:           Sat Sep 26 21:16:55 UTC 2009

Modified Files:
        src/usr.bin/sort: files.c fsort.c fsort.h msort.c sort.c sort.h tmp.c

Log Message:
Move all the fopen() calls out of the record read routines into the callers.
Split the merge sort so that fsort() can pass the 'FILE *' of the temporary
files to be merged into the merge code.
Don't rely on realloc() not moving the end address of a buffer!
Rework merge sort so that it sorts pointers to 'struct mfile' and only
copies about sort record descriptors.
No functional change intended.


To generate a diff of this commit:
cvs rdiff -u -r1.37 -r1.38 src/usr.bin/sort/files.c
cvs rdiff -u -r1.41 -r1.42 src/usr.bin/sort/fsort.c
cvs rdiff -u -r1.16 -r1.17 src/usr.bin/sort/fsort.h
cvs rdiff -u -r1.26 -r1.27 src/usr.bin/sort/msort.c
cvs rdiff -u -r1.55 -r1.56 src/usr.bin/sort/sort.c
cvs rdiff -u -r1.28 -r1.29 src/usr.bin/sort/sort.h
cvs rdiff -u -r1.14 -r1.15 src/usr.bin/sort/tmp.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/sort/files.c
diff -u src/usr.bin/sort/files.c:1.37 src/usr.bin/sort/files.c:1.38
--- src/usr.bin/sort/files.c:1.37	Thu Sep 10 22:02:40 2009
+++ src/usr.bin/sort/files.c	Sat Sep 26 21:16:55 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: files.c,v 1.37 2009/09/10 22:02:40 dsl Exp $	*/
+/*	$NetBSD: files.c,v 1.38 2009/09/26 21:16:55 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: files.c,v 1.37 2009/09/10 22:02:40 dsl Exp $");
+__RCSID("$NetBSD: files.c,v 1.38 2009/09/26 21:16:55 dsl Exp $");
 __SCCSID("@(#)files.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -78,20 +78,15 @@
  * in the first fsort pass.
  */
 int
-makeline(int flno, int top, struct filelist *filelist, int nfiles,
-    RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
+makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2)
 {
-	static u_char *obufend;
+	static u_char *opos = NULL;
 	static size_t osz;
 	u_char *pos;
-	static int filenum = 0, overflow = 0;
-	static FILE *fp = 0;
 	int c;
 
-	c = 0;		/* XXXGCC -Wuninitialized [pmppc] */
-
 	pos = recbuf->data;
-	if (overflow) {
+	if (opos != NULL) {
 		/*
 		 * Buffer shortage is solved by either of two ways:
 		 * o flush previous buffered data and start using the
@@ -101,77 +96,47 @@
 		 * The former is preferred, realloc is only done when
 		 * there is exactly one item in buffer which does not fit. 
 		 */
-		if (bufend == obufend)
-			memmove(pos, bufend - osz, osz);
+		if (pos != opos)
+			memmove(pos, opos, osz);
 
 		pos += osz;
-		overflow = 0;
+		opos = NULL;
 	}
 
-	for (;;) {
-		if (flno >= 0 && (fp = fstack[flno].fp) == NULL)
-			return (EOF);
-		else if (fp == NULL) {
-			if (filenum  >= nfiles)
-				return (EOF);
-			if (!(fp = fopen(filelist->names[filenum], "r")))
-				err(2, "%s", filelist->names[filenum]);
-			filenum++;
-		}
-		while ((pos < bufend) && ((c = getc(fp)) != EOF)) {
-			*pos++ = c;
-			if (c == REC_D) {
-				recbuf->offset = 0;
-				recbuf->length = pos - recbuf->data;
-				recbuf->keylen = recbuf->length - 1;
-				return (0);
+	while (pos < bufend) {
+		c = getc(fp);
+		if (c == EOF) {
+			if (pos == recbuf->data) {
+				FCLOSE(fp);
+				return EOF;
 			}
+			/* Add terminator to partial line */
+			c = REC_D;
 		}
-		if (pos >= bufend) {
-			if (recbuf->data < bufend) {
-				overflow = 1;
-				obufend = bufend;
-				osz = (pos - recbuf->data);
-			}
-			return (BUFFEND);
-		} else if (c == EOF) {
-			if (recbuf->data != pos) {
-				*pos++ = REC_D;
-				recbuf->offset = 0;
-				recbuf->length = pos - recbuf->data;
-				recbuf->keylen = recbuf->length - 1;
-				return (0);
-			}
-			FCLOSE(fp);
-			fp = 0;
-			if (flno >= 0)
-				fstack[flno].fp = 0;
-		} else {
-			
-			warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data);
-
-			/* Consume the rest of line from input */
-			while ((c = getc(fp)) != REC_D && c != EOF)
-				;
-
+		*pos++ = c;
+		if (c == REC_D) {
 			recbuf->offset = 0;
-			recbuf->length = 0;
-			recbuf->keylen = 0;
-
-			return (BUFFEND);
+			recbuf->length = pos - recbuf->data;
+			recbuf->keylen = recbuf->length - 1;
+			return (0);
 		}
 	}
+
+	/* Ran out of buffer space... */
+	if (recbuf->data < bufend) {
+		/* Remember where the partial record is */
+		osz = pos - recbuf->data;
+		opos = recbuf->data;
+	}
+	return (BUFFEND);
 }
 
 /*
  * This generates keys. It's only called in the first fsort pass
  */
 int
-makekey(int flno, int top, struct filelist *filelist, int nfiles,
-    RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
+makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl)
 {
-	static int filenum = 0;
-	static FILE *dbdesc = 0;
 	static u_char *line_data;
 	static ssize_t line_size;
 	static int overflow = 0;
@@ -182,29 +147,10 @@
 		return overflow ? BUFFEND : 0;
 	}
 
-	/* Loop through files until we find a line of input */
-	for (;;) {
-		if (flno >= 0) {
-			if (!(dbdesc = fstack[flno].fp))
-				return (EOF);
-		} else if (!dbdesc) {
-			if (filenum  >= nfiles)
-				return (EOF);
-			dbdesc = fopen(filelist->names[filenum], "r");
-			if (!dbdesc)
-				err(2, "%s", filelist->names[filenum]);
-			filenum++;
-		}
-		line_size = seq(dbdesc, &line_data);
-		if (line_size != 0)
-			/* Got a line */
-			break;
-
-		/* End of file ... */
-		FCLOSE(dbdesc);
-		dbdesc = 0;
-		if (flno >= 0)
-			fstack[flno].fp = 0;
+	line_size = seq(fp, &line_data);
+	if (line_size == 0) {
+		FCLOSE(fp);
+		return EOF;
 	}
 
 	if (line_size > bufend - recbuf->data) {
@@ -299,18 +245,14 @@
  * get a record from a temporary file. (Used by merge sort.)
  */
 int
-geteasy(int flno, int top, struct filelist *filelist, int nfiles,
-    RECHEADER *rec, u_char *end, struct field *dummy2)
+geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2)
 {
 	int i;
-	FILE *fp;
 
-	fp = fstack[flno].fp;
 	if ((u_char *)(rec + 1) > end)
 		return (BUFFEND);
 	if (!fread(rec, 1, offsetof(RECHEADER, data), fp)) {
 		fclose(fp);
-		fstack[flno].fp = 0;
 		return (EOF);
 	}
 	if (end - rec->data < (ptrdiff_t)rec->length) {

Index: src/usr.bin/sort/fsort.c
diff -u src/usr.bin/sort/fsort.c:1.41 src/usr.bin/sort/fsort.c:1.42
--- src/usr.bin/sort/fsort.c:1.41	Thu Sep 10 22:02:40 2009
+++ src/usr.bin/sort/fsort.c	Sat Sep 26 21:16:55 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: fsort.c,v 1.41 2009/09/10 22:02:40 dsl Exp $	*/
+/*	$NetBSD: fsort.c,v 1.42 2009/09/26 21:16:55 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -72,15 +72,13 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: fsort.c,v 1.41 2009/09/10 22:02:40 dsl Exp $");
+__RCSID("$NetBSD: fsort.c,v 1.42 2009/09/26 21:16:55 dsl Exp $");
 __SCCSID("@(#)fsort.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
 #include <stdlib.h>
 #include <string.h>
 
-struct tempfile fstack[MAXFCT];
-
 #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1))
 
 void
@@ -96,12 +94,14 @@
 	get_func_t get;
 	RECHEADER *crec;
 	RECHEADER *nbuffer;
-	FILE *fp;
+	FILE *fp, *tmp_fp;
+	int file_no;
+	int max_recs = DEBUG('m') ? 16 : MAXNUM;
 
 	buffer = malloc(bufsize);
 	bufend = (u_char *)buffer + bufsize;
 	/* Allocate double length keymap for radix_sort */
-	keylist = malloc(2 * MAXNUM * sizeof(*keylist));
+	keylist = malloc(2 * max_recs * sizeof(*keylist));
 	if (buffer == NULL || keylist == NULL)
 		err(2, "failed to malloc initial buffer or keylist");
 
@@ -112,6 +112,11 @@
 		/* Key (merged key fields) added before data */
 		get = makekey;
 
+	file_no = 0;
+	fp = fopen(filelist->names[0], "r");
+	if (fp == NULL)
+		err(2, "%s", filelist->names[0]);
+
 	/* Loop through reads of chunk of input files that get sorted
 	 * and then merged together. */
 	for (;;) {
@@ -121,21 +126,29 @@
 
 		/* Loop reading records */
 		for (;;) {
-			c = get(-1, 0, filelist, nfiles, crec, bufend, ftbl);
+			c = get(fp, crec, bufend, ftbl);
 			/* 'c' is 0, EOF or BUFFEND */
 			if (c == 0) {
 				/* Save start of key in input buffer */
 				*keypos++ = crec;
-				if (++nelem == MAXNUM) {
+				if (++nelem == max_recs) {
 					c = BUFFEND;
 					break;
 				}
 				crec = (RECHEADER *)(crec->data + SALIGN(crec->length));
 				continue;
 			}
-			if (c == EOF)
-				break;
-			if (nelem >= MAXNUM || bufsize >= MAXBUFSIZE)
+			if (c == EOF) {
+				/* try next file */
+				if (++file_no >= nfiles)
+					/* no more files */
+					break;
+				fp = fopen(filelist->names[file_no], "r");
+				if (fp == NULL)
+					err(2, "%s", filelist->names[file_no]);
+				continue;
+			}
+			if (nelem >= max_recs || bufsize >= MAXBUFSIZE)
 				/* Need to sort and save this lot of data */
 				break;
 
@@ -158,7 +171,7 @@
 		}
 
 		/* Sort this set of records */
-		radix_sort(keylist, keylist + MAXNUM, nelem);
+		radix_sort(keylist, keylist + max_recs, nelem);
 
 		if (c == EOF && mfct == 0) {
 			/* all the data is (sorted) in the buffer */
@@ -168,25 +181,17 @@
 		}
 
 		/* Save current data to a temporary file for a later merge */
-		fp = ftmp();
-		fstack[mfct].fp = fp;
-		append(keylist, nelem, fp, putrec);
-		mfct++;
+		tmp_fp = ftmp();
+		append(keylist, nelem, tmp_fp, putrec);
+		save_for_merge(tmp_fp, geteasy, ftbl);
+		mfct = 1;
 
 		if (c == EOF) {
 			/* merge to output file */
-			fmerge(0, filelist, mfct, geteasy, outfp,
+			merge_sort(outfp, 
 			    DEBUG('k') ? putkeydump : putline, ftbl);
 			break;
 		}
-
-		if (mfct == MERGE_FNUM) {
-			/* Merge the files we have */
-			fp = ftmp();
-			fmerge(0, filelist, mfct, geteasy, fp, putrec, ftbl);
-			mfct = 1;
-			fstack[0].fp = fp;
-		}
 	}
 
 	free(keylist);

Index: src/usr.bin/sort/fsort.h
diff -u src/usr.bin/sort/fsort.h:1.16 src/usr.bin/sort/fsort.h:1.17
--- src/usr.bin/sort/fsort.h:1.16	Sat Sep  5 12:00:25 2009
+++ src/usr.bin/sort/fsort.h	Sat Sep 26 21:16:55 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: fsort.h,v 1.16 2009/09/05 12:00:25 dsl Exp $	*/
+/*	$NetBSD: fsort.h,v 1.17 2009/09/26 21:16:55 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -76,16 +76,3 @@
  */
 #define DEFBUFSIZE	(1 << 20)	/* 1MB */
 #define MAXBUFSIZE	(8 << 20)	/* 10 MB */
-
-/*
- * Number of files merge() can merge in one pass.
- * This should be power of two so that it's possible to use this value
- * for rouding.
- */
-#define MERGE_FNUM	16
-
-/* Temporary files contian data (with record headers) in sorted order */
-struct tempfile {
-	FILE *fp;
-};
-extern struct tempfile fstack[MAXFCT];

Index: src/usr.bin/sort/msort.c
diff -u src/usr.bin/sort/msort.c:1.26 src/usr.bin/sort/msort.c:1.27
--- src/usr.bin/sort/msort.c:1.26	Thu Sep 10 22:02:40 2009
+++ src/usr.bin/sort/msort.c	Sat Sep 26 21:16:55 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: msort.c,v 1.26 2009/09/10 22:02:40 dsl Exp $	*/
+/*	$NetBSD: msort.c,v 1.27 2009/09/26 21:16:55 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,128 +65,119 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: msort.c,v 1.26 2009/09/10 22:02:40 dsl Exp $");
+__RCSID("$NetBSD: msort.c,v 1.27 2009/09/26 21:16:55 dsl Exp $");
 __SCCSID("@(#)msort.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <util.h>
 
 /* Subroutines using comparisons: merge sort and check order */
 #define DELETE (1)
 
 typedef struct mfile {
-	u_char *end;
-	int flno;
-	RECHEADER rec[1];
+	FILE         *fp;
+	get_func_t   get;
+	RECHEADER    *rec;
+	u_char       *end;
 } MFILE;
 
 static int cmp(RECHEADER *, RECHEADER *);
-static int insert(struct mfile **, struct mfile **, int, int);
-static void merge(int, int, get_func_t, FILE *, put_func_t, struct field *);
+static int insert(struct mfile **, struct mfile *, int, int);
+
+/*
+ * Number of files merge() can merge in one pass.
+ * This should be power of two so that it's possible to use this value
+ * for rouding.
+ */
+#define MERGE_FNUM      16
+
+static struct mfile fstack[MERGE_FNUM];
+static int fstack_count;
 
 void
-fmerge(int binno, struct filelist *filelist, int nfiles,
-    get_func_t get, FILE *outfp, put_func_t fput, struct field *ftbl)
+save_for_merge(FILE *fp, get_func_t get, struct field *ftbl)
 {
-	FILE *tout;
-	int i, j, last;
-	put_func_t put;
-
-	while (nfiles) {
-		put = putrec;
-		for (j = 0; j < nfiles; j += MERGE_FNUM) {
-			if (nfiles <= MERGE_FNUM) {
-				tout = outfp;
-				put = fput;
-			}
-			else
-				tout = ftmp();
-			last = min(MERGE_FNUM, nfiles - j);
-			if (binno < 0) {
-				for (i = 0; i < last; i++)
-					if (!(fstack[i+MAXFCT-1-MERGE_FNUM].fp =
-					    fopen(filelist->names[j+i], "r")))
-						err(2, "%s",
-							filelist->names[j+i]);
-				merge(MAXFCT-1-MERGE_FNUM, last, get, tout, put, ftbl);
-			} else {
-				for (i = 0; i< last; i++)
-					rewind(fstack[i+j].fp);
-				merge(j, last, get, tout, put, ftbl);
-			}
-			if (nfiles > MERGE_FNUM)
-				fstack[j/MERGE_FNUM].fp = tout;
-		}
-		nfiles = (nfiles + (MERGE_FNUM - 1)) / MERGE_FNUM;
-		if (nfiles == 1)
-			nfiles = 0;
-		if (binno < 0) {
-			binno = 0;
-			get = geteasy;
-		}
+	FILE *mfp;
+
+	if (fstack_count == MERGE_FNUM) {
+		/* Must reduce the number of temporary files */
+		mfp = ftmp();
+		merge_sort(mfp, putrec, ftbl);
+		fstack[0].fp = mfp;
+		fstack[0].get = geteasy;
+		fstack_count = 1;
 	}
+
+	fstack[fstack_count].fp = fp;
+	fstack[fstack_count++].get = get;
 }
 
-static void
-merge(int infl0, int nfiles, get_func_t get, FILE *outfp, put_func_t put,
-    struct field *ftbl)
+void
+fmerge(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl)
 {
-	int c, i, j, nf = nfiles;
-	struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
-	size_t availsz;
-	static void *bufs[MERGE_FNUM + 1];
-	static size_t bufs_sz[MERGE_FNUM + 1];
+	get_func_t get = SINGL_FLD ? makeline : makekey;
+	FILE *fp;
+	int i;
+
+	for (i = 0; i < nfiles; i++) {
+		fp = fopen(filelist->names[i], "r");
+		if (fp == NULL)
+			err(2, "%s", filelist->names[i]);
+		save_for_merge(fp, get, ftbl);
+	}
 
-	/*
-	 * We need nfiles + 1 buffers.
-	 */
-	for (i = 0; i < nfiles + 1; i++) {
-		if (bufs[i])
-			continue;
+	merge_sort(outfp, putline, ftbl);
+}
 
-		bufs[i] = malloc(DEFLLEN);
-		if (!bufs[i])
-			err(2, "merge: malloc");
-		memset(bufs[i], 0, DEFLLEN);
-		bufs_sz[i] = DEFLLEN;
-	}
+void
+merge_sort(FILE *outfp, put_func_t put, struct field *ftbl)
+{
+	struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
+	RECHEADER *new_rec;
+	u_char *new_end;
+	void *tmp;
+	int c, i, nfiles;
+	size_t sz;
 
 	/* Read one record from each file (read again if a duplicate) */
-	for (i = j = 0; i < nfiles; i++, j++) {
-		cfile = (struct mfile *) bufs[j];
-		cfile->flno = infl0 + j;
-		cfile->end = (u_char *) bufs[j] + bufs_sz[j];
-		for (c = 1; c == 1;) {
-			c = get(cfile->flno, 0, NULL, nfiles, cfile->rec,
-			    cfile->end, ftbl);
-			if (c == EOF) {
-				--i;
-				--nfiles;
+	for (nfiles = i = 0; i < fstack_count; i++) {
+		cfile = &fstack[i];
+		if (cfile->rec == NULL) {
+			cfile->rec = emalloc(DEFLLEN);
+			cfile->end = (u_char *)cfile->rec + DEFLLEN;
+		}
+		rewind(cfile->fp);
+
+		for (;;) {
+			c = cfile->get(cfile->fp, cfile->rec, cfile->end, ftbl);
+			if (c == EOF)
 				break;
-			}
 
 			if (c == BUFFEND) {
-				bufs_sz[j] *= 2;
-				cfile = realloc(bufs[j], bufs_sz[j]);
-				if (!cfile)
-					err(2, "merge: realloc");
-
-				bufs[j] = (void *) cfile;
-				cfile->end = (u_char *)cfile + bufs_sz[j];
-
-				c = 1;
+				/* Double buffer size */
+				sz = (cfile->end - (u_char *)cfile->rec) * 2;
+				cfile->rec = erealloc(cfile->rec, sz);
+				cfile->end = (u_char *)cfile->rec + sz;
 				continue;
 			}
 
-			if (i)
-				c = insert(flist, &cfile, i, !DELETE);
-			else
+			if (nfiles != 0) {
+				if (insert(flist, cfile, nfiles, !DELETE))
+					/* Duplicate removed */
+					continue;
+			} else
 				flist[0] = cfile;
+			nfiles++;
+			break;
 		}
 	}
 
+	if (nfiles == 0)
+		return;
+
 	/*
 	 * We now loop reading a new record from the file with the
 	 * 'sorted first' existing record.
@@ -194,86 +185,85 @@
 	 * output file - maintaining one record from each file in the sorted
 	 * list.
 	 */
-	cfile = (struct mfile *) bufs[nf];
-	cfile->end = (u_char *) cfile + bufs_sz[nf];
-	cfile->flno = flist[0]->flno;
+	new_rec = emalloc(DEFLLEN);
+	new_end = (u_char *)new_rec + DEFLLEN;
 	for (;;) {
-		c = get(cfile->flno, 0, NULL, nfiles, cfile->rec,
-		    cfile->end, ftbl);
+		cfile = flist[0];
+		c = cfile->get(cfile->fp, new_rec, new_end, ftbl);
 		if (c == EOF) {
 			/* Write out last record from now-empty input */
-			put(flist[0]->rec, outfp);
+			put(cfile->rec, outfp);
 			if (--nfiles == 0)
 				break;
 			/* Replace from file with now-first sorted record. */
 			/* (Moving base 'flist' saves copying everything!) */
 			flist++;
-			cfile->flno = flist[0]->flno;
 			continue;
 		}
 		if (c == BUFFEND) {
 			/* Buffer not large enough - double in size */
-			char *oldbuf = (char *) cfile;
-			availsz = (char *) cfile->end - oldbuf;
-			availsz *= 2;
-			cfile = realloc(oldbuf, availsz);
-			if (!cfile)
-				err(2, "merge: realloc");
-			cfile->end = (u_char *)cfile + availsz;
-
-			/* Update pointers we'll use for next merge */
-			for (i = 0; i < nf + 1; i++) {
-				if (bufs[i] == oldbuf) {
-					bufs[i] = (char *)cfile;
-					bufs_sz[i] = availsz;
-					break;
-				}
-			}
-			/* Read again from same file into same buffer */
+			sz = (new_end - (u_char *)new_rec) * 2;
+			new_rec = erealloc(new_rec, sz);
+			new_end = (u_char *)new_rec +sz;
 			continue;
 		}
-			
+
+		/* Swap in new buffer, saving old */
+		tmp = cfile->rec;
+		cfile->rec = new_rec;
+		new_rec = tmp;
+		tmp = cfile->end;
+		cfile->end = new_end;
+		new_end = tmp;
+
 		/* Add into sort, removing the original first entry */
-		c = insert(flist, &cfile, nfiles, DELETE);
+		c = insert(flist, cfile, nfiles, DELETE);
+		if (c != 0 || (UNIQUE && cfile == flist[0]
+			    && cmp(new_rec, cfile->rec) == 0)) {
+			/* Was an unwanted duplicate, restore buffer */
+			tmp = cfile->rec;
+			cfile->rec = new_rec;
+			new_rec = tmp;
+			tmp = cfile->end;
+			cfile->end = new_end;
+			new_end = tmp;
+			continue;
+		}
 
-		/*
-		 * 'cfile' is now the buffer from the old record from the
-		 * file we just read, but with the file number of the
-		 * current 'first record.
-		 * (Unless we are rejecting a duplicate, when c == 1 and
-		 * it is unchanged!)
-		 */
-		if (c == 0)
-			put(cfile->rec, outfp);
+		/* Write out 'old' record */
+		put(new_rec, outfp);
 	}
+
+	free(new_rec);
 }
 
 /*
- * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec;
+ * if delete: inserts rec in flist, deletes flist[0];
  * otherwise just inserts *rec in flist.
+ * Returns 1 if record is a duplicate to be ignored.
  */
 static int
-insert(struct mfile **flist, struct mfile **rec, int ttop, int delete)
+insert(struct mfile **flist, struct mfile *rec, int ttop, int delete)
 {
-	struct mfile *tmprec = *rec;
 	int mid, top = ttop, bot = 0, cmpv = 1;
 
 	for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) {
-		cmpv = cmp(tmprec->rec, flist[mid]->rec);
+		cmpv = cmp(rec->rec, flist[mid]->rec);
 		if (cmpv == 0 ) {
 			if (UNIQUE)
 				/* Duplicate key, read another record */
+				/* NB: This doesn't guarantee to keep any
+				 * particular record. */
 				return 1;
 			/*
-			 * Apply sort by fileno, to give priority to earlier
-			 * specified files, hence providing a stable sort.
+			 * Apply sort by input file order.
 			 * We could truncate the sort is the fileno are
 			 * adjacent - but that is all too hard!
 			 * The fileno cannot be equal, since we only have one
 			 * record from each file (+ flist[0] which never
 			 * comes here).
 			 */
-			cmpv = tmprec->flno - flist[mid]->flno;
+			cmpv = rec < flist[mid] ? -1 : 1;
 			if (REVERSE)
 				cmpv = -cmpv;
 		}
@@ -286,16 +276,11 @@
 	/* At this point we haven't yet compared against flist[0] */
 
 	if (delete) {
-		/* flist[0] came from the same file, it cannot be earlier. */
-		if (UNIQUE && bot == 0 && cmp(tmprec->rec, flist[0]->rec) == 0)
-			/* Duplicate record (key) in original file */
-			return 1;
-		tmprec = flist[0];
-		if (bot)
-			memmove(flist, flist + 1, bot * sizeof(MFILE **));
-		flist[bot] = *rec;
-		tmprec->flno = flist[0]->flno;
-		*rec = tmprec;
+		/* flist[0] is ourselves, only the caller knows the old data */
+		if (bot != 0) {
+			memmove(flist, flist + 1, bot * sizeof(MFILE *));
+			flist[bot] = rec;
+		}
 		return 0;
 	}
 
@@ -303,7 +288,7 @@
 
 	if (bot == 0 && cmpv != 0) {
 		/* Doesn't match flist[1], must compare with flist[0] */
-		cmpv = cmp(tmprec->rec, flist[0]->rec);
+		cmpv = cmp(rec->rec, flist[0]->rec);
 		if (cmpv == 0 && UNIQUE)
 			return 1;
 		/* Add matching keys in file order (ie new is later) */
@@ -311,8 +296,8 @@
 			bot = -1;
 	}
 	bot++;
-	memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE **));
-	flist[bot] = *rec;
+	memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE *));
+	flist[bot] = rec;
 	return 0;
 }
 
@@ -320,21 +305,27 @@
  * check order on one file
  */
 void
-order(struct filelist *filelist, get_func_t get, struct field *ftbl)
+order(struct filelist *filelist, struct field *ftbl)
 {
+	get_func_t get = SINGL_FLD ? makeline : makekey;
 	RECHEADER *crec, *prec, *trec;
 	u_char *crec_end, *prec_end, *trec_end;
+	FILE *fp;
 	int c;
 
+	fp = fopen(filelist->names[0], "r");
+	if (fp == NULL)
+		err(2, "%s", filelist->names[0]);
+
 	crec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
 	crec_end = crec->data + DEFLLEN;
 	prec = malloc(offsetof(RECHEADER, data[DEFLLEN]));
 	prec_end = prec->data + DEFLLEN;
 
 	/* XXX this does exit(0) for overlong lines */
-	if (get(-1, 0, filelist, 1, prec, prec_end, ftbl) != 0)
+	if (get(fp, prec, prec_end, ftbl) != 0)
 		exit(0);
-	while (get(-1, 0, filelist, 1, crec, crec_end, ftbl) == 0) {
+	while (get(fp, crec, crec_end, ftbl) == 0) {
 		if (0 < (c = cmp(prec, crec))) {
 			crec->data[crec->length-1] = 0;
 			errx(1, "found disorder: %s", crec->data+crec->offset);

Index: src/usr.bin/sort/sort.c
diff -u src/usr.bin/sort/sort.c:1.55 src/usr.bin/sort/sort.c:1.56
--- src/usr.bin/sort/sort.c:1.55	Thu Sep 10 22:02:40 2009
+++ src/usr.bin/sort/sort.c	Sat Sep 26 21:16:55 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: sort.c,v 1.55 2009/09/10 22:02:40 dsl Exp $	*/
+/*	$NetBSD: sort.c,v 1.56 2009/09/26 21:16:55 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -77,7 +77,7 @@
 #endif /* not lint */
 
 #ifndef lint
-__RCSID("$NetBSD: sort.c,v 1.55 2009/09/10 22:02:40 dsl Exp $");
+__RCSID("$NetBSD: sort.c,v 1.56 2009/09/26 21:16:55 dsl Exp $");
 __SCCSID("@(#)sort.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -121,7 +121,6 @@
 int
 main(int argc, char *argv[])
 {
-	get_func_t get;
 	int ch, i, stdinflag = 0;
 	char cflag = 0, mflag = 0;
 	char *outfile, *outpath = 0;
@@ -313,15 +312,11 @@
 		num_input_files = argc - optind;
 	}
 
-	if (SINGL_FLD)
-		get = makeline;
-	else
-		get = makekey;
-
 	if (cflag) {
-		order(&filelist, get, fldtab);
+		order(&filelist, fldtab);
 		/* NOT REACHED */
 	}
+
 	if (!outpath) {
 		toutpath[0] = '\0';	/* path not used in this case */
 		outfile = outpath = toutpath;
@@ -356,10 +351,9 @@
 			err(2, "output file %s", outfile);
 	}
 
-	if (mflag) {
-		fmerge(-1, &filelist, num_input_files, get, outfp, putline,
-			fldtab);
-	} else
+	if (mflag)
+		fmerge(&filelist, num_input_files, outfp, fldtab);
+	else
 		fsort(&filelist, num_input_files, outfp, fldtab);
 
 	if (outfile != outpath) {

Index: src/usr.bin/sort/sort.h
diff -u src/usr.bin/sort/sort.h:1.28 src/usr.bin/sort/sort.h:1.29
--- src/usr.bin/sort/sort.h:1.28	Thu Sep 10 22:02:40 2009
+++ src/usr.bin/sort/sort.h	Sat Sep 26 21:16:55 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: sort.h,v 1.28 2009/09/10 22:02:40 dsl Exp $	*/
+/*	$NetBSD: sort.h,v 1.29 2009/09/26 21:16:55 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -158,8 +158,7 @@
 	const char * const * names;
 };
 
-typedef int (*get_func_t)(int, int, struct filelist *, int,
-		RECHEADER *, u_char *, struct field *);
+typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *);
 typedef void (*put_func_t)(const RECHEADER *, FILE *);
 
 extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
@@ -181,17 +180,15 @@
 void	 fixit(int *, char **);
 void	 fldreset(struct field *);
 FILE	*ftmp(void);
-void	 fmerge(int, struct filelist *, int,
-		get_func_t, FILE *, put_func_t, struct field *);
+void	 fmerge(struct filelist *, int, FILE *, struct field *);
+void	 save_for_merge(FILE *, get_func_t, struct field *);
+void	 merge_sort(FILE *, put_func_t, struct field *);
 void	 fsort(struct filelist *, int, FILE *, struct field *);
-int	 geteasy(int, int, struct filelist *,
-	    int, RECHEADER *, u_char *, struct field *);
-int	 makekey(int, int, struct filelist *,
-	    int, RECHEADER *, u_char *, struct field *);
-int	 makeline(int, int, struct filelist *,
-	    int, RECHEADER *, u_char *, struct field *);
+int	 geteasy(FILE *, RECHEADER *, u_char *, struct field *);
+int	 makekey(FILE *, RECHEADER *, u_char *, struct field *);
+int	 makeline(FILE *, RECHEADER *, u_char *, struct field *);
 int	 optval(int, int);
-void	 order(struct filelist *, get_func_t, struct field *);
+void	 order(struct filelist *, struct field *);
 void	 putline(const RECHEADER *, FILE *);
 void	 putrec(const RECHEADER *, FILE *);
 void	 putkeydump(const RECHEADER *, FILE *);

Index: src/usr.bin/sort/tmp.c
diff -u src/usr.bin/sort/tmp.c:1.14 src/usr.bin/sort/tmp.c:1.15
--- src/usr.bin/sort/tmp.c:1.14	Sat Aug 15 09:48:46 2009
+++ src/usr.bin/sort/tmp.c	Sat Sep 26 21:16:55 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: tmp.c,v 1.14 2009/08/15 09:48:46 dsl Exp $	*/
+/*	$NetBSD: tmp.c,v 1.15 2009/09/26 21:16:55 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -64,7 +64,7 @@
 #include <sys/cdefs.h>
 
 #ifndef lint
-__RCSID("$NetBSD: tmp.c,v 1.14 2009/08/15 09:48:46 dsl Exp $");
+__RCSID("$NetBSD: tmp.c,v 1.15 2009/09/26 21:16:55 dsl Exp $");
 __SCCSID("@(#)tmp.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -101,7 +101,8 @@
 		err(2, "ftmp: mkstemp(\"%s\")", path);
 	if (!(fp = fdopen(fd, "w+")))
 		err(2, "ftmp: fdopen(\"%s\")", path);
-	(void)unlink(path);
+	if (!DEBUG('t'))
+		(void)unlink(path);
 
 	(void)sigprocmask(SIG_SETMASK, &oset, NULL);
 	return (fp);

Reply via email to