Module Name:    src
Committed By:   dsl
Date:           Sat Aug 22 15:16:50 UTC 2009

Modified Files:
        src/usr.bin/sort: msort.c

Log Message:
Add some comments and clarifications to this inpeneterable code.
When merging ensure we accurable sort records with identical keys by
file-number, otherwise a 'stable' sort won't be!


To generate a diff of this commit:
cvs rdiff -u -r1.23 -r1.24 src/usr.bin/sort/msort.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/usr.bin/sort/msort.c
diff -u src/usr.bin/sort/msort.c:1.23 src/usr.bin/sort/msort.c:1.24
--- src/usr.bin/sort/msort.c:1.23	Sat Aug 22 10:53:28 2009
+++ src/usr.bin/sort/msort.c	Sat Aug 22 15:16:50 2009
@@ -1,4 +1,4 @@
-/*	$NetBSD: msort.c,v 1.23 2009/08/22 10:53:28 dsl Exp $	*/
+/*	$NetBSD: msort.c,v 1.24 2009/08/22 15:16:50 dsl Exp $	*/
 
 /*-
  * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
@@ -65,7 +65,7 @@
 #include "fsort.h"
 
 #ifndef lint
-__RCSID("$NetBSD: msort.c,v 1.23 2009/08/22 10:53:28 dsl Exp $");
+__RCSID("$NetBSD: msort.c,v 1.24 2009/08/22 15:16:50 dsl Exp $");
 __SCCSID("@(#)msort.c	8.1 (Berkeley) 6/6/93");
 #endif /* not lint */
 
@@ -139,7 +139,7 @@
 {
 	int c, i, j, nf = nfiles;
 	struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile;
-	size_t availsz = bufsize;
+	size_t availsz;
 	static void *bufs[MERGE_FNUM + 1];
 	static size_t bufs_sz[MERGE_FNUM + 1];
 
@@ -191,47 +191,65 @@
 		}
 	}
 
+	/*
+	 * We now loop reading a new record from the file with the
+	 * 'sorted first' existing record.
+	 * As each record is added, the 'first' record is written to the
+	 * output file - maintaining one record from each file in the sorted
+	 * list.
+	 */
 	cfile = (struct mfile *) bufs[nf];
-	cfile->flno = flist[0]->flno;
 	cfile->end = (u_char *) cfile + bufs_sz[nf];
-	while (nfiles) {
-		for (c = 1; c == 1;) {
-			c = get(cfile->flno, 0, NULL, nfiles, cfile->rec,
-			    cfile->end, ftbl);
-			if (c == EOF) {
-				put(flist[0]->rec, outfp);
-				if (--nfiles > 0) {
-					flist++;
-					cfile->flno = flist[0]->flno;
-				}
+	cfile->flno = flist[0]->flno;
+	for (;;) {
+		c = get(cfile->flno, 0, NULL, nfiles, cfile->rec,
+		    cfile->end, ftbl);
+		if (c == EOF) {
+			/* Write out last record from now-empty input */
+			put(flist[0]->rec, outfp);
+			if (--nfiles == 0)
 				break;
-			}
-			if (c == BUFFEND) {
-				char *oldbuf = (char *) cfile;
-				availsz = (char *) cfile->end - oldbuf;
-				availsz *= 2;
-				cfile = realloc(oldbuf, availsz);
-				if (!cfile)
-					err(2, "merge: realloc");
-
-				for (i = 0; i < nf + 1; i++) {
-					if (bufs[i] == oldbuf) {
-						bufs[i] = (char *)cfile;
-						bufs_sz[i] = availsz;
-						break;
-					}
+			/* Replace from file with now-first sorted record. */
+			/* (Moving base 'flist' saves copying everything!) */
+			flist++;
+			cfile->flno = flist[0]->flno;
+			continue;
+		}
+		if (c == BUFFEND) {
+			/* Buffer not large enough - double in size */
+			char *oldbuf = (char *) cfile;
+			availsz = (char *) cfile->end - oldbuf;
+			availsz *= 2;
+			cfile = realloc(oldbuf, availsz);
+			if (!cfile)
+				err(2, "merge: realloc");
+			cfile->end = (u_char *)cfile + availsz;
+
+			/* Update pointers we'll use for next merge */
+			for (i = 0; i < nf + 1; i++) {
+				if (bufs[i] == oldbuf) {
+					bufs[i] = (char *)cfile;
+					bufs_sz[i] = availsz;
+					break;
 				}
-
-				cfile->end = (u_char *)cfile + availsz;
-				c = 1;
-				continue;
 			}
-				
-			c = insert(flist, &cfile, nfiles, DELETE);
-			if (c == 0)
-				put(cfile->rec, outfp);
+			/* Read again from same file into same buffer */
+			continue;
 		}
-	}	
+			
+		/* Add into sort, removing the original first entry */
+		c = insert(flist, &cfile, nfiles, DELETE);
+
+		/*
+		 * 'cfile' is now the buffer from the old record from the
+		 * file we just read, but with the file number of the
+		 * current 'first record.
+		 * (Unless we are rejecting a duplicate, when c == 1 and
+		 * it is unchanged!)
+		 */
+		if (c == 0)
+			put(cfile->rec, outfp);
+	}
 }
 
 /*
@@ -240,67 +258,64 @@
  */
 static int
 insert(struct mfile **flist, struct mfile **rec, int ttop, int delete)
-	/* delete, ttop:			 delete = 0 or 1 */
 {
 	struct mfile *tmprec = *rec;
 	int mid, top = ttop, bot = 0, cmpv = 1;
 
 	for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) {
 		cmpv = cmp(tmprec->rec, flist[mid]->rec);
-		if (cmpv < 0)
-			top = mid;
-		else if (cmpv > 0)
-			bot = mid;
-		else {
+		if (cmpv == 0 ) {
 			if (UNIQUE)
-				break;
-
+				/* Duplicate key, read another record */
+				return 1;
 			/*
-			 * Apply sort by fileno, to give priority
-			 * to earlier specified files, hence providing
-			 * more stable sort.
-			 * If fileno is same, the new record should
-			 * be put _after_ the previous entry.
+			 * Apply sort by fileno, to give priority to earlier
+			 * specified files, hence providing a stable sort.
+			 * We could truncate the sort is the fileno are
+			 * adjacent - but that is all too hard!
+			 * The fileno cannot be equal, since we only have one
+			 * record from each file (+ flist[0] which never
+			 * comes here).
 			 */
-			/* XXX (dsl) this doesn't seem right */
 			cmpv = tmprec->flno - flist[mid]->flno;
-			if (cmpv >= 0)
-				bot = mid;
-			else
-				bot = mid - 1;
-
-			break;
 		}
+		if (cmpv < 0)
+			top = mid;
+		else
+			bot = mid;
 	}
 
+	/* At this point we haven't yet compared against flist[0] */
+
 	if (delete) {
-		if (UNIQUE) {
-			if (!bot && cmpv)
-				cmpv = cmp(tmprec->rec, flist[0]->rec);
-			if (!cmpv)
-				return (1);
-		}
+		/* flist[0] came from the same file, it cannot be earlier. */
+		if (UNIQUE && bot == 0 && cmp(tmprec->rec, flist[0]->rec) == 0)
+			/* Duplicate record (key) in original file */
+			return 1;
 		tmprec = flist[0];
 		if (bot)
 			memmove(flist, flist + 1, bot * sizeof(MFILE **));
 		flist[bot] = *rec;
+		tmprec->flno = flist[0]->flno;
 		*rec = tmprec;
-		(*rec)->flno = flist[0]->flno;
-		return (0);
-	} else {
-		if (!bot && !(UNIQUE && !cmpv)) {
-			cmpv = cmp(tmprec->rec, flist[0]->rec);
-			if (cmpv < 0)
-				bot = -1;
-		}
-		if (UNIQUE && !cmpv)
-			return (1);
-		bot++;
-		memmove(flist + bot + 1, flist + bot,
-		    (ttop - bot) * sizeof(MFILE **));
-		flist[bot] = *rec;
-		return (0);
+		return 0;
+	}
+
+	/* Inserting original set of records */
+
+	if (bot == 0 && cmpv != 0) {
+		/* Doesn't match flist[1], must compare with flist[0] */
+		cmpv = cmp(tmprec->rec, flist[0]->rec);
+		if (cmpv == 0 && UNIQUE)
+			return 1;
+		/* Add matching keys in file order (ie new is later) */
+		if (cmpv < 0)
+			bot = -1;
 	}
+	bot++;
+	memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE **));
+	flist[bot] = *rec;
+	return 0;
 }
 
 /*

Reply via email to