Module Name: src Committed By: dsl Date: Sat Sep 26 21:16:55 UTC 2009
Modified Files: src/usr.bin/sort: files.c fsort.c fsort.h msort.c sort.c sort.h tmp.c Log Message: Move all the fopen() calls out of the record read routines into the callers. Split the merge sort so that fsort() can pass the 'FILE *' of the temporary files to be merged into the merge code. Don't rely on realloc() not moving the end address of a buffer! Rework merge sort so that it sorts pointers to 'struct mfile' and only copies about sort record descriptors. No functional change intended. To generate a diff of this commit: cvs rdiff -u -r1.37 -r1.38 src/usr.bin/sort/files.c cvs rdiff -u -r1.41 -r1.42 src/usr.bin/sort/fsort.c cvs rdiff -u -r1.16 -r1.17 src/usr.bin/sort/fsort.h cvs rdiff -u -r1.26 -r1.27 src/usr.bin/sort/msort.c cvs rdiff -u -r1.55 -r1.56 src/usr.bin/sort/sort.c cvs rdiff -u -r1.28 -r1.29 src/usr.bin/sort/sort.h cvs rdiff -u -r1.14 -r1.15 src/usr.bin/sort/tmp.c Please note that diffs are not public domain; they are subject to the copyright notices on the relevant files.
Modified files: Index: src/usr.bin/sort/files.c diff -u src/usr.bin/sort/files.c:1.37 src/usr.bin/sort/files.c:1.38 --- src/usr.bin/sort/files.c:1.37 Thu Sep 10 22:02:40 2009 +++ src/usr.bin/sort/files.c Sat Sep 26 21:16:55 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: files.c,v 1.37 2009/09/10 22:02:40 dsl Exp $ */ +/* $NetBSD: files.c,v 1.38 2009/09/26 21:16:55 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -65,7 +65,7 @@ #include "fsort.h" #ifndef lint -__RCSID("$NetBSD: files.c,v 1.37 2009/09/10 22:02:40 dsl Exp $"); +__RCSID("$NetBSD: files.c,v 1.38 2009/09/26 21:16:55 dsl Exp $"); __SCCSID("@(#)files.c 8.1 (Berkeley) 6/6/93"); #endif /* not lint */ @@ -78,20 +78,15 @@ * in the first fsort pass. */ int -makeline(int flno, int top, struct filelist *filelist, int nfiles, - RECHEADER *recbuf, u_char *bufend, struct field *dummy2) +makeline(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *dummy2) { - static u_char *obufend; + static u_char *opos = NULL; static size_t osz; u_char *pos; - static int filenum = 0, overflow = 0; - static FILE *fp = 0; int c; - c = 0; /* XXXGCC -Wuninitialized [pmppc] */ - pos = recbuf->data; - if (overflow) { + if (opos != NULL) { /* * Buffer shortage is solved by either of two ways: * o flush previous buffered data and start using the @@ -101,77 +96,47 @@ * The former is preferred, realloc is only done when * there is exactly one item in buffer which does not fit. */ - if (bufend == obufend) - memmove(pos, bufend - osz, osz); + if (pos != opos) + memmove(pos, opos, osz); pos += osz; - overflow = 0; + opos = NULL; } - for (;;) { - if (flno >= 0 && (fp = fstack[flno].fp) == NULL) - return (EOF); - else if (fp == NULL) { - if (filenum >= nfiles) - return (EOF); - if (!(fp = fopen(filelist->names[filenum], "r"))) - err(2, "%s", filelist->names[filenum]); - filenum++; - } - while ((pos < bufend) && ((c = getc(fp)) != EOF)) { - *pos++ = c; - if (c == REC_D) { - recbuf->offset = 0; - recbuf->length = pos - recbuf->data; - recbuf->keylen = recbuf->length - 1; - return (0); + while (pos < bufend) { + c = getc(fp); + if (c == EOF) { + if (pos == recbuf->data) { + FCLOSE(fp); + return EOF; } + /* Add terminator to partial line */ + c = REC_D; } - if (pos >= bufend) { - if (recbuf->data < bufend) { - overflow = 1; - obufend = bufend; - osz = (pos - recbuf->data); - } - return (BUFFEND); - } else if (c == EOF) { - if (recbuf->data != pos) { - *pos++ = REC_D; - recbuf->offset = 0; - recbuf->length = pos - recbuf->data; - recbuf->keylen = recbuf->length - 1; - return (0); - } - FCLOSE(fp); - fp = 0; - if (flno >= 0) - fstack[flno].fp = 0; - } else { - - warnx("makeline: line too long: ignoring '%.100s...'", recbuf->data); - - /* Consume the rest of line from input */ - while ((c = getc(fp)) != REC_D && c != EOF) - ; - + *pos++ = c; + if (c == REC_D) { recbuf->offset = 0; - recbuf->length = 0; - recbuf->keylen = 0; - - return (BUFFEND); + recbuf->length = pos - recbuf->data; + recbuf->keylen = recbuf->length - 1; + return (0); } } + + /* Ran out of buffer space... */ + if (recbuf->data < bufend) { + /* Remember where the partial record is */ + osz = pos - recbuf->data; + opos = recbuf->data; + } + return (BUFFEND); } /* * This generates keys. It's only called in the first fsort pass */ int -makekey(int flno, int top, struct filelist *filelist, int nfiles, - RECHEADER *recbuf, u_char *bufend, struct field *ftbl) +makekey(FILE *fp, RECHEADER *recbuf, u_char *bufend, struct field *ftbl) { - static int filenum = 0; - static FILE *dbdesc = 0; static u_char *line_data; static ssize_t line_size; static int overflow = 0; @@ -182,29 +147,10 @@ return overflow ? BUFFEND : 0; } - /* Loop through files until we find a line of input */ - for (;;) { - if (flno >= 0) { - if (!(dbdesc = fstack[flno].fp)) - return (EOF); - } else if (!dbdesc) { - if (filenum >= nfiles) - return (EOF); - dbdesc = fopen(filelist->names[filenum], "r"); - if (!dbdesc) - err(2, "%s", filelist->names[filenum]); - filenum++; - } - line_size = seq(dbdesc, &line_data); - if (line_size != 0) - /* Got a line */ - break; - - /* End of file ... */ - FCLOSE(dbdesc); - dbdesc = 0; - if (flno >= 0) - fstack[flno].fp = 0; + line_size = seq(fp, &line_data); + if (line_size == 0) { + FCLOSE(fp); + return EOF; } if (line_size > bufend - recbuf->data) { @@ -299,18 +245,14 @@ * get a record from a temporary file. (Used by merge sort.) */ int -geteasy(int flno, int top, struct filelist *filelist, int nfiles, - RECHEADER *rec, u_char *end, struct field *dummy2) +geteasy(FILE *fp, RECHEADER *rec, u_char *end, struct field *dummy2) { int i; - FILE *fp; - fp = fstack[flno].fp; if ((u_char *)(rec + 1) > end) return (BUFFEND); if (!fread(rec, 1, offsetof(RECHEADER, data), fp)) { fclose(fp); - fstack[flno].fp = 0; return (EOF); } if (end - rec->data < (ptrdiff_t)rec->length) { Index: src/usr.bin/sort/fsort.c diff -u src/usr.bin/sort/fsort.c:1.41 src/usr.bin/sort/fsort.c:1.42 --- src/usr.bin/sort/fsort.c:1.41 Thu Sep 10 22:02:40 2009 +++ src/usr.bin/sort/fsort.c Sat Sep 26 21:16:55 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: fsort.c,v 1.41 2009/09/10 22:02:40 dsl Exp $ */ +/* $NetBSD: fsort.c,v 1.42 2009/09/26 21:16:55 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -72,15 +72,13 @@ #include "fsort.h" #ifndef lint -__RCSID("$NetBSD: fsort.c,v 1.41 2009/09/10 22:02:40 dsl Exp $"); +__RCSID("$NetBSD: fsort.c,v 1.42 2009/09/26 21:16:55 dsl Exp $"); __SCCSID("@(#)fsort.c 8.1 (Berkeley) 6/6/93"); #endif /* not lint */ #include <stdlib.h> #include <string.h> -struct tempfile fstack[MAXFCT]; - #define SALIGN(n) ((n+sizeof(length_t)-1) & ~(sizeof(length_t)-1)) void @@ -96,12 +94,14 @@ get_func_t get; RECHEADER *crec; RECHEADER *nbuffer; - FILE *fp; + FILE *fp, *tmp_fp; + int file_no; + int max_recs = DEBUG('m') ? 16 : MAXNUM; buffer = malloc(bufsize); bufend = (u_char *)buffer + bufsize; /* Allocate double length keymap for radix_sort */ - keylist = malloc(2 * MAXNUM * sizeof(*keylist)); + keylist = malloc(2 * max_recs * sizeof(*keylist)); if (buffer == NULL || keylist == NULL) err(2, "failed to malloc initial buffer or keylist"); @@ -112,6 +112,11 @@ /* Key (merged key fields) added before data */ get = makekey; + file_no = 0; + fp = fopen(filelist->names[0], "r"); + if (fp == NULL) + err(2, "%s", filelist->names[0]); + /* Loop through reads of chunk of input files that get sorted * and then merged together. */ for (;;) { @@ -121,21 +126,29 @@ /* Loop reading records */ for (;;) { - c = get(-1, 0, filelist, nfiles, crec, bufend, ftbl); + c = get(fp, crec, bufend, ftbl); /* 'c' is 0, EOF or BUFFEND */ if (c == 0) { /* Save start of key in input buffer */ *keypos++ = crec; - if (++nelem == MAXNUM) { + if (++nelem == max_recs) { c = BUFFEND; break; } crec = (RECHEADER *)(crec->data + SALIGN(crec->length)); continue; } - if (c == EOF) - break; - if (nelem >= MAXNUM || bufsize >= MAXBUFSIZE) + if (c == EOF) { + /* try next file */ + if (++file_no >= nfiles) + /* no more files */ + break; + fp = fopen(filelist->names[file_no], "r"); + if (fp == NULL) + err(2, "%s", filelist->names[file_no]); + continue; + } + if (nelem >= max_recs || bufsize >= MAXBUFSIZE) /* Need to sort and save this lot of data */ break; @@ -158,7 +171,7 @@ } /* Sort this set of records */ - radix_sort(keylist, keylist + MAXNUM, nelem); + radix_sort(keylist, keylist + max_recs, nelem); if (c == EOF && mfct == 0) { /* all the data is (sorted) in the buffer */ @@ -168,25 +181,17 @@ } /* Save current data to a temporary file for a later merge */ - fp = ftmp(); - fstack[mfct].fp = fp; - append(keylist, nelem, fp, putrec); - mfct++; + tmp_fp = ftmp(); + append(keylist, nelem, tmp_fp, putrec); + save_for_merge(tmp_fp, geteasy, ftbl); + mfct = 1; if (c == EOF) { /* merge to output file */ - fmerge(0, filelist, mfct, geteasy, outfp, + merge_sort(outfp, DEBUG('k') ? putkeydump : putline, ftbl); break; } - - if (mfct == MERGE_FNUM) { - /* Merge the files we have */ - fp = ftmp(); - fmerge(0, filelist, mfct, geteasy, fp, putrec, ftbl); - mfct = 1; - fstack[0].fp = fp; - } } free(keylist); Index: src/usr.bin/sort/fsort.h diff -u src/usr.bin/sort/fsort.h:1.16 src/usr.bin/sort/fsort.h:1.17 --- src/usr.bin/sort/fsort.h:1.16 Sat Sep 5 12:00:25 2009 +++ src/usr.bin/sort/fsort.h Sat Sep 26 21:16:55 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: fsort.h,v 1.16 2009/09/05 12:00:25 dsl Exp $ */ +/* $NetBSD: fsort.h,v 1.17 2009/09/26 21:16:55 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -76,16 +76,3 @@ */ #define DEFBUFSIZE (1 << 20) /* 1MB */ #define MAXBUFSIZE (8 << 20) /* 10 MB */ - -/* - * Number of files merge() can merge in one pass. - * This should be power of two so that it's possible to use this value - * for rouding. - */ -#define MERGE_FNUM 16 - -/* Temporary files contian data (with record headers) in sorted order */ -struct tempfile { - FILE *fp; -}; -extern struct tempfile fstack[MAXFCT]; Index: src/usr.bin/sort/msort.c diff -u src/usr.bin/sort/msort.c:1.26 src/usr.bin/sort/msort.c:1.27 --- src/usr.bin/sort/msort.c:1.26 Thu Sep 10 22:02:40 2009 +++ src/usr.bin/sort/msort.c Sat Sep 26 21:16:55 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: msort.c,v 1.26 2009/09/10 22:02:40 dsl Exp $ */ +/* $NetBSD: msort.c,v 1.27 2009/09/26 21:16:55 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -65,128 +65,119 @@ #include "fsort.h" #ifndef lint -__RCSID("$NetBSD: msort.c,v 1.26 2009/09/10 22:02:40 dsl Exp $"); +__RCSID("$NetBSD: msort.c,v 1.27 2009/09/26 21:16:55 dsl Exp $"); __SCCSID("@(#)msort.c 8.1 (Berkeley) 6/6/93"); #endif /* not lint */ #include <stdlib.h> #include <string.h> #include <unistd.h> +#include <util.h> /* Subroutines using comparisons: merge sort and check order */ #define DELETE (1) typedef struct mfile { - u_char *end; - int flno; - RECHEADER rec[1]; + FILE *fp; + get_func_t get; + RECHEADER *rec; + u_char *end; } MFILE; static int cmp(RECHEADER *, RECHEADER *); -static int insert(struct mfile **, struct mfile **, int, int); -static void merge(int, int, get_func_t, FILE *, put_func_t, struct field *); +static int insert(struct mfile **, struct mfile *, int, int); + +/* + * Number of files merge() can merge in one pass. + * This should be power of two so that it's possible to use this value + * for rouding. + */ +#define MERGE_FNUM 16 + +static struct mfile fstack[MERGE_FNUM]; +static int fstack_count; void -fmerge(int binno, struct filelist *filelist, int nfiles, - get_func_t get, FILE *outfp, put_func_t fput, struct field *ftbl) +save_for_merge(FILE *fp, get_func_t get, struct field *ftbl) { - FILE *tout; - int i, j, last; - put_func_t put; - - while (nfiles) { - put = putrec; - for (j = 0; j < nfiles; j += MERGE_FNUM) { - if (nfiles <= MERGE_FNUM) { - tout = outfp; - put = fput; - } - else - tout = ftmp(); - last = min(MERGE_FNUM, nfiles - j); - if (binno < 0) { - for (i = 0; i < last; i++) - if (!(fstack[i+MAXFCT-1-MERGE_FNUM].fp = - fopen(filelist->names[j+i], "r"))) - err(2, "%s", - filelist->names[j+i]); - merge(MAXFCT-1-MERGE_FNUM, last, get, tout, put, ftbl); - } else { - for (i = 0; i< last; i++) - rewind(fstack[i+j].fp); - merge(j, last, get, tout, put, ftbl); - } - if (nfiles > MERGE_FNUM) - fstack[j/MERGE_FNUM].fp = tout; - } - nfiles = (nfiles + (MERGE_FNUM - 1)) / MERGE_FNUM; - if (nfiles == 1) - nfiles = 0; - if (binno < 0) { - binno = 0; - get = geteasy; - } + FILE *mfp; + + if (fstack_count == MERGE_FNUM) { + /* Must reduce the number of temporary files */ + mfp = ftmp(); + merge_sort(mfp, putrec, ftbl); + fstack[0].fp = mfp; + fstack[0].get = geteasy; + fstack_count = 1; } + + fstack[fstack_count].fp = fp; + fstack[fstack_count++].get = get; } -static void -merge(int infl0, int nfiles, get_func_t get, FILE *outfp, put_func_t put, - struct field *ftbl) +void +fmerge(struct filelist *filelist, int nfiles, FILE *outfp, struct field *ftbl) { - int c, i, j, nf = nfiles; - struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile; - size_t availsz; - static void *bufs[MERGE_FNUM + 1]; - static size_t bufs_sz[MERGE_FNUM + 1]; + get_func_t get = SINGL_FLD ? makeline : makekey; + FILE *fp; + int i; + + for (i = 0; i < nfiles; i++) { + fp = fopen(filelist->names[i], "r"); + if (fp == NULL) + err(2, "%s", filelist->names[i]); + save_for_merge(fp, get, ftbl); + } - /* - * We need nfiles + 1 buffers. - */ - for (i = 0; i < nfiles + 1; i++) { - if (bufs[i]) - continue; + merge_sort(outfp, putline, ftbl); +} - bufs[i] = malloc(DEFLLEN); - if (!bufs[i]) - err(2, "merge: malloc"); - memset(bufs[i], 0, DEFLLEN); - bufs_sz[i] = DEFLLEN; - } +void +merge_sort(FILE *outfp, put_func_t put, struct field *ftbl) +{ + struct mfile *flistb[MERGE_FNUM], **flist = flistb, *cfile; + RECHEADER *new_rec; + u_char *new_end; + void *tmp; + int c, i, nfiles; + size_t sz; /* Read one record from each file (read again if a duplicate) */ - for (i = j = 0; i < nfiles; i++, j++) { - cfile = (struct mfile *) bufs[j]; - cfile->flno = infl0 + j; - cfile->end = (u_char *) bufs[j] + bufs_sz[j]; - for (c = 1; c == 1;) { - c = get(cfile->flno, 0, NULL, nfiles, cfile->rec, - cfile->end, ftbl); - if (c == EOF) { - --i; - --nfiles; + for (nfiles = i = 0; i < fstack_count; i++) { + cfile = &fstack[i]; + if (cfile->rec == NULL) { + cfile->rec = emalloc(DEFLLEN); + cfile->end = (u_char *)cfile->rec + DEFLLEN; + } + rewind(cfile->fp); + + for (;;) { + c = cfile->get(cfile->fp, cfile->rec, cfile->end, ftbl); + if (c == EOF) break; - } if (c == BUFFEND) { - bufs_sz[j] *= 2; - cfile = realloc(bufs[j], bufs_sz[j]); - if (!cfile) - err(2, "merge: realloc"); - - bufs[j] = (void *) cfile; - cfile->end = (u_char *)cfile + bufs_sz[j]; - - c = 1; + /* Double buffer size */ + sz = (cfile->end - (u_char *)cfile->rec) * 2; + cfile->rec = erealloc(cfile->rec, sz); + cfile->end = (u_char *)cfile->rec + sz; continue; } - if (i) - c = insert(flist, &cfile, i, !DELETE); - else + if (nfiles != 0) { + if (insert(flist, cfile, nfiles, !DELETE)) + /* Duplicate removed */ + continue; + } else flist[0] = cfile; + nfiles++; + break; } } + if (nfiles == 0) + return; + /* * We now loop reading a new record from the file with the * 'sorted first' existing record. @@ -194,86 +185,85 @@ * output file - maintaining one record from each file in the sorted * list. */ - cfile = (struct mfile *) bufs[nf]; - cfile->end = (u_char *) cfile + bufs_sz[nf]; - cfile->flno = flist[0]->flno; + new_rec = emalloc(DEFLLEN); + new_end = (u_char *)new_rec + DEFLLEN; for (;;) { - c = get(cfile->flno, 0, NULL, nfiles, cfile->rec, - cfile->end, ftbl); + cfile = flist[0]; + c = cfile->get(cfile->fp, new_rec, new_end, ftbl); if (c == EOF) { /* Write out last record from now-empty input */ - put(flist[0]->rec, outfp); + put(cfile->rec, outfp); if (--nfiles == 0) break; /* Replace from file with now-first sorted record. */ /* (Moving base 'flist' saves copying everything!) */ flist++; - cfile->flno = flist[0]->flno; continue; } if (c == BUFFEND) { /* Buffer not large enough - double in size */ - char *oldbuf = (char *) cfile; - availsz = (char *) cfile->end - oldbuf; - availsz *= 2; - cfile = realloc(oldbuf, availsz); - if (!cfile) - err(2, "merge: realloc"); - cfile->end = (u_char *)cfile + availsz; - - /* Update pointers we'll use for next merge */ - for (i = 0; i < nf + 1; i++) { - if (bufs[i] == oldbuf) { - bufs[i] = (char *)cfile; - bufs_sz[i] = availsz; - break; - } - } - /* Read again from same file into same buffer */ + sz = (new_end - (u_char *)new_rec) * 2; + new_rec = erealloc(new_rec, sz); + new_end = (u_char *)new_rec +sz; continue; } - + + /* Swap in new buffer, saving old */ + tmp = cfile->rec; + cfile->rec = new_rec; + new_rec = tmp; + tmp = cfile->end; + cfile->end = new_end; + new_end = tmp; + /* Add into sort, removing the original first entry */ - c = insert(flist, &cfile, nfiles, DELETE); + c = insert(flist, cfile, nfiles, DELETE); + if (c != 0 || (UNIQUE && cfile == flist[0] + && cmp(new_rec, cfile->rec) == 0)) { + /* Was an unwanted duplicate, restore buffer */ + tmp = cfile->rec; + cfile->rec = new_rec; + new_rec = tmp; + tmp = cfile->end; + cfile->end = new_end; + new_end = tmp; + continue; + } - /* - * 'cfile' is now the buffer from the old record from the - * file we just read, but with the file number of the - * current 'first record. - * (Unless we are rejecting a duplicate, when c == 1 and - * it is unchanged!) - */ - if (c == 0) - put(cfile->rec, outfp); + /* Write out 'old' record */ + put(new_rec, outfp); } + + free(new_rec); } /* - * if delete: inserts *rec in flist, deletes flist[0], and leaves it in *rec; + * if delete: inserts rec in flist, deletes flist[0]; * otherwise just inserts *rec in flist. + * Returns 1 if record is a duplicate to be ignored. */ static int -insert(struct mfile **flist, struct mfile **rec, int ttop, int delete) +insert(struct mfile **flist, struct mfile *rec, int ttop, int delete) { - struct mfile *tmprec = *rec; int mid, top = ttop, bot = 0, cmpv = 1; for (mid = top / 2; bot + 1 != top; mid = (bot + top) / 2) { - cmpv = cmp(tmprec->rec, flist[mid]->rec); + cmpv = cmp(rec->rec, flist[mid]->rec); if (cmpv == 0 ) { if (UNIQUE) /* Duplicate key, read another record */ + /* NB: This doesn't guarantee to keep any + * particular record. */ return 1; /* - * Apply sort by fileno, to give priority to earlier - * specified files, hence providing a stable sort. + * Apply sort by input file order. * We could truncate the sort is the fileno are * adjacent - but that is all too hard! * The fileno cannot be equal, since we only have one * record from each file (+ flist[0] which never * comes here). */ - cmpv = tmprec->flno - flist[mid]->flno; + cmpv = rec < flist[mid] ? -1 : 1; if (REVERSE) cmpv = -cmpv; } @@ -286,16 +276,11 @@ /* At this point we haven't yet compared against flist[0] */ if (delete) { - /* flist[0] came from the same file, it cannot be earlier. */ - if (UNIQUE && bot == 0 && cmp(tmprec->rec, flist[0]->rec) == 0) - /* Duplicate record (key) in original file */ - return 1; - tmprec = flist[0]; - if (bot) - memmove(flist, flist + 1, bot * sizeof(MFILE **)); - flist[bot] = *rec; - tmprec->flno = flist[0]->flno; - *rec = tmprec; + /* flist[0] is ourselves, only the caller knows the old data */ + if (bot != 0) { + memmove(flist, flist + 1, bot * sizeof(MFILE *)); + flist[bot] = rec; + } return 0; } @@ -303,7 +288,7 @@ if (bot == 0 && cmpv != 0) { /* Doesn't match flist[1], must compare with flist[0] */ - cmpv = cmp(tmprec->rec, flist[0]->rec); + cmpv = cmp(rec->rec, flist[0]->rec); if (cmpv == 0 && UNIQUE) return 1; /* Add matching keys in file order (ie new is later) */ @@ -311,8 +296,8 @@ bot = -1; } bot++; - memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE **)); - flist[bot] = *rec; + memmove(flist + bot + 1, flist + bot, (ttop - bot) * sizeof(MFILE *)); + flist[bot] = rec; return 0; } @@ -320,21 +305,27 @@ * check order on one file */ void -order(struct filelist *filelist, get_func_t get, struct field *ftbl) +order(struct filelist *filelist, struct field *ftbl) { + get_func_t get = SINGL_FLD ? makeline : makekey; RECHEADER *crec, *prec, *trec; u_char *crec_end, *prec_end, *trec_end; + FILE *fp; int c; + fp = fopen(filelist->names[0], "r"); + if (fp == NULL) + err(2, "%s", filelist->names[0]); + crec = malloc(offsetof(RECHEADER, data[DEFLLEN])); crec_end = crec->data + DEFLLEN; prec = malloc(offsetof(RECHEADER, data[DEFLLEN])); prec_end = prec->data + DEFLLEN; /* XXX this does exit(0) for overlong lines */ - if (get(-1, 0, filelist, 1, prec, prec_end, ftbl) != 0) + if (get(fp, prec, prec_end, ftbl) != 0) exit(0); - while (get(-1, 0, filelist, 1, crec, crec_end, ftbl) == 0) { + while (get(fp, crec, crec_end, ftbl) == 0) { if (0 < (c = cmp(prec, crec))) { crec->data[crec->length-1] = 0; errx(1, "found disorder: %s", crec->data+crec->offset); Index: src/usr.bin/sort/sort.c diff -u src/usr.bin/sort/sort.c:1.55 src/usr.bin/sort/sort.c:1.56 --- src/usr.bin/sort/sort.c:1.55 Thu Sep 10 22:02:40 2009 +++ src/usr.bin/sort/sort.c Sat Sep 26 21:16:55 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: sort.c,v 1.55 2009/09/10 22:02:40 dsl Exp $ */ +/* $NetBSD: sort.c,v 1.56 2009/09/26 21:16:55 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -77,7 +77,7 @@ #endif /* not lint */ #ifndef lint -__RCSID("$NetBSD: sort.c,v 1.55 2009/09/10 22:02:40 dsl Exp $"); +__RCSID("$NetBSD: sort.c,v 1.56 2009/09/26 21:16:55 dsl Exp $"); __SCCSID("@(#)sort.c 8.1 (Berkeley) 6/6/93"); #endif /* not lint */ @@ -121,7 +121,6 @@ int main(int argc, char *argv[]) { - get_func_t get; int ch, i, stdinflag = 0; char cflag = 0, mflag = 0; char *outfile, *outpath = 0; @@ -313,15 +312,11 @@ num_input_files = argc - optind; } - if (SINGL_FLD) - get = makeline; - else - get = makekey; - if (cflag) { - order(&filelist, get, fldtab); + order(&filelist, fldtab); /* NOT REACHED */ } + if (!outpath) { toutpath[0] = '\0'; /* path not used in this case */ outfile = outpath = toutpath; @@ -356,10 +351,9 @@ err(2, "output file %s", outfile); } - if (mflag) { - fmerge(-1, &filelist, num_input_files, get, outfp, putline, - fldtab); - } else + if (mflag) + fmerge(&filelist, num_input_files, outfp, fldtab); + else fsort(&filelist, num_input_files, outfp, fldtab); if (outfile != outpath) { Index: src/usr.bin/sort/sort.h diff -u src/usr.bin/sort/sort.h:1.28 src/usr.bin/sort/sort.h:1.29 --- src/usr.bin/sort/sort.h:1.28 Thu Sep 10 22:02:40 2009 +++ src/usr.bin/sort/sort.h Sat Sep 26 21:16:55 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: sort.h,v 1.28 2009/09/10 22:02:40 dsl Exp $ */ +/* $NetBSD: sort.h,v 1.29 2009/09/26 21:16:55 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -158,8 +158,7 @@ const char * const * names; }; -typedef int (*get_func_t)(int, int, struct filelist *, int, - RECHEADER *, u_char *, struct field *); +typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *); typedef void (*put_func_t)(const RECHEADER *, FILE *); extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS]; @@ -181,17 +180,15 @@ void fixit(int *, char **); void fldreset(struct field *); FILE *ftmp(void); -void fmerge(int, struct filelist *, int, - get_func_t, FILE *, put_func_t, struct field *); +void fmerge(struct filelist *, int, FILE *, struct field *); +void save_for_merge(FILE *, get_func_t, struct field *); +void merge_sort(FILE *, put_func_t, struct field *); void fsort(struct filelist *, int, FILE *, struct field *); -int geteasy(int, int, struct filelist *, - int, RECHEADER *, u_char *, struct field *); -int makekey(int, int, struct filelist *, - int, RECHEADER *, u_char *, struct field *); -int makeline(int, int, struct filelist *, - int, RECHEADER *, u_char *, struct field *); +int geteasy(FILE *, RECHEADER *, u_char *, struct field *); +int makekey(FILE *, RECHEADER *, u_char *, struct field *); +int makeline(FILE *, RECHEADER *, u_char *, struct field *); int optval(int, int); -void order(struct filelist *, get_func_t, struct field *); +void order(struct filelist *, struct field *); void putline(const RECHEADER *, FILE *); void putrec(const RECHEADER *, FILE *); void putkeydump(const RECHEADER *, FILE *); Index: src/usr.bin/sort/tmp.c diff -u src/usr.bin/sort/tmp.c:1.14 src/usr.bin/sort/tmp.c:1.15 --- src/usr.bin/sort/tmp.c:1.14 Sat Aug 15 09:48:46 2009 +++ src/usr.bin/sort/tmp.c Sat Sep 26 21:16:55 2009 @@ -1,4 +1,4 @@ -/* $NetBSD: tmp.c,v 1.14 2009/08/15 09:48:46 dsl Exp $ */ +/* $NetBSD: tmp.c,v 1.15 2009/09/26 21:16:55 dsl Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. @@ -64,7 +64,7 @@ #include <sys/cdefs.h> #ifndef lint -__RCSID("$NetBSD: tmp.c,v 1.14 2009/08/15 09:48:46 dsl Exp $"); +__RCSID("$NetBSD: tmp.c,v 1.15 2009/09/26 21:16:55 dsl Exp $"); __SCCSID("@(#)tmp.c 8.1 (Berkeley) 6/6/93"); #endif /* not lint */ @@ -101,7 +101,8 @@ err(2, "ftmp: mkstemp(\"%s\")", path); if (!(fp = fdopen(fd, "w+"))) err(2, "ftmp: fdopen(\"%s\")", path); - (void)unlink(path); + if (!DEBUG('t')) + (void)unlink(path); (void)sigprocmask(SIG_SETMASK, &oset, NULL); return (fp);