[hackers] [sbase][PATCH] ed: Fix j command bugs
Remove a double free() that caused joins to randomly segfault. Fix a bug in delete() that occasionally caused lines to be transposed. --- ed.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ed.c b/ed.c index e737d57..f6e195f 100644 --- a/ed.c +++ b/ed.c @@ -781,7 +781,7 @@ delete(int from, int to) lfrom = getindex(prevln(from)); lto = getindex(nextln(to)); lastln -= to - from + 1; - curln = (from > lastln) ? lastln : from;; + curln = prevln(from); relink(lto, lfrom, lto, lfrom); } @@ -819,7 +819,6 @@ join(void) size_t len = 0, cap = 0; static char *s; - free(s); for (s = NULL, i = line1;; i = nextln(i)) { for (t = gettxt(i); (c = *t) != '\n'; ++t) s = addchar(*t, s, &cap, &len); -- 2.9.0
[hackers] [sbase][PATCH] Rework l command for the Unicode world.
Rather than printing byte sequences for any non-ASCII characters, printable (via isprintrune()) Unicode characters are displayed normally. The usual \$, \t, \b and \\ escapes are displayed, but other non-printing characters are replaced with a Unicode escape (\u). This may be controversial, as it contradicts POSIX. Rationale: * Replacing printing non-ASCII runes with byte sequences is pointless. There is no reason to escape multibyte characters. * UTF-8 sequences should not be printed. It is far more useful to decode the sequence and print the Unicode code point. '\u2028' is much easier to understand than '\xe2\x80\xa8'--we are not forced to decode the transformation format. --- ed.c | 72 +--- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/ed.c b/ed.c index 4b28848..e737d57 100644 --- a/ed.c +++ b/ed.c @@ -13,6 +13,7 @@ #include #include +#include "utf.h" #include "util.h" #define REGEXSIZE 100 @@ -653,48 +654,53 @@ doread(const char *fname) } static void +lprint(char *s) +{ + int size; + Rune r; + + while ((size = chartorune(&r, s)) > 0 && r != '\n') { + switch (r) { + case '$': + fputs("\\$", stdout); + break; + case '\t': + fputs("\\t", stdout); + break; + case '\b': + fputs("\\b", stdout); + break; + case '\\': + fputs("", stdout); + break; + default: + if (!isprintrune(r)) + printf("\\u%04x", 0x & r); + else + fputrune(&r, stdout); + } + s += size; + } +} + +static void doprint(void) { - int i, c; - char *s, *str; + int i; + char *s; if (line1 <= 0 || line2 > lastln) error("incorrect address"); for (i = line1; i <= line2; ++i) { if (pflag == 'n') printf("%d\t", i); - for (s = gettxt(i); (c = *s) != '\n'; ++s) { - if (pflag != 'l') - goto print_char; - switch (c) { - case '$': - str = "\\$"; - goto print_str; - case '\t': - str = "\\t"; - goto print_str; - case '\b': - str = "\\b"; - goto print_str; - case '\\': - str = ""; - goto print_str; - default: - if (!isprint(c)) { - printf("\\x%x", 0xFF & c); - break; - } - print_char: - putchar(c); - 
break; - print_str: - fputs(str, stdout); - break; - } + s = gettxt(i); + if (pflag == 'l') { + lprint(s); + fputs("$\n", stdout); + } else { + fputs(s, stdout); } - if (pflag == 'l') - fputs("$", stdout); - putc('\n', stdout); } curln = i - 1; } -- 2.9.0
[hackers] [sbase] [PATCH] ed: Do not try to read-in a nonexistent file
This fixes a segfault caused by running ed with a nonexistent filename argument, e.g. 'ed not_a_file_yet'. --- ed.c | 39 ++- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/ed.c b/ed.c index 8903957..599e575 100644 --- a/ed.c +++ b/ed.c @@ -609,26 +609,31 @@ doread(char *fname) if (fp) fclose(fp); - if (!(fp = fopen(fname, "r"))) - error("input/output error"); - curln = line2; - for (cnt = 0; (n = getline(&s, &len, fp)) > 0; cnt += (size_t)n) { - if (s[n-1] != '\n') { - if (len == SIZE_MAX || !(p = realloc(s, ++len))) - error("out of memory"); - s = p; - s[n-1] = '\n'; - s[n] = '\0'; + if (access(fname, F_OK)) { + fprintf(stderr, "?%s\n", fname); /* new file */ + } else { + if (!(fp = fopen(fname, "r"))) + error("input/output error"); + + curln = line2; + for (cnt = 0; (n = getline(&s, &len, fp)) > 0; cnt += (size_t)n) { + if (s[n-1] != '\n') { + if (len == SIZE_MAX || !(p = realloc(s, ++len))) + error("out of memory"); + s = p; + s[n-1] = '\n'; + s[n] = '\0'; + } + inject(s); } - inject(s); + printf("%zu\n", cnt); + + aux = fp; + fp = NULL; + if (fclose(aux)) + error("input/output error"); } - printf("%zu\n", cnt); - - aux = fp; - fp = NULL; - if (fclose(aux)) - error("input/output error"); if (savfname[0] == '\0') { modflag = 0; -- 2.3.5
[hackers] [sbase] [PATCH] tail: Don't print garbage when input contains no newlines.
getline(3) expects newline-terminated input. While glibc's implementation seems to catch unterminated input and zero the buffer, other versions (notably musl's) do not. This is a workaround. Garbage will still be read, but not printed. --- tail.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tail.c b/tail.c index 306bbc1..417289f 100644 --- a/tail.c +++ b/tail.c @@ -39,6 +39,7 @@ taketail(FILE *fp, const char *str, size_t n) Rune *r = NULL; char **ring = NULL; size_t i, j, *size = NULL; + int seenln = 0; if (!n) return; @@ -47,7 +48,7 @@ taketail(FILE *fp, const char *str, size_t n) ring = ecalloc(n, sizeof(*ring)); size = ecalloc(n, sizeof(*size)); - for (i = j = 0; getline(ring + i, size + i, fp) > 0; ) + for (i = j = 0; getline(ring + i, size + i, fp) > 0; seenln = 1) i = j = (i + 1) % n; } else { r = ecalloc(n, sizeof(*r)); @@ -59,7 +60,7 @@ taketail(FILE *fp, const char *str, size_t n) eprintf("%s: read error:", str); do { - if (ring && ring[j]) { + if (seenln && ring && ring[j]) { fputs(ring[j], stdout); free(ring[j]); } else if (r) { -- 2.3.5
[hackers] [sbase] [PATCH] comm: Print first trailing unpaired line in file 1
Previously, a line read from file 1 before a strcmp was performed would be overwritten and lost. Something like this: comm one_line_file empty_file produced no output. This patch is a bit inelegant, but quite simple. --- comm.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/comm.c b/comm.c index e0ec7b7..138b5ea 100644 --- a/comm.c +++ b/comm.c @@ -33,7 +33,7 @@ main(int argc, char *argv[]) { FILE *fp[2]; size_t linelen[2] = { 0, 0 }; - int ret = 0, i, diff = 0; + int ret = 0, i, diff = 0, seenline = 0; char *line[2] = { NULL, NULL }; ARGBEGIN { @@ -62,11 +62,13 @@ main(int argc, char *argv[]) for (i = 0; i < 2; i++) { if (diff && i == (diff < 0)) continue; - if (getline(&line[i], &linelen[i], fp[i]) > 0) + if (getline(&line[i], &linelen[i], fp[i]) > 0) { + seenline = 1; continue; + } if (ferror(fp[i])) eprintf("getline %s:", argv[i]); - if (diff && line[!i][0]) + if ((diff || seenline) && line[!i][0]) printline(!i, line[!i]); while (getline(&line[!i], &linelen[!i], fp[!i]) > 0) printline(!i, line[!i]); @@ -76,6 +78,7 @@ main(int argc, char *argv[]) } diff = strcmp(line[0], line[1]); LIMIT(diff, -1, 1); + seenline = 0; printline((2 - diff) % 3, line[MAX(0, diff)]); } end: -- 2.3.5
[hackers] [sbase] [PATCH] join: Use LIMIT macro
--- join.c | 5 + 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/join.c b/join.c index 1f2fb8c..ddccdf1 100644 --- a/join.c +++ b/join.c @@ -173,10 +173,7 @@ linecmp(struct line *la, struct line *lb, size_t jfa, size_t jfb) } else { status = memcmp(la->fields[jfa].s, lb->fields[jfb].s, MAX (la->fields[jfa].len, lb->fields[jfb].len)); - if (status > 0) - status = 1; - else if (status < 0) - status = -1; + LIMIT(status, -1, 1); } return status; -- 2.3.5
[hackers] [9base] [PATCH] sam: Fix dprint format strings
The '%lud' format used in dprint caused 'd' to appear at the end of each line number or character range. Using %ld seems to fix this. --- sam/io.c | 2 +- sam/sam.c | 8 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sam/io.c b/sam/io.c index 8740c7c..a554b11 100644 --- a/sam/io.c +++ b/sam/io.c @@ -169,7 +169,7 @@ closeio(Posn p) close(io); io = 0; if(p >= 0) - dprint("#%lud\n", p); + dprint("#%ld\n", p); } intremotefd0 = 0; diff --git a/sam/sam.c b/sam/sam.c index f180290..e72d890 100644 --- a/sam/sam.c +++ b/sam/sam.c @@ -716,14 +716,14 @@ printposn(File *f, int charsonly) /* check if addr ends with '\n' */ if(addr.r.p2>0 && addr.r.p2>addr.r.p1 && filereadc(f, addr.r.p2-1)=='\n') --l2; - dprint("%lud", l1); + dprint("%ld", l1); if(l2 != l1) - dprint(",%lud", l2); + dprint(",%ld", l2); dprint("; "); } - dprint("#%lud", addr.r.p1); + dprint("#%ld", addr.r.p1); if(addr.r.p2 != addr.r.p1) - dprint(",#%lud", addr.r.p2); + dprint(",#%ld", addr.r.p2); dprint("\n"); } -- 2.3.5
[hackers] [sbase] [PATCH] join: Stricter parsing of -o list
This fixes naive parsing that would happily read a giant string of numbers into fileno provided the first character was correct. --- join.c | 17 ++--- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/join.c b/join.c index a013946..1f2fb8c 100644 --- a/join.c +++ b/join.c @@ -335,19 +335,14 @@ makespec(char *s) int fileno; size_t fldno; - switch (s[0]) { - case '0': /* join field */ + if (!strcmp(s, "0")) { /* join field must be 0 and nothing else */ fileno = 0; fldno = 0; - break; - case '1': case '2': - if (sscanf(s, "%d.%zu", &fileno, &fldno) != 2) - eprintf("\"%s\": invalid format\n", s); - fldno--; /* ugly */ - break; - default: - eprintf("%c: invalid file number (must be 0, 1 or 2)\n", s[0]); - break; + } else if ((s[0] == '1' || s[0] == '2') && s[1] == '.') { + fileno = s[0] - '0'; + fldno = estrtonum(&s[2], 1, MIN(LLONG_MAX, SIZE_MAX)) - 1; + } else { + eprintf("%s: invalid format\n", s); } sp = ereallocarray(NULL, INIT, sizeof(struct spec)); -- 2.3.5
Re: [hackers] [sbase][patch] find: empty line means no for -ok
emg, Quoth Evan Gates on Thu, Jun 18 2015 14:47 -0700: diff --git a/find.c b/find.c index 186263b..0de1951 100644 --- a/find.c +++ b/find.c @@ -415,10 +415,9 @@ pri_ok(struct arg *arg) reply = fgetc(stdin); /* throw away rest of line */ - while ((c = fgetc(stdin)) != '\n' && c != EOF) - /* FIXME: what if the first character of the rest of the line is a null -* byte? */ - ; + if (c != '\n') + while ((c = fgetc(stdin)) != '\n' && c != EOF) + ; I think you meant if (reply != '\n') ... Regards, -- WCM
Re: [hackers] [sbase] Better cksum patch
Carlos, Quoth Carlos Torres on Wed, Jun 17 2015 17:26 -0400: you forgot to attach the patch :) As I understand git send-email, patches are sent as replies to the intro message and not as attachments. Since FRIGN commented on the 'missing' patch, it couldn't have been lost. I don't think anything went wrong here, but correct me if I've made a mistake. Regards, -- WCM
Re: [hackers] [sbase] [PATCH 1/3] find: Fix unterminated array in -ok primary
Quoth Evan Gates on Mon, Jun 15 2015 13:51 -0700: On Mon, Jun 15, 2015 at 12:27 PM, Wolfgang Corcoran-Mathe wrote: --- find.c | 1 + 1 file changed, 1 insertion(+) diff --git a/find.c b/find.c index dedf5a1..a870a90 100644 --- a/find.c +++ b/find.c @@ -429,6 +429,7 @@ pri_ok(struct arg *arg) /* insert filename everywhere user gave us {} */ for (brace = o->braces; *brace; brace++) **brace = arg->path; + *brace = NULL; switch((pid = fork())) { case -1: -- 2.3.5 Not needed there, *brace will already be NULL or the loop wouldn't have stopped (the cause of the segfault). Needed in get_ok_arg() so that it is NULL when we get here. See attached. It occurred to me shortly after sending that patch that this should have been in get_ok_arg(), as with get_exec_arg(). But without this patch (or yours), I get a segfault when the end of the arg list is reached. Your patch is obviously the right one, but I would like to understand why both seem to have the same effect. There is another semi-bug in -ok's input parsing. If the char read into reply in pri_ok() at line 415 is a newline the user will have to enter another newline to skip the file. This seems clumsy, but it might be what you want. Regards, -- WCM
[hackers] [sbase] [PATCH 1/3] find: Fix unterminated array in -ok primary
--- find.c | 1 + 1 file changed, 1 insertion(+) diff --git a/find.c b/find.c index dedf5a1..a870a90 100644 --- a/find.c +++ b/find.c @@ -429,6 +429,7 @@ pri_ok(struct arg *arg) /* insert filename everywhere user gave us {} */ for (brace = o->braces; *brace; brace++) **brace = arg->path; + *brace = NULL; switch((pid = fork())) { case -1: -- 2.3.5
[hackers] [sbase] [PATCH 2/3] find: Fix flushing input buffer with -ok
The original flush-stdin loop (with fgets()) hung until the user entered some extraneous characters for it to kill. emg's FIXME about nulls still applies. --- find.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/find.c b/find.c index a870a90..3871105 100644 --- a/find.c +++ b/find.c @@ -408,16 +408,16 @@ pri_ok(struct arg *arg) { int status, reply; pid_t pid; - char ***brace, buf[256]; + char ***brace, c; struct okarg *o = arg->extra.p; fprintf(stderr, "%s: %s ?", *o->argv, arg->path); reply = fgetc(stdin); /* throw away rest of line */ - while (fgets(buf, sizeof(buf), stdin) && *buf && buf[strlen(buf) - 1] == '\n') + while ((c = fgetc(stdin)) != '\n' && c != EOF) /* FIXME: what if the first character of the rest of the line is a null -* byte? probably shouldn't juse fgets() */ +* byte? */ ; if (feof(stdin)) /* FIXME: ferror()? */ -- 2.3.5
Re: [hackers] [sbase] [PATCH] find
emg, I noticed a few other things in get_ok_arg(), primarily that the fgets() flushing loop was causing find to hang mysteriously. Patch 2/3 should fix this, hopefully. The isplus branch seems to work, based on limited testing. I've enjoyed reading your code, btw. -- WCM
[hackers] [sbase] [PATCH 3/3] find: Improve prompt spacing with -ok
--- find.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/find.c b/find.c index 3871105..4ce150f 100644 --- a/find.c +++ b/find.c @@ -411,7 +411,7 @@ pri_ok(struct arg *arg) char ***brace, c; struct okarg *o = arg->extra.p; - fprintf(stderr, "%s: %s ?", *o->argv, arg->path); + fprintf(stderr, "%s: %s ? ", *o->argv, arg->path); reply = fgetc(stdin); /* throw away rest of line */ -- 2.3.5
[hackers] [sbase] [PATCH] find: Fix unterminated array
This caused a segfault with semicolon-terminated -exec primaries. --- find.c | 1 + 1 file changed, 1 insertion(+) diff --git a/find.c b/find.c index dcefca5..dedf5a1 100644 --- a/find.c +++ b/find.c @@ -607,6 +607,7 @@ get_exec_arg(char *argv[], union extra *extra) for (arg = argv, braces = e->u.s.braces; *arg; arg++) if (!strcmp(*arg, "{}")) *braces++ = arg; + *braces = NULL; } gflags.print = 0; return arg; -- 2.3.5
[hackers] [sbase] [PATCH] cksum: Skip files with read errors and continue
Previously, 'cksum *' exited early if * contained a directory or other file causing an fread() error. Exit status is set to indicate an error has occurred. --- cksum.c | 9 ++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cksum.c b/cksum.c index 3355b4c..570ca81 100644 --- a/cksum.c +++ b/cksum.c @@ -5,6 +5,7 @@ #include "util.h" +static int ret = 0; static const unsigned long crctab[] = { 0x, 0x04c11db7, 0x09823b6e, 0x0d4326d9, 0x130476dc, 0x17c56b6b, 0x1a864db2, 0x1e475005, 0x2608edb8, 0x22c9f00f, 0x2f8ad6d6, @@ -71,8 +72,11 @@ cksum(FILE *fp, const char *s) ck = (ck << 8) ^ crctab[(ck >> 24) ^ buf[i]]; len += n; } - if (ferror(fp)) - eprintf("fread %s:", s ? s : "<stdin>"); + if (ferror(fp)) { + weprintf("fread %s:", s ? s : "<stdin>"); + ret = 1; + return; + } for (i = len; i; i >>= 8) ck = (ck << 8) ^ crctab[(ck >> 24) ^ (i & 0xFF)]; @@ -89,7 +93,6 @@ int main(int argc, char *argv[]) { FILE *fp; - int ret = 0; argv0 = argv[0], argc--, argv++; -- 2.3.5
Re: [hackers] [sbase] [PATCH] cksum: Skip files with read errors and continue
Patch redone with global ret. Thanks, FRIGN. -- WCM
[hackers] [sbase] [PATCH] cksum: Skip files with read errors and continue
Previously, 'cksum *' would exit early if * contained any directories or other files causing fread() errors. Exit status is set to indicate an error has occurred. --- cksum.c | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/cksum.c b/cksum.c index 3355b4c..179ab05 100644 --- a/cksum.c +++ b/cksum.c @@ -59,7 +59,7 @@ static const unsigned long crctab[] = { 0x, 0xa2f33668, 0xbcb4666d, 0xb8757bda, 0xb5365d03, 0xb1f740b4 }; -static void +static int cksum(FILE *fp, const char *s) { size_t len = 0, i, n; @@ -71,8 +71,10 @@ cksum(FILE *fp, const char *s) ck = (ck << 8) ^ crctab[(ck >> 24) ^ buf[i]]; len += n; } - if (ferror(fp)) - eprintf("fread %s:", s ? s : "<stdin>"); + if (ferror(fp)) { + weprintf("fread %s:", s ? s : "<stdin>"); + return 1; + } for (i = len; i; i >>= 8) ck = (ck << 8) ^ crctab[(ck >> 24) ^ (i & 0xFF)]; @@ -83,6 +85,7 @@ cksum(FILE *fp, const char *s) fputs(s, stdout); } putchar('\n'); + return 0; } int @@ -105,7 +108,7 @@ main(int argc, char *argv[]) ret = 1; continue; } - cksum(fp, *argv); + ret |= cksum(fp, *argv); if (fp != stdin && fshut(fp, *argv)) ret = 1; } -- 2.3.5
[hackers] [sbase] Better cksum patch
I went too quickly with the last patch. This version sets the exit status properly when a file causes a read error, then continues to the next file. Sorry for patch spam. -- WCM
[hackers] [sbase] [PATCH] cksum: Skip files with errors and continue
Previously, 'cksum *' would exit early if * contained a directory or any other file causing a read error. --- cksum.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cksum.c b/cksum.c index 3355b4c..3b4f4f8 100644 --- a/cksum.c +++ b/cksum.c @@ -71,8 +71,10 @@ cksum(FILE *fp, const char *s) ck = (ck << 8) ^ crctab[(ck >> 24) ^ buf[i]]; len += n; } - if (ferror(fp)) - eprintf("fread %s:", s ? s : "<stdin>"); + if (ferror(fp)) { + weprintf("fread %s:", s ? s : "<stdin>"); + return; + } for (i = len; i; i >>= 8) ck = (ck << 8) ^ crctab[(ck >> 24) ^ (i & 0xFF)]; -- 2.3.5
Re: [hackers] [sbase] [PATCH 2/2] join: Fix typo
Quoth FRIGN on Sat, Jun 06 2015 22:40 +0200: Next time, please provide the patch with git format-patch, then authorship will be preserved. Thanks, noted. -- Wolfgang Corcoran-Mathe
Re: [hackers] [sbase] [PATCH 1/2] join: get rid of strlen--fwrite barbarity
FRIGN, Quoth FRIGN on Sat, Jun 06 2015 22:19 +0200: I have to admit, that it's not easy to decide which one is better. On the one hand, I favor good data structures and having the length at hand is a good thing. On the other hand, join is completed feature-wise, so why not just store the length on the heap locally in the function and be done with it? The original is probably better, especially since the patched version doesn't seem to be quicker. The length is only needed for that fwrite() call, anyway; it seems best to keep single-purpose values out of a heavily-used data structure. Patch withdrawn! I'm sorry if it was a waste of time. I appreciate your opinions on this. -- Wolfgang Corcoran-Mathe
[hackers] [sbase] [PATCH 2/2] join: Fix typo
This was causing some mysterious output bugs. -- Wolfgang Corcoran-Mathe diff --git a/join.c b/join.c index f682023..76f9ff5 100644 --- a/join.c +++ b/join.c @@ -141,7 +141,7 @@ prjoin(struct line *la, struct line *lb, size_t jfa, size_t jfb) for (i = 0; i < lb->nf; i++) { if (i != jfb) { prfield(&lb->fields[i]); - if (i < la->nf - 1) + if (i < lb->nf - 1) prsep(); } }
[hackers] [sbase] [PATCH 1/2] join: get rid of strlen--fwrite barbarity
join: Store string length in the line struct This gets rid of a barbarous strlen()/fwrite() construct. -- Wolfgang Corcoran-Mathe diff --git a/join.c b/join.c index b2fd07e..f682023 100644 --- a/join.c +++ b/join.c @@ -29,6 +29,7 @@ struct field { struct line { char *text; + size_t len; size_t nf; size_t maxf; struct field *fields; @@ -152,9 +153,7 @@ prjoin(struct line *la, struct line *lb, size_t jfa, size_t jfb) static void prline(struct line *lp) { - size_t len = strlen(lp->text); - - if (fwrite(lp->text, 1, len, stdout) != len) + if (fwrite(lp->text, 1, lp->len, stdout) != lp->len) eprintf("fwrite:"); putchar('\n'); @@ -218,6 +217,7 @@ makeline(char *s, size_t len) lp = ereallocarray(NULL, INIT, sizeof(struct line)); lp->text = s; + lp->len = len; lp->fields = ereallocarray(NULL, INIT, sizeof(struct field)); lp->nf = 0; lp->maxf = INIT;