For the last few days I've been looking at csplit again, trying to get tests implemented. I added about a dozen in the attached patch, which did catch a handful of regressions I didn't notice when I was making this (I was manually testing because I didn't know how to use the test suite), so I guess it's doing its job?
There are some insane trailing newline shenanigans: csplit omits the trailing newline only on the last line of the last file, which is... certainly behavior, and also certainly annoying to get working around a REPL. I got it working, with the side effect of %exclude% rules still not working because the trailing newline printing mechanism and the rule processing logic do NOT like each other. Delay processing was also broken for /regex/ rules, so I fixed that. In any event, csplit.c with this patch applied is considerably better than without. It's a start, and I have more debugging work which I'll do. But I've already spent a few days on this and have fixed more regressions than are still left. - Oliver Webb <[email protected]>
From a5a855ba851258314a1df105876bd0489cc6c9f0 Mon Sep 17 00:00:00 2001 From: Oliver Webb <[email protected]> Date: Sun, 17 Mar 2024 17:12:30 -0500 Subject: [PATCH] csplit: Test suite, newline shenanigans, fixed (some of the) regressions --- tests/csplit.test | 26 ++++++++++++++++ toys/pending/csplit.c | 69 ++++++++++++++++++++++--------------------- 2 files changed, 62 insertions(+), 33 deletions(-) create mode 100644 tests/csplit.test diff --git a/tests/csplit.test b/tests/csplit.test new file mode 100644 index 00000000..8dc12b92 --- /dev/null +++ b/tests/csplit.test @@ -0,0 +1,26 @@ +#!/bin/bash + +[ -f testing.sh ] && . testing.sh + +# testing "name" "command" "result" "infile" "stdin" + +testcmd "int" "- 5 >/dev/null && cat xx00; echo ==; cat xx01" \ + "1\n2\n3\n4\n==\n5" "" "$(seq 5)" +testcmd "-s" "-s - 5 && cat xx00; echo ==; cat xx01" \ + "1\n2\n3\n4\n==\n5" "" "$(seq 5)" +testcmd "-k" "-sk - 20 2>/dev/null || cat xx00" \ + "1\n2\n3\n4\n5" "" "$(seq 5)" +testcmd "-f" "-sf f - 5 && cat f00; echo ==; cat f01" \ + "1\n2\n3\n4\n==\n5" "" "$(seq 5)" +testcmd "-n" "-s -n 1 - 5 && cat xx0; echo ==; cat xx1" \ + "1\n2\n3\n4\n==\n5" "" "$(seq 5)" +testcmd "/regex/" "-s - /3/ && cat xx00; echo ==; cat xx01" \ + "1\n2\n==\n3\n4\n5" "" "$(seq 5)" +testcmd "/regex/delay" "-s - /3/2 && cat xx00; echo ==; cat xx01" \ + "1\n2\n3\n4\n==\n5" "" "$(seq 5)" +testcmd "{*}" "-s - '/[03]$/' '{*}' && cat xx00 && echo == && cat xx02"\ + "1\n2\n==\n10\n11\n12" "" "$(seq 12)" +testcmd "%exclude%" "-k - 2 %4% 7 -s && cat xx0{0..2}" \ + "1\n4\n5\n6\n7\n8\n9\n10" "" "$(seq 10)" +testcmd "%exclude%delay" "-k - 2 %4%2 7 -s && cat xx0{0..2}" \ + "1\n6\n7\n8\n9\n10" "" "$(seq 10)" diff --git a/toys/pending/csplit.c b/toys/pending/csplit.c index c659730e..b5a599d5 100644 --- a/toys/pending/csplit.c +++ b/toys/pending/csplit.c @@ -12,24 +12,23 @@ config CSPLIT bool "csplit" default n help - usage: csplit [-ks] [-f PREFIX] [-n INTEGER] file arg... 
+ usage: csplit [-ks] [-f PREFIX] [-n N] file arg... Split files into multiple files based on list of rules -k Does not delete Files on error -s No file output size messages - -f [PREFIX] Use [PREFIX] as filename prefix instead of "xx" - -n [INTEGER] Make all filename numbers [INTEGER] characters long + -f [PREFIX] Use [PREFIX] as filename prefix instead of "xx" + -n N Make all filename numbers N characters long Valid Rules: - /regexp/[INTEGER] Break file before line that regexp matches, - %regexp%[INTEGER] Exclude untill line matches regexp - If a offset is specified for these rules, the break will happen [INTEGER] - lines after the regexp match - if a offset is specified, it will break at [INTEGER] lines after the offset - [INTEGER] Break file at line before [INTEGER] - {INTEGER} Repeat Previous Pattern INTEGER Number of times if INTEGER is * - The pattern repeats forever + /regexp/[N] Break file before line that regexp matches, + %regexp%[N] Exclude untill line matches regexp + If N is specified for these rules, the break will happen N lines after + the regexp match + N Break file at line before N + {N} Repeat Previous Pattern N Number of times if N is * The pattern + repeats forever */ #define FOR_csplit @@ -43,7 +42,7 @@ GLOBALS( char *filefmt, *prefix; // Variables the context checker need to track between lines size_t btc, tmp; - int offset, withld, inf; + int delay, withld, inf; ) static _Noreturn void abrt(char *err) @@ -52,7 +51,7 @@ static _Noreturn void abrt(char *err) if (!FLAG(k)) for (; TT.indx>=1; TT.indx--) remove(xmprintf(TT.filefmt, TT.prefix, TT.findx)); - error_exit("%s\n", err); + error_exit("%s", err); } static int rgmatch(char *rxrl, char *line, char *fmt) @@ -60,11 +59,15 @@ static int rgmatch(char *rxrl, char *line, char *fmt) regex_t rxp; int rr; - sscanf(rxrl, fmt, toybuf, &TT.offset); + sscanf(rxrl, fmt, toybuf, &TT.delay); xregcomp(&rxp, toybuf, 0); rr = regexec(&rxp, line, 0, 0, 0); + if (TT.delay != -1) { + if (!TT.withld) 
TT.delay++; + return 0; + } + if (rr == REG_NOMATCH) return 0; if (!rr) return 1; - else if (rr == REG_NOMATCH) return 0; abrt("bad regex"); } @@ -73,25 +76,24 @@ static int cntxt(char *line, char *rule) size_t llv; if (TT.indx == toys.optc) return 0; - if (TT.offset < 0); - else if (TT.offset == 0) { - TT.offset = -1; + if (TT.delay < 0); + else if (!TT.delay) { + TT.delay = -1; return 1; } else { - TT.offset--; + TT.delay--; return 0; } switch (rule[0]) { case '/': - return rgmatch(rule, line, "/%[^/%]/%d"); - break; + return rgmatch(rule, line, "/%[^/%]/%u"); case '%': TT.withld = 1; - return rgmatch(rule, line, "%%%[^/%]%%%d"); + return rgmatch(rule, line, "%%%[^/%]%%%u"); case '{': if (TT.indx < 2) abrt("bad rule order"); @@ -124,10 +126,10 @@ static int cntxt(char *line, char *rule) return 0; default: - if (TT.lineno > atoll(rule)) abrt("bad rule order"); - else if (!(atoll(rule))) abrt("bad rule"); + if (!(atoll(rule))) abrt("bad rule"); + else if (TT.lineno > atoll(rule)) abrt("bad rule order"); else { - if (TT.lineno == atoll(rule)) TT.offset++; + if (TT.lineno == atoll(rule)) TT.delay++; return 0; } } @@ -138,25 +140,26 @@ void csplit_main(void) FILE *actvfile; FILE *fin = (*toys.optargs[0] != '-') ? xfopen(toys.optargs[0], "r") : stdin; char *line; - size_t filesize = 0; + size_t filesize = 0, fl = 1; TT.indx = TT.lineno = 1; - TT.tmp = TT.offset = -1; + TT.tmp = TT.delay = -1; // -f and -n formatting - TT.filefmt = xmprintf("%%s%%0%lud", TT.n ? TT.n : 2); + TT.filefmt = xmprintf("%%s%%0%lud", TT.n ? : 2); TT.prefix = TT.f ? 
TT.f : "xx"; actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+"); for (; (line = xgetline(fin)); free(line)) { - TT.lineno++; - if (!TT.withld) filesize += strlen(line)+1; + TT.lineno++; fl++; + if (!TT.withld) filesize += strlen(line); if (cntxt(line, toys.optargs[TT.indx])) { if (!TT.withld) { + fputc('\n', actvfile); fclose(actvfile); if (!FLAG(s)) printf("%ld\n", filesize); - filesize = 0; + filesize = 0; fl = 2; TT.findx++; actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+"); } @@ -164,10 +167,10 @@ void csplit_main(void) TT.indx++; TT.withld = 0; } - if (!TT.withld) fprintf(actvfile, "%s\n", line); + if (!TT.withld) fprintf(actvfile, fl > 2 ? "\n%s" : "%s", line); } if (!FLAG(s)) printf("%ld\n", filesize); // Abort Case: Not All Rules Processed - if (!((TT.indx == toys.optc) || TT.inf)) abrt("Rules not processed"); + if ((TT.indx != toys.optc) && !TT.inf) abrt("Rules not processed"); } -- 2.44.0
_______________________________________________ Toybox mailing list [email protected] http://lists.landley.net/listinfo.cgi/toybox-landley.net
