For the last few days I've been looking at csplit again, trying to get tests
implemented. I added about a dozen in the attached patch, and they did catch a
handful of regressions I didn't notice when I was making this (I was manually
testing because I didn't know how to use the test suite), so I guess it's doing
its job?

There are some insane trailing newline shenanigans: csplit omits the trailing
newline only on the last line of the last file, which is... certainly behavior,
and also certainly annoying to get working around a REPL. I got it working,
with the side effect that %exclude% rules still don't work, because the
trailing newline printing mechanism and the rule processing logic do NOT like
each other. Delay processing was also broken for /regex/ rules, so I fixed
that.

In any event, csplit.c with this patch applied is considerably better than
without. It's a start, and I have more debugging work still to do. But I've
already spent a few days on this and have fixed more regressions than are
still left.

-   Oliver Webb <[email protected]>
From a5a855ba851258314a1df105876bd0489cc6c9f0 Mon Sep 17 00:00:00 2001
From: Oliver Webb <[email protected]>
Date: Sun, 17 Mar 2024 17:12:30 -0500
Subject: [PATCH] csplit: Test suite, newline shenanigans, fixed (some of the)
 regressions

---
 tests/csplit.test     | 26 ++++++++++++++++
 toys/pending/csplit.c | 69 ++++++++++++++++++++++---------------------
 2 files changed, 62 insertions(+), 33 deletions(-)
 create mode 100644 tests/csplit.test

diff --git a/tests/csplit.test b/tests/csplit.test
new file mode 100644
index 00000000..8dc12b92
--- /dev/null
+++ b/tests/csplit.test
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+[ -f testing.sh ] && . testing.sh
+
+# testing "name" "command" "result" "infile" "stdin"
+
+testcmd "int" "- 5 >/dev/null && cat xx00; echo ==; cat xx01" \
+  "1\n2\n3\n4\n==\n5" "" "$(seq 5)"
+testcmd "-s" "-s - 5 && cat xx00; echo ==; cat xx01" \
+  "1\n2\n3\n4\n==\n5" "" "$(seq 5)"
+testcmd "-k" "-sk - 20 2>/dev/null || cat xx00" \
+  "1\n2\n3\n4\n5" "" "$(seq 5)"
+testcmd "-f" "-sf f - 5 && cat f00; echo ==; cat f01" \
+  "1\n2\n3\n4\n==\n5" "" "$(seq 5)"
+testcmd "-n" "-s -n 1 - 5 && cat xx0; echo ==; cat xx1" \
+  "1\n2\n3\n4\n==\n5" "" "$(seq 5)"
+testcmd "/regex/" "-s - /3/ && cat xx00; echo ==; cat xx01" \
+  "1\n2\n==\n3\n4\n5" "" "$(seq 5)"
+testcmd "/regex/delay" "-s - /3/2 && cat xx00; echo ==; cat xx01" \
+  "1\n2\n3\n4\n==\n5" "" "$(seq 5)"
+testcmd "{*}" "-s - '/[03]$/' '{*}' && cat xx00 && echo == && cat xx02"\
+  "1\n2\n==\n10\n11\n12" "" "$(seq 12)"
+testcmd "%exclude%" "-k - 2 %4% 7 -s && cat xx0{0..2}" \
+  "1\n4\n5\n6\n7\n8\n9\n10"  "" "$(seq 10)"
+testcmd "%exclude%delay" "-k - 2 %4%2 7 -s && cat xx0{0..2}" \
+  "1\n6\n7\n8\n9\n10"  "" "$(seq 10)"
diff --git a/toys/pending/csplit.c b/toys/pending/csplit.c
index c659730e..b5a599d5 100644
--- a/toys/pending/csplit.c
+++ b/toys/pending/csplit.c
@@ -12,24 +12,23 @@ config CSPLIT
   bool "csplit"
   default n
   help
-    usage: csplit [-ks] [-f PREFIX] [-n INTEGER] file arg...
+    usage: csplit [-ks] [-f PREFIX] [-n N] file arg...
 
     Split files into multiple files based on list of rules
 
     -k	Does not delete Files on error
     -s	No file output size messages
-    -f [PREFIX] Use [PREFIX] as filename prefix instead of "xx"
-    -n [INTEGER] Make all filename numbers [INTEGER] characters long
+    -f [PREFIX]  Use [PREFIX] as filename prefix instead of "xx"
+    -n N Make all filename numbers N characters long
 
     Valid Rules:
-    /regexp/[INTEGER] Break file before line that regexp matches,
-    %regexp%[INTEGER] Exclude untill line matches regexp
-    If a offset is specified for these rules, the break will happen [INTEGER]
-    lines after the regexp match
-    if a offset is specified, it will break at [INTEGER] lines after the offset
-    [INTEGER] Break file at line before [INTEGER]
-    {INTEGER} Repeat Previous Pattern INTEGER Number of times if INTEGER is *
-    The pattern repeats forever
+    /regexp/[N] Break file before line that regexp matches,
+    %regexp%[N] Exclude untill line matches regexp
+    If N is specified for these rules, the break will happen N lines after
+    the regexp match
+    N   Break file at line before N
+    {N} Repeat Previous Pattern N Number of times if N is * The pattern
+    repeats forever
 */
 
 #define FOR_csplit
@@ -43,7 +42,7 @@ GLOBALS(
   char *filefmt, *prefix;
   // Variables the context checker need to track between lines
   size_t btc, tmp;
-  int offset, withld, inf;
+  int delay, withld, inf;
 )
 
 static _Noreturn void abrt(char *err)
@@ -52,7 +51,7 @@ static _Noreturn void abrt(char *err)
   if (!FLAG(k)) for (; TT.indx>=1; TT.indx--)
     remove(xmprintf(TT.filefmt, TT.prefix, TT.findx));
 
-  error_exit("%s\n", err);
+  error_exit("%s", err);
 }
 
 static int rgmatch(char *rxrl, char *line, char *fmt)
@@ -60,11 +59,15 @@ static int rgmatch(char *rxrl, char *line, char *fmt)
   regex_t rxp;
   int rr;
 
-  sscanf(rxrl, fmt, toybuf, &TT.offset);
+  sscanf(rxrl, fmt, toybuf, &TT.delay);
   xregcomp(&rxp, toybuf, 0);
   rr = regexec(&rxp, line, 0, 0, 0);
+  if (TT.delay != -1) {
+    if (!TT.withld) TT.delay++;
+    return 0;
+  }
+  if (rr == REG_NOMATCH) return 0;
   if (!rr) return 1;
-  else if (rr == REG_NOMATCH) return 0;
   abrt("bad regex");
 }
 
@@ -73,25 +76,24 @@ static int cntxt(char *line, char *rule)
   size_t llv;
   if (TT.indx == toys.optc) return 0;
 
-  if (TT.offset < 0);
-  else if (TT.offset == 0) {
-    TT.offset = -1;
+  if (TT.delay < 0);
+  else if (!TT.delay) {
+    TT.delay = -1;
 
     return 1;
   } else {
-    TT.offset--;
+    TT.delay--;
 
     return 0;
   }
 
   switch (rule[0]) {
     case '/':
-      return rgmatch(rule, line, "/%[^/%]/%d");
-      break;
+      return rgmatch(rule, line, "/%[^/%]/%u");
 
     case '%':
       TT.withld = 1;
-      return rgmatch(rule, line, "%%%[^/%]%%%d");
+      return rgmatch(rule, line, "%%%[^/%]%%%u");
 
     case '{':
       if (TT.indx < 2) abrt("bad rule order");
@@ -124,10 +126,10 @@ static int cntxt(char *line, char *rule)
       return 0;
 
     default:
-      if (TT.lineno > atoll(rule)) abrt("bad rule order");
-      else if (!(atoll(rule))) abrt("bad rule");
+      if (!(atoll(rule))) abrt("bad rule");
+      else if (TT.lineno > atoll(rule)) abrt("bad rule order");
       else {
-        if (TT.lineno == atoll(rule)) TT.offset++;
+        if (TT.lineno == atoll(rule)) TT.delay++;
         return 0;
       }
   }
@@ -138,25 +140,26 @@ void csplit_main(void)
   FILE *actvfile;
   FILE *fin = (*toys.optargs[0] != '-') ? xfopen(toys.optargs[0], "r") : stdin;
   char *line;
-  size_t filesize = 0;
+  size_t filesize = 0, fl = 1;
 
   TT.indx = TT.lineno = 1;
-  TT.tmp = TT.offset = -1;
+  TT.tmp = TT.delay = -1;
 
   // -f and -n formatting
-  TT.filefmt = xmprintf("%%s%%0%lud", TT.n ? TT.n : 2);
+  TT.filefmt = xmprintf("%%s%%0%lud", TT.n ? : 2);
   TT.prefix = TT.f ? TT.f : "xx";
 
   actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+");
   for (; (line = xgetline(fin)); free(line)) {
-    TT.lineno++;
-    if (!TT.withld) filesize += strlen(line)+1;
+    TT.lineno++; fl++;
+    if (!TT.withld) filesize += strlen(line);
 
     if (cntxt(line, toys.optargs[TT.indx])) {
       if (!TT.withld) {
+        fputc('\n', actvfile);
         fclose(actvfile);
         if (!FLAG(s)) printf("%ld\n", filesize);
-        filesize = 0;
+        filesize = 0; fl = 2;
         TT.findx++;
         actvfile = xfopen(xmprintf(TT.filefmt, TT.prefix, TT.findx), "w+");
       }
@@ -164,10 +167,10 @@ void csplit_main(void)
       TT.indx++;
       TT.withld = 0;
     }
-    if (!TT.withld) fprintf(actvfile, "%s\n", line);
+    if (!TT.withld) fprintf(actvfile, fl > 2 ? "\n%s" : "%s", line);
   }
   if (!FLAG(s)) printf("%ld\n", filesize);
 
   // Abort Case: Not All Rules Processed
-  if (!((TT.indx == toys.optc) || TT.inf)) abrt("Rules not processed");
+  if ((TT.indx != toys.optc) && !TT.inf) abrt("Rules not processed");
 }
-- 
2.44.0

_______________________________________________
Toybox mailing list
[email protected]
http://lists.landley.net/listinfo.cgi/toybox-landley.net

Reply via email to