Re: [Toybox] [PATCH] An implementation of the 'csplit' command

Oliver Webb via Toybox Wed, 13 Sep 2023 18:21:32 -0700

Sent with Proton Mail secure email.

------- Original Message -------
On Wednesday, September 13th, 2023 at 12:56 AM, Rob Landley <r...@landley.net> 
wrote:

> On 9/12/23 23:43, Oliver Webb wrote:
> 
> > ------- Original Message -------
> > On Tuesday, September 12th, 2023 at 2:36 PM, Rob Landley r...@landley.net 
> > wrote:
> > 
> > > On 9/11/23 23:56, Oliver Webb via Toybox wrote:
> > > 
> > > > I have made an implementation of the 'csplit' command in about 160 lines 
> > > > of code.
> 
> 
> By the way, have you found anything that actually uses this? Busybox hasn't
> bothered to implement it, and line selection is a trivial function of sed,
> including regex support:

The main reason I wrote this command is that it is in POSIX and on the
project's roadmap, but for actual use cases it has a clearer syntax than sed
does when specifying rules for splitting files.

One of the things I fixed in this patch is that it no longer resets the line
number every time it encounters a "{NUMBER}" rule, so it can now handle things
like "/foobar/ {4} 321" correctly.

 $ yes a | head -n 8 | ./csplit - /a/ {3} 7 -s; wc -l xx0*
 0 xx00
 1 xx01
 1 xx02
 1 xx03
 3 xx04
 2 xx05
 8 total

This will probably be my last patch to the csplit command for a while.
I have everything except negative offsets working properly. I fixed some issues
with the way the command breaks at line numbers, and adjusted some of the
code formatting. File sizes are now tracked with a
"filesize += strlen(line)+1;" in the main loop instead of stat()'ing every
file after it's written.

From 806e53f113848c579b41ee055af47ef115a11953 Mon Sep 17 00:00:00 2001
From: Oliver Webb <aquahobby...@proton.me>
Date: Wed, 13 Sep 2023 19:38:45 -0500
Subject: [PATCH] Fixed relative line numbers, code formatting, number offsets

---
 toys/pending/csplit.c | 80 ++++++++++++++++++++++++-------------------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/toys/pending/csplit.c b/toys/pending/csplit.c
index 1703afb5..8f053cca 100644
--- a/toys/pending/csplit.c
+++ b/toys/pending/csplit.c
@@ -44,31 +44,36 @@ GLOBALS(
   char *f;
 )
 
-size_t indx = 1, findx = 0, lineno = 1, btc = 0;
-int eg = 0, offset = -1, withld = 0;
+size_t indx = 1, findx = 0, lineno = 1;
 char *filefmt, *flname, *prefix;
+// Variables the context checker needs to track between lines
+size_t btc = 0, tmp = -1;
+int offset = -1, withld = 0, inf = 0;
 
 // This is only int so we can exit cleanly in ternary operators
-int abort_csplit(char *err) {
+int abrt(char *err) {
   // Cycle down through index instead of keeping track of what files we made
   if (!FLAG(k)) for (; indx>=1; indx--)
 	remove(xmprintf(filefmt, prefix, findx));
+
   error_exit("%s\n", err);
   return 1;
 }
 
 int rgmatch(char *rxrl, char *line, char *fmt) {
   regex_t rxp;
+  int rr;
   sscanf(rxrl,fmt, toybuf, &offset);
   xregcomp(&rxp, toybuf, 0);
-  int rr = regexec(&rxp, line, 0, 0, 0);
+  rr = regexec(&rxp, line, 0, 0, 0);
   if (!rr) return 1;
   else if (rr == REG_NOMATCH) return 0;
-  return abort_csplit("bad regex");
+  return abrt("bad regex");
 }
 
 int cntxt(char *line, char *rule) {
-  if (eg) return 0;
+  size_t llv;
+  if (indx == toys.optc) return 0;
 
   if (offset < 0);
   else if (offset == 0) {
@@ -90,14 +95,23 @@ int cntxt(char *line, char *rule) {
 	  break;
 
 	case '{':
+	  if (indx < 2) abrt("bad rule order");
+
 	  // GNU extention: {*}
-	  if (!strcmp(rule,"{*}"))
+	  if (!strcmp(rule,"{*}")){ 
 		btc = -1;
-	  else if (!sscanf(rule,"{%lu}",&btc))
-		abort_csplit("bad rule");
-
-	  // Reset the lineno so we can do things like "10 {*}"
-	  lineno = 1;
+		inf = 1;
+	  } else if (!sscanf(rule,"{%lu}",&btc))
+		abrt("bad rule");
+
+	  if (tmp == -1) tmp = lineno;
+	  if ((llv = atoll(toys.optargs[indx-1]))) {
+		if (((lineno-tmp) % llv+1) == llv) {
+		  tmp = -1;
+		  indx--;
+		  return 1;
+		} else return 0;
+	  }
 
 	  if (cntxt(line, toys.optargs[indx-1])) {
 		// Manipulate the rule then return to it later so we create a
@@ -113,12 +127,13 @@ int cntxt(char *line, char *rule) {
 	  break;
 
 	default:
-	 if (lineno > ((size_t)atoll(rule))) {
-	   abort_csplit("bad rule order");
+	 if (lineno > atoll(rule)) {
+	   abrt("bad rule order");
 	 } else if (!(atoll(rule))) {
-	   abort_csplit("bad rule");
+	   abrt("bad rule");
 	 } else {
-	   return (lineno == (size_t)atoll(rule));
+	   if (lineno == atoll(rule)) offset++;
+	   return 0;
 	 }
 	 break;
   }
@@ -130,44 +145,37 @@ int cntxt(char *line, char *rule) {
 
 void csplit_main(void)
 {
+  FILE *actvfile;
   FILE *fin = (*toys.optargs[0] != '-') ? xfopen(toys.optargs[0], "r") : stdin;
-
-  struct stat st;
+  char *line;
+  size_t filesize = 0;
 
   // -f and -n formatting
-  filefmt = xmprintf("%%s%%0%dd", TT.n ? (int)TT.n : 2);
+  filefmt = xmprintf("%%s%%0%lud", TT.n ? TT.n : 2);
   prefix = TT.f ? TT.f : "xx";
 
-  flname = xmprintf(filefmt, prefix, findx);
-  FILE *actvfile = xfopen(flname, "w+");
-  for (char *line; (line = xgetline(fin)); free(line)) {
+  actvfile = xfopen(xmprintf(filefmt, prefix, findx), "w+");
+  for (; (line = xgetline(fin)); free(line)) {
 	lineno++;
+	filesize += strlen(line)+1;
+
 	if (cntxt(line, toys.optargs[indx])) {
 
 	  if (!withld) {
 		fclose(actvfile);
-		if (!FLAG(s)) {
-		  stat(flname, &st);
-		  printf("%ld\n", st.st_size);
-		}
+		if (!FLAG(s)) printf("%ld\n", filesize);
+		filesize = 0;
 		findx++;
-		flname = xmprintf(filefmt, prefix, findx);
-		actvfile = xfopen(flname, "w+");
+		actvfile = xfopen(xmprintf(filefmt, prefix, findx), "w+");
 	  }
 
 	  indx++;
 	  withld = 0;
-	  if (indx == toys.optc) eg = 1;
 	}
 	if (!withld) fprintf(actvfile, "%s\n", line);
   }
-
-  fclose(actvfile);
-  if (!FLAG(s)) {
-	stat(flname, &st);
-	printf("%ld\n", st.st_size);
-  }
+  if (!FLAG(s)) printf("%ld\n", filesize);
 
   // Abort Case: Not All Rules Processed
-  if (indx < toys.optc-1) abort_csplit("Rules not processed");
+  if (!((indx == toys.optc) || inf)) abrt("Rules not processed");
 }
-- 
2.34.1

_______________________________________________
Toybox mailing list
Toybox@lists.landley.net
http://lists.landley.net/listinfo.cgi/toybox-landley.net

Re: [Toybox] [PATCH] An implementation of the 'csplit' command

Reply via email to