After looking at it more, I decided the slist linked list is too
complicated. It requires the pfield allocation and free every line, and
associated logic, just to keep track of what fields had already been
visited.

I changed things to keep the list of intervals in toybuf, and to clean
up the list so we don't have to keep track of fields already visited.

I also rewrote the do_*cut functions. They're much simpler now, and I
think easier to understand.

As a bonus, there's less heap usage and churn, an array has better
locality of reference than a linked list, and the O(n^2) insertion sort
of the list parsing has turned into (presumably) O(n log n) of qsort.

Note that holding the list in toybuf does limit the number of intervals
to 512 -- 4096/(2*sizeof(int)). If this is a problem, it's a
straightforward change to count commas and malloc exactly how much
space we'd need.

This passes all tests in the suite. I'm not 100% confident I didn't
miss an off-by-one or boundary condition somewhere, because the tests
aren't very thorough.

I figured it's a significant enough change to add my name to the
copyright. If you disagree, just remove it.
From 524942352b75f8c821a5878247bdd249d0e6827b Mon Sep 17 00:00:00 2001
From: "Daniel K. Levy" <alliede...@gmail.com>
Date: Fri, 17 Feb 2017 16:33:41 -0600
Subject: [PATCH] cut: simplify by replacing slist.

Store interval list as a flat array in toybuf instead.
Remove add_to_list function; add interval_cmp so we can use qsort.
Clean up interval list so we don't have to check if we've already
visited a field.
Rewrite do_fcut and do_bccut to simplify them.
---
 toys/posix/cut.c | 158 ++++++++++++++++++++++---------------------------------
 1 file changed, 63 insertions(+), 95 deletions(-)

diff --git a/toys/posix/cut.c b/toys/posix/cut.c
index 3f2ac54..a45ec0a 100644
--- a/toys/posix/cut.c
+++ b/toys/posix/cut.c
@@ -2,6 +2,7 @@
  *
  * Copyright 2012 Ranjan Kumar <ranjankumar....@gmail.com>
  * Copyright 2012 Kyungwan Han <asura...@gmail.com>
+ * Copyright 2017 Daniel K. Levy <alliede...@gmail.com>
  *
  * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html
  *
@@ -27,154 +28,123 @@ config CUT
 #define FOR_cut
 #include "toys.h"
 
-struct slist {
-  struct slist *next;
-  int start, end;
-};
-
 GLOBALS(
   char *delim;
   char *flist;
   char *clist;
   char *blist;
 
-  struct slist *slist_head;
   unsigned nelem;
 )
 
-static void add_to_list(int start, int end)
-{
-  struct slist *current, *temp1_node;
+struct interval {
+  unsigned start, end;
+};
 
-  temp1_node = xzalloc(sizeof(struct slist));
-  temp1_node->start = start;
-  temp1_node->end = end;
+#define MIN(x,y) ((x) < (y) ? (x) : (y))
+#define MAX(x,y) ((x) > (y) ? (x) : (y))
 
-  /* Special case for the head end */
-  if (!TT.slist_head || TT.slist_head->start >= start) {
-    temp1_node->next = TT.slist_head;
-    TT.slist_head = temp1_node;
-  } else { 
-    /* Locate the node before the point of insertion */   
-    current = TT.slist_head;
-    while (current->next && current->next->start < temp1_node->start)
-        current = current->next;
-    temp1_node->next = current->next;   
-    current->next = temp1_node;
-  }
+int interval_cmp(const void *a, const void *b) {
+  const struct interval *ai = a, *bi = b;
+  if (ai->start != bi->start)
+    return ai->start < bi->start ? -1 : 1;
+  if (ai->end != bi->end)
+    return ai->end < bi->end ? -1 : 1;
+  return 0;
 }
 
-// parse list and add to slist.
+// parse list and add to interval list (in toybuf).
 static void parse_list(char *list)
 {
+  struct interval *ilist = (void *)toybuf, *cur_int;
+  unsigned nmerged = 0;
   for (;;) {
     char *ctoken = strsep(&list, ","), *dtoken;
-    int start = 0, end = INT_MAX;
+    unsigned start = 0, end = UINT_MAX;
 
     if (!ctoken) break;
     if (!*ctoken) error_exit("empty position specifier");
 
     // Get start position.
     if (*(dtoken = strsep(&ctoken, "-")))
-      start = atolx_range(dtoken, 1, INT_MAX)-1;
+      start = atolx_range(dtoken, 1, UINT_MAX)-1;
 
     // Get end position.
-    if (!ctoken) end = -1; //case e.g. 1,2,3
+    if (!ctoken) end = start; //case e.g. 1,2,3
     else if (*ctoken) {//case e.g. N-M
-      end = atolx_range(ctoken, start+1, INT_MAX)-1;
-      if(end == start) end = -1;
+      end = atolx_range(ctoken, start+1, UINT_MAX)-1;
     } else if (!*dtoken)
       error_exit("empty position specifier");
 
-    add_to_list(start, end);
+    if (TT.nelem >= sizeof(toybuf)/sizeof(*ilist))
+      error_exit("too many position specifiers");
+    ilist[TT.nelem].start = start;
+    ilist[TT.nelem].end = end;
     TT.nelem++;
   }
   // if list is missing in command line.
   if (!TT.nelem) error_exit("missing positions list");
+
+  // clean up list -- sort intervals
+  qsort(ilist, TT.nelem, sizeof(*ilist), interval_cmp);
+  // now merge intervals
+  cur_int = ilist;
+  for (int i = 1; i < TT.nelem; i++) {
+    if (ilist[i].start <= cur_int->end + 1) {
+      nmerged++;
+      cur_int->end = MAX(cur_int->end, ilist[i].end);
+    } else {
+      cur_int++;
+      cur_int->start = ilist[i].start;
+      cur_int->end = ilist[i].end;
+    }
+  }
+  TT.nelem -= nmerged;
 }
 
 // perform cut operation on the given delimiter.
 static void do_fcut(int fd, char *name)
 {
-  char *line, *buff, *pfield;
+  char *line, *buff, *field;
+  unsigned i, cur_field;
+  struct interval *ilist = (void *)toybuf;
 
   for (;(buff = line = get_line(fd)); free(line)) {
-    unsigned cpos;
-    int start, ndelimiters = -1, nprinted_fields = 0;
-    struct slist *temp_node = TT.slist_head;
-
-    //does line have any delimiter?.
-    if (!strchr(buff, *TT.delim)) {
-      //if not then print whole line and move to next line.
+    if (!strchr(line, *TT.delim)) {
       if (!(toys.optflags & FLAG_s)) xputs(buff);
       continue;
     }
 
-    pfield = xzalloc(strlen(buff) + 1);
-
-      //process list on each line.
-      for (cpos = 0; temp_node && cpos < TT.nelem && buff; cpos++) {
-        start = temp_node->start;
-        do {
-          char *field = 0;
-
-          //count number of delimeters per line.
-          while (buff && ndelimiters < start) {
-              ndelimiters++;
-              field = strsep(&buff, TT.delim);
-          }
-          //print field (if not yet printed).
-          if (!pfield[ndelimiters] && ndelimiters == start) {
-              //put delimiter.
-              if (nprinted_fields++ > 0) xputc(*TT.delim);
-              if (field) fputs(field, stdout);
-              //make sure this field won't print again.
-              pfield[ndelimiters] = (char) 0x23; //put some char at this position.
-          }
-          start++;
-          if ((temp_node->end < 0) || !buff) break;          
-        } while(start <= temp_node->end);
-        temp_node = temp_node->next;
+    cur_field = 0;
+    for (i = 0; i < TT.nelem && buff; i++) {
+      for (; (field = strsep(&buff, TT.delim)) && cur_field <= ilist[i].end;
+            cur_field++)
+      {
+        if (cur_field > ilist[0].start) xputc(*TT.delim);
+        if (cur_field >= ilist[i].start) fputs(field, stdout);
       }
+    }
     xputc('\n');
-    free(pfield);
   }
 }
 
 // perform cut operation char or byte.
 static void do_bccut(int fd, char *name)
 {
-  char *line, *buff;
+  char *line;
+  unsigned i;
+  struct interval *ilist = (void *)toybuf;
 
-  for (;(buff = line = get_line(fd)); free(line)) {
-    unsigned cpos;
-    size_t buffln = strlen(buff);
-    char *pfield = xzalloc(buffln + 1);
-    struct slist *temp_node = TT.slist_head;
+  for (;(line = get_line(fd)); free(line)) {
+    size_t lineln = strlen(line);
 
-      for (cpos = 0; temp_node && cpos < TT.nelem; cpos++) {
-        int start;
+    for (i = 0; i < TT.nelem; i++) {
+      if (ilist[i].start >= lineln) break;
 
-        start = temp_node->start;
-        while (start < buffln) {
-          //to avoid duplicate field printing.
-          if (pfield[start]) {
-              if (++start <= temp_node->end) continue;
-              temp_node = temp_node->next;
-              break;
-          } else {
-            //make sure this field won't print again.
-            pfield[start] = (char) 0x23; //put some char at this position.
-            xputc(buff[start]);
-          }
-          if (++start > temp_node->end) {
-            temp_node = temp_node->next;
-            break;
-          }
-        }
-      }
-      xputc('\n');
-    free(pfield);
+      xwrite(1, line+ilist[i].start,
+          MIN(lineln, ilist[i].end)-ilist[i].start+1);
+    }
+    xputc('\n');
   }
 }
 
@@ -184,7 +154,6 @@ void cut_main(void)
   void (*do_cut)(int, char *);
 
   TT.nelem = 0;
-  TT.slist_head = NULL;
 
   if (toys.optflags & FLAG_f) {
     list = TT.flist;
@@ -206,5 +175,4 @@ void cut_main(void)
 
   parse_list(list);
   loopfiles(toys.optargs, do_cut);
-  if (CFG_TOYBOX_FREE) llist_traverse(TT.slist_head, free);
 }
-- 
2.9.3

_______________________________________________
Toybox mailing list
Toybox@lists.landley.net
http://lists.landley.net/listinfo.cgi/toybox-landley.net

Reply via email to