From 8f76b94de437d88409860589002f31cfc6f0136c Mon Sep 17 00:00:00 2001
From: Ray Gardner <raygard@gmail.com>
Date: Thu, 8 Aug 2024 17:47:59 -0600
Subject: [PATCH 3/7] Mods to make "interactive input" work with awk

User @davidar (github.com/davidar, David A Roberts) notes that wak/toybox awk
did not work correctly with interactive input. His example was

wak '/^say/ {print $2}

Taking input from the terminal, input of 'say hi' should respond 'hi', but it
was not working correctly due to the behavior of the input routines not
stopping at a newline, but attempting to fill a large buffer to accommodate the
use of a regex as RS (record separator).

David provided a patch using fgets() to stop reading input at a newline, but it
caused the RS to be ignored. I tried checking for RS == "\n" but that was not
sufficient, as RS is usually a newline, and causing all input to go to fgets()
was a big performance hit. So I also added a field 'is_tty' for each opened
file (including stdin), and only use fgets() if RS is "\n" and also if the
input file isatty(). Then I find that I also have to set toybox awk to use line
buffering. When using (non-toybox) wak, the system sets the output buffering to
non-full-buffering if stdout is an "interactive device" (as required by POSIX).

It would be nice if toybox provided an option to not set the buffering for
stdout at all, say TOYFLAG_DEFAULTBUF, leaving it to the system, then toybox
awk would not take the performance hit on stdout for all file operations.
---
 toys/pending/awk.c            | 39 +++++++++++++++++++++------------------

diff --git a/toys/pending/awk.c b/toys/pending/awk.c
index a0183fe..f41db24 100644
--- a/toys/pending/awk.c
+++ b/toys/pending/awk.c
@@ -5,7 +5,7 @@
  *
  * See https://pubs.opengroup.org/onlinepubs/9699919799/utilities/awk.html
 
-USE_AWK(NEWTOY(awk, "F:v*f*bc", TOYFLAG_USR|TOYFLAG_BIN))
+USE_AWK(NEWTOY(awk, "F:v*f*bc", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LINEBUF))
 
 config AWK
   bool "awk"
@@ -86,16 +86,11 @@ GLOBALS(
 
   struct runtime_globals {
     struct zvalue cur_arg;
-    //char *filename;     // UNUSED
     FILE *fp;           // current data file
     int narg;           // cmdline arg index
     int nfiles;         // num of cmdline data file args processed
     int eof;            // all cmdline files (incl. stdin) read
     char *recptr;
-    char *recbuf;
-    size_t recbufsize;
-    char *recbuf_multx;
-    size_t recbufsize_multx;
     struct zstring *zspr;      // Global to receive sprintf() string value
   } rgl;
 
@@ -132,7 +127,8 @@ GLOBALS(
     char *fn;
     FILE *fp;
     char mode;  // w, a, or r
-    char file_or_pipe;  // f or p
+    char file_or_pipe;  // 1 if file, 0 if pipe
+    char is_tty;
     char is_std_file;
     char *recbuf;
     size_t recbufsize;
@@ -3003,12 +2999,12 @@ static struct zvalue *setup_lvalue(int ref_stack_ptr, int parmbase, int *field_n
   return v; // order FATAL() and return to mute warning
 }
 
-
-static struct zfile *new_file(char *fn, FILE *fp, char mode, char file_or_pipe)
+static struct zfile *new_file(char *fn, FILE *fp, char mode, char file_or_pipe,
+                              char is_std_file)
 {
   struct zfile *f = xzalloc(sizeof(struct zfile));
   *f = (struct zfile){TT.zfiles, xstrdup(fn), fp, mode, file_or_pipe,
-                        0, 0, 0, 0, 0, 0, 0, 0, 0};
+                isatty(fileno(fp)), is_std_file, 0, 0, 0, 0, 0, 0, 0, 0};
   return TT.zfiles = f;
 }
 
@@ -3073,7 +3069,7 @@ static struct zfile *setup_file(char file_or_pipe, char *mode)
     }
   FILE *fp = (file_or_pipe ? fopen : popen)(fn, mode);
   if (fp) {
-    struct zfile *p = new_file(fn, fp, *mode, file_or_pipe);
+    struct zfile *p = new_file(fn, fp, *mode, file_or_pipe, 0);
     drop();
     return p;
   }
@@ -3290,10 +3286,12 @@ static int next_fp(void)
   if (TT.cfile->fp && TT.cfile->fp != stdin) fclose(TT.cfile->fp);
   if ((!fn && !TT.rgl.nfiles && TT.cfile->fp != stdin) || (fn && !strcmp(fn, "-"))) {
     TT.cfile->fp = stdin;
+    TT.cfile->fn = "<stdin>";
     zvalue_release_zstring(&STACK[FILENAME]);
     STACK[FILENAME].vst = new_zstring("<stdin>", 7);
   } else if (fn) {
     if (!(TT.cfile->fp = fopen(fn, "r"))) FFATAL("can't open %s\n", fn);
+    TT.cfile->fn = fn;
     zvalue_copy(&STACK[FILENAME], &TT.rgl.cur_arg);
   } else {
     TT.rgl.eof = 1;
@@ -3301,6 +3299,7 @@ static int next_fp(void)
   }
   set_num(&STACK[FNR], 0);
   TT.cfile->recoffs = TT.cfile->endoffs = 0;  // reset record buffer
+  TT.cfile->is_tty = isatty(fileno(TT.cfile->fp));
   return 1;
 }
 
@@ -3342,8 +3341,8 @@ static int rx_findx(regex_t *rx, char *s, long len, regoff_t *start, regoff_t *e
 
 static ssize_t getrec_f(struct zfile *zfp)
 {
-  int r = 0, rs = ENSURE_STR(&STACK[RS])->vst->str[0] & 0xff;
-  if (!rs) return getrec_multiline(zfp);
+  int r = 0;
+  if (!ENSURE_STR(&STACK[RS])->vst->str[0]) return getrec_multiline(zfp);
   regex_t rsrx, *rsrxp = &rsrx;
   // TEMP!! FIXME Need to cache and avoid too-frequent rx compiles
   rx_zvalue_compile(&rsrxp, &STACK[RS]);
@@ -3355,7 +3354,11 @@ static ssize_t getrec_f(struct zfile *zfp)
 #define RS_LENGTH_MARGIN    (INIT_RECBUF_LEN / 8)
       if (!zfp->recbuf)
         zfp->recbuf = xmalloc((zfp->recbufsize = INIT_RECBUF_LEN) + 1);
-      zfp->endoffs = fread(zfp->recbuf, 1, zfp->recbufsize, zfp->fp);
+      if (zfp->is_tty && !memcmp(STACK[RS].vst->str, "\n", 2)) {
+        zfp->endoffs = 0;
+        if (fgets(zfp->recbuf, zfp->recbufsize, zfp->fp))
+          zfp->endoffs = strlen(zfp->recbuf);
+      } else zfp->endoffs = fread(zfp->recbuf, 1, zfp->recbufsize, zfp->fp);
       zfp->recoffs = 0;
       zfp->recbuf[zfp->endoffs] = 0;
       if (!zfp->endoffs) break;
@@ -4475,11 +4478,11 @@ static void run(int optind, int argc, char **argv, char *sepstring,
   xregcomp(&TT.rx_default, "[ \t\n]+", REG_EXTENDED);
   xregcomp(&TT.rx_last, "[ \t\n]+", REG_EXTENDED);
   xregcomp(&TT.rx_printf_fmt, printf_fmt_rx, REG_EXTENDED);
-  new_file("-", stdin, 'r', 'f')->is_std_file = 1;
-  new_file("/dev/stdin", stdin, 'r', 'f')->is_std_file = 1;
-  new_file("/dev/stdout", stdout, 'w', 'f')->is_std_file = 1;
+  new_file("-", stdin, 'r', 1, 1);
+  new_file("/dev/stdin", stdin, 'r', 1, 1);
+  new_file("/dev/stdout", stdout, 'w', 1, 1);
   TT.zstdout = TT.zfiles;
-  new_file("/dev/stderr", stderr, 'w', 'f')->is_std_file = 1;
+  new_file("/dev/stderr", stderr, 'w', 1, 1);
   seedrand(123);
   int status = -1, r = 0;
   if (TT.cgl.first_begin) r = interp(TT.cgl.first_begin, &status);
-- 
2.43.0

