tag 203115 +patch
thanks

Hi,

I could reproduce this bug. The problem shows up when the input includes NUL
chars, because the functions used by mawk expect C strings. This ends up in
a malloc-and-read loop that runs until EOF is reached; on EOF, mawk processes
only one of the lines it has read. If the file is large enough (or data is
being read from an infinite device such as /dev/urandom), the OOM killer will
do its job.

Replacing the call to strchr with memchr fixes part of the issue; the rest of
the patch deals with the buffer to avoid dropping what was read.
The patch also includes a small test for this issue.

This patch fixes only the block buffering part (reading from a file). Reading
from stdin is done with fgets, which would still fail if the input includes
a NUL char.

Thanks,
-- 
"The sooner you start to code, the longer the program will take." -- Roy Carlson
Saludos /\/\ /\ >< `/
## Description: Some fixes for the block buffering
## Origin/Author: Maximiliano Curia <[email protected]>
## Bug: http://bugs.debian.org/203115
Index: mawk-1.3.3/fin.c
===================================================================
--- mawk-1.3.3.orig/fin.c       2011-12-13 14:03:48.829442876 -0300
+++ mawk-1.3.3/fin.c    2011-12-13 14:03:48.909439674 -0300
@@ -241,19 +241,23 @@
       else
       {
         /* block buffering */
-        r = fillbuff(fin->fd, fin->buff, fin->nbuffs * BUFFSZ) ;
-        if (r == 0)
+        if ( (fin->flags & START_FLAG) || (fin->buff + (fin->nbuffs * BUFFSZ) <= fin->buffp) )
         {
-           fin->flags |= EOF_FLAG ;
+           r = fillbuff(fin->fd, fin->buff, fin->nbuffs * BUFFSZ) ;
+           if (r == 0)
+           {
+              fin->flags |= EOF_FLAG ;
+              fin->buffp = fin->buff ;
+              goto restart ;    /* might be main */
+           }
+           else if (r < fin->nbuffs * BUFFSZ)
+           {
+              fin->flags |= EOF_FLAG ;
+           }
+
            fin->buffp = fin->buff ;
-           goto restart ;       /* might be main */
-        }
-        else if (r < fin->nbuffs * BUFFSZ)
-        {
-           fin->flags |= EOF_FLAG ;
         }
-
-        p = fin->buffp = fin->buff ;
+        p = fin->buffp;
 
         if (fin->flags & START_FLAG)
         {
@@ -274,7 +278,7 @@
    switch (rs_shadow.type)
    {
       case SEP_CHAR:
-        q = strchr(p, rs_shadow.c) ;
+        q = memchr(p, rs_shadow.c, (fin->nbuffs * BUFFSZ) - (p - fin->buff) ) ;
         match_len = 1 ;
         break ;
 
@@ -328,11 +332,14 @@
       /* move a partial line to front of buffer and try again */
       unsigned rr ;
 
-      p = (char *) memcpy(fin->buff, p, r = strlen(p)) ;
-      q = p+r ;         rr = fin->nbuffs*BUFFSZ - r ;
+      rr = p - fin->buff;
+      r = fin->nbuffs*BUFFSZ - rr;
+      p = (char *) memcpy(fin->buff, p, r) ;
+      q = p+r ;
 
       if ((r = fillbuff(fin->fd, q, rr)) < rr)
         fin->flags |= EOF_FLAG ;
+      fin->buffp = fin->buff;
    }
    goto retry ;
 }
Index: mawk-1.3.3/test/mawktest
===================================================================
--- mawk-1.3.3.orig/test/mawktest       2011-12-13 14:03:48.869441275 -0300
+++ mawk-1.3.3/test/mawktest    2011-12-13 14:18:02.183249194 -0300
@@ -69,6 +69,12 @@
 
 echo general stress test passed
 
+#################################
+echo
+echo testing binary data in block read
+
+mawk -f gen_bin.awk > temp$$ || exit
+mawk '//{print}' temp$$ | cmp -s - temp$$ || exit
 
 echo
 echo  tested mawk seems OK
Index: mawk-1.3.3/test/gen_bin.awk
===================================================================
--- /dev/null   1970-01-01 00:00:00.000000000 +0000
+++ mawk-1.3.3/test/gen_bin.awk 2011-12-13 14:15:38.637005136 -0300
@@ -0,0 +1,11 @@
+BEGIN {
+       for (i=0; i<512; i++) {
+               s = "";
+               for (j=0; j<63; j++) {
+                       c = (i+j) % 256;
+                       s = sprintf("%s%c", s, c);
+               }
+               print s;
+       }
+}
+

Attachment: signature.asc
Description: Digital signature

Reply via email to