Package: gawk
Version: 1:3.1.7.dfsg-5
Severity: normal
Tags: patch

The contents of RT may be garbled and the length may be wrong when RS=="".

There are two cases:
- Case 1: The last record is 'terminated' with '\n' instead of '\n\n'
  In this case, the length of RT is reported as 0 instead of 1
  Example (1st and 3rd are OK):
    $ awk 'BEGIN {printf "0"; exit}' | awk 'BEGIN {RS=""}; {print length(RT)}'
    0
    $ awk 'BEGIN {printf "0\n"; exit}' | awk 'BEGIN {RS=""}; {print length(RT)}'
    0
    $ awk 'BEGIN {printf "0\n\n"; exit}' | awk 'BEGIN {RS=""}; {print length(RT)}'
    2
- Case 2: RT is longer than the shortest RT seen so far
  In this case, the additional characters in RT are garbage.
  In a non-C locale, the length is also reported incorrectly.
    $ awk 'BEGIN {printf "0\n\n\n1\n\n\n\n\n"; exit}' | LC_ALL=C awk 'BEGIN {RS=""}; {print length(RT),gensub("\n","\\\\n","g",RT)}' | cat -v
    3 \n\n\n
    5 \n\n\n^@^@
    $ awk 'BEGIN {printf "0\n\n\n1\n\n\n\n\n"; exit}' | LC_ALL=en_US.UTF-8 awk 'BEGIN {RS=""}; {print length(RT),gensub("\n","\\\\n","g",RT)}' | cat -v
    3 \n\n\n
    3 \n\n\n^@^@
  In both cases, the output should be:
    3 \n\n\n
    5 \n\n\n\n\n

I have attached a patch that fixes these problems, and I have added some
test cases as well. The patched source passes all tests and compiles into
a .deb without errors.
After applying the patch, execute permission must be set on the test scripts:
    $ chmod +x test/rtlen*.sh

I hereby put the patch, to which I have all rights, in the public domain,
so that there can (hopefully) be no legal objection to incorporating it.

Regards.

Rogier.

-- System Information:
Debian Release: wheezy/sid
  APT prefers squeeze-updates
  APT policy: (500, 'squeeze-updates'), (500, 'testing')
Architecture: i386 (x86_64)

Kernel: Linux 2.6.32-5-amd64 (SMP w/1 CPU core)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash

Versions of packages gawk depends on:
ii  libc6                         2.11.2-11  Embedded GNU C Library: Shared lib

gawk recommends no packages.

gawk suggests no packages.

-- no debconf information
diff -Nur gawk-3.1.7.dfsg/io.c gawk-3.1.7.dfsg-patch/io.c
--- gawk-3.1.7.dfsg/io.c	2009-07-09 21:32:10.000000000 +0200
+++ gawk-3.1.7.dfsg-patch/io.c	2011-03-26 15:59:46.000000000 +0100
@@ -2951,8 +2951,12 @@
         while (*bp++ != '\n')
                 continue;
 
-        if (bp >= iop->dataend) {       /* no terminator */
+        if (bp >= iop->dataend) {       /* no full terminator */
                 iop->scanoff = recm->len = bp - iop->off - 1;
+		if (bp == iop->dataend) {	/* half a terminator */
+			recm->rt_start = bp - 1;
+			recm->rt_len = 1;
+		}
                 *state = INDATA;
                 return NOTERM;
         }
@@ -3145,9 +3149,12 @@
                         /* else
                                 leave it alone */
                 } else if (matchrec == rsnullscan) {
-                        if (rtval->stlen <= recm.rt_len)
+                        if (rtval->stlen >= recm.rt_len) {
                                 rtval->stlen = recm.rt_len;
-                        else
+#ifdef MBS_SUPPORT
+                                rtval->wstlen = recm.rt_len;
+#endif
+			} else
                                 set_RT(recm.rt_start, recm.rt_len);
                 } else
                         set_RT(recm.rt_start, recm.rt_len);
diff -Nur gawk-3.1.7.dfsg/test/Makefile.in gawk-3.1.7.dfsg-patch/test/Makefile.in
--- gawk-3.1.7.dfsg/test/Makefile.in	2009-07-21 21:29:59.000000000 +0200
+++ gawk-3.1.7.dfsg-patch/test/Makefile.in	2011-03-26 17:12:03.000000000 +0100
@@ -770,6 +770,10 @@
 	rswhite.awk \
 	rswhite.in \
 	rswhite.ok \
+	rtlen.ok \
+	rtlen.sh \
+	rtlen01.ok \
+	rtlen01.sh \
 	scalar.awk \
 	scalar.ok \
 	sclforin.awk \
@@ -908,7 +912,8 @@
 	unterm uparrfs wideidx wideidx2 widesub widesub2 widesub3 \
 	widesub4 wjposer1 zero2 zeroe0 zeroflag
 
-UNIX_TESTS = fflush getlnhd localenl pid pipeio1 pipeio2 poundbang space strftlng
+UNIX_TESTS = fflush getlnhd localenl pid pipeio1 pipeio2 poundbang rtlen rtlen01 \
+	space strftlng
 GAWK_EXT_TESTS = \
 	argtest backw badargs binmode1 clos1way devfd devfd1 devfd2 fieldwdth \
 	fsfwfs funlen fwtest fwtest2 gensub gensub2 getlndir gnuops2 gnuops3 \
@@ -921,7 +926,7 @@
 INET_TESTS = inetechu inetecht inetdayu inetdayt
 MACHINE_TESTS = double1 double2 fmtspcl intformat
 LOCALE_CHARSET_TESTS = asort asorti fmttest fnarydel fnparydl lc_num1 mbfw1 \
-	mbprintf1 mbprintf2 rebt8b2 sort1 sprintfc whiny
+	mbprintf1 mbprintf2 rebt8b2 rtlenmb sort1 sprintfc whiny
 
 
 # List of the tests which should be run with --lint option:
@@ -1525,6 +1530,22 @@
 	@head $(srcdir)/rsstart1.in | $(AWK) -f $(srcdir)/rsstart2.awk >_$@
 	@-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
 
+rtlen::
+	@echo $@
+	@$(srcdir)/$@.sh >_$@ || echo EXIT CODE: $$? >>_$@
+	@-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+
+rtlen01::
+	@echo $@
+	@$(srcdir)/$@.sh >_$@ || echo EXIT CODE: $$? >>_$@
+	@-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+
+rtlenmb::
+	@echo $@
+	@GAWKLOCALE=en_US.UTF-8 ; export GAWKLOCALE ; \
+	$(srcdir)/rtlen.sh >_$@ || echo EXIT CODE: $$? >>_$@
+	@-$(CMP) $(srcdir)/rtlen.ok _$@ && rm -f _$@
+
 nondec2::
 	@echo $@
 	@$(AWK) --non-decimal-data -v a=0x1 -f $(srcdir)/$@.awk >_$@
diff -Nur gawk-3.1.7.dfsg/test/rtlen.ok gawk-3.1.7.dfsg-patch/test/rtlen.ok
--- gawk-3.1.7.dfsg/test/rtlen.ok	1970-01-01 01:00:00.000000000 +0100
+++ gawk-3.1.7.dfsg-patch/test/rtlen.ok	2011-03-26 16:54:14.000000000 +0100
@@ -0,0 +1,3 @@
+3
+5
+2
diff -Nur gawk-3.1.7.dfsg/test/rtlen.sh gawk-3.1.7.dfsg-patch/test/rtlen.sh
--- gawk-3.1.7.dfsg/test/rtlen.sh	1970-01-01 01:00:00.000000000 +0100
+++ gawk-3.1.7.dfsg-patch/test/rtlen.sh	2011-03-26 16:54:49.000000000 +0100
@@ -0,0 +1,5 @@
+#! /bin/sh
+
+AWK=${AWK:-../gawk}
+
+$AWK 'BEGIN {printf "0\n\n\n1\n\n\n\n\n2\n\n"; exit}' | $AWK 'BEGIN {RS=""}; {print length(RT)}'
diff -Nur gawk-3.1.7.dfsg/test/rtlen01.ok gawk-3.1.7.dfsg-patch/test/rtlen01.ok
--- gawk-3.1.7.dfsg/test/rtlen01.ok	1970-01-01 01:00:00.000000000 +0100
+++ gawk-3.1.7.dfsg-patch/test/rtlen01.ok	2011-03-26 14:59:39.000000000 +0100
@@ -0,0 +1,3 @@
+0
+1
+2
diff -Nur gawk-3.1.7.dfsg/test/rtlen01.sh gawk-3.1.7.dfsg-patch/test/rtlen01.sh
--- gawk-3.1.7.dfsg/test/rtlen01.sh	1970-01-01 01:00:00.000000000 +0100
+++ gawk-3.1.7.dfsg-patch/test/rtlen01.sh	2011-03-26 15:20:14.000000000 +0100
@@ -0,0 +1,8 @@
+#! /bin/sh
+
+AWK=${AWK:-../gawk}
+
+$AWK 'BEGIN {printf "0"; exit}' | $AWK 'BEGIN {RS=""}; {print length(RT)}'
+$AWK 'BEGIN {printf "0\n"; exit}' | $AWK 'BEGIN {RS=""}; {print length(RT)}'
+$AWK 'BEGIN {printf "0\n\n"; exit}' | $AWK 'BEGIN {RS=""}; {print length(RT)}'
+

Reply via email to