Package: gawk
Version: 1:3.1.7.dfsg-5
Severity: normal
Tags: patch
The contents of RT may be garbled and the length may be wrong when RS=="".
There are two cases:
- Case 1: The last record is 'terminated' with '\n' instead of '\n\n'
In this case, the length of RT is reported as 0 instead of 1
Example (the 1st and 3rd results are correct; the 2nd should be 1):
$ awk 'BEGIN {printf "0"; exit}' | awk 'BEGIN {RS=""}; {print length(RT)}'
0
$ awk 'BEGIN {printf "0\n"; exit}' | awk 'BEGIN {RS=""}; {print length(RT)}'
0
$ awk 'BEGIN {printf "0\n\n"; exit}' | awk 'BEGIN {RS=""}; {print length(RT)}'
2
- Case 2: RT is longer than the shortest RT seen so far
In this case, the additional characters in RT are garbage.
In a non-C locale, the length is also reported incorrectly.
$ awk 'BEGIN {printf "0\n\n\n1\n\n\n\n\n"; exit}' | LC_ALL=C awk 'BEGIN {RS=""}; {print length(RT),gensub("\n","\\\\n","g",RT)}' | cat -v
3 \n\n\n
5 \n\n\n^@^@
$ awk 'BEGIN {printf "0\n\n\n1\n\n\n\n\n"; exit}' | LC_ALL=en_US.UTF-8 awk 'BEGIN {RS=""}; {print length(RT),gensub("\n","\\\\n","g",RT)}' | cat -v
3 \n\n\n
3 \n\n\n^@^@
In both cases, the output should be:
3 \n\n\n
5 \n\n\n\n\n
I have attached a patch that fixes these problems, and I have added some test
cases as well. The patched source passes all tests and compiles into a .deb
without errors.
After applying the patch, execute permission must be set on the test scripts:
$ chmod +x test/rtlen*.sh
I hereby put the patch, to which I have all rights, in the public domain, so
that there can (hopefully) be no legal objection to incorporating it.
Regards.
Rogier.
-- System Information:
Debian Release: wheezy/sid
APT prefers squeeze-updates
APT policy: (500, 'squeeze-updates'), (500, 'testing')
Architecture: i386 (x86_64)
Kernel: Linux 2.6.32-5-amd64 (SMP w/1 CPU core)
Locale: LANG=en_US.UTF-8, LC_CTYPE=en_US.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Versions of packages gawk depends on:
ii libc6 2.11.2-11 Embedded GNU C Library: Shared lib
gawk recommends no packages.
gawk suggests no packages.
-- no debconf information
diff -Nur gawk-3.1.7.dfsg/io.c gawk-3.1.7.dfsg-patch/io.c
--- gawk-3.1.7.dfsg/io.c 2009-07-09 21:32:10.000000000 +0200
+++ gawk-3.1.7.dfsg-patch/io.c 2011-03-26 15:59:46.000000000 +0100
@@ -2951,8 +2951,12 @@
while (*bp++ != '\n')
continue;
- if (bp >= iop->dataend) { /* no terminator */
+ if (bp >= iop->dataend) { /* no full terminator */
iop->scanoff = recm->len = bp - iop->off - 1;
+ if (bp == iop->dataend) { /* half a terminator */
+ recm->rt_start = bp - 1;
+ recm->rt_len = 1;
+ }
*state = INDATA;
return NOTERM;
}
@@ -3145,9 +3149,12 @@
/* else
leave it alone */
} else if (matchrec == rsnullscan) {
- if (rtval->stlen <= recm.rt_len)
+ if (rtval->stlen >= recm.rt_len) {
rtval->stlen = recm.rt_len;
- else
+#ifdef MBS_SUPPORT
+ rtval->wstlen = recm.rt_len;
+#endif
+ } else
set_RT(recm.rt_start, recm.rt_len);
} else
set_RT(recm.rt_start, recm.rt_len);
diff -Nur gawk-3.1.7.dfsg/test/Makefile.in gawk-3.1.7.dfsg-patch/test/Makefile.in
--- gawk-3.1.7.dfsg/test/Makefile.in 2009-07-21 21:29:59.000000000 +0200
+++ gawk-3.1.7.dfsg-patch/test/Makefile.in 2011-03-26 17:12:03.000000000 +0100
@@ -770,6 +770,10 @@
rswhite.awk \
rswhite.in \
rswhite.ok \
+ rtlen.ok \
+ rtlen.sh \
+ rtlen01.ok \
+ rtlen01.sh \
scalar.awk \
scalar.ok \
sclforin.awk \
@@ -908,7 +912,8 @@
unterm uparrfs wideidx wideidx2 widesub widesub2 widesub3 \
widesub4 wjposer1 zero2 zeroe0 zeroflag
-UNIX_TESTS = fflush getlnhd localenl pid pipeio1 pipeio2 poundbang space strftlng
+UNIX_TESTS = fflush getlnhd localenl pid pipeio1 pipeio2 poundbang rtlen rtlen01 \
+ space strftlng
GAWK_EXT_TESTS = \
argtest backw badargs binmode1 clos1way devfd devfd1 devfd2 fieldwdth \
fsfwfs funlen fwtest fwtest2 gensub gensub2 getlndir gnuops2 gnuops3 \
@@ -921,7 +926,7 @@
INET_TESTS = inetechu inetecht inetdayu inetdayt
MACHINE_TESTS = double1 double2 fmtspcl intformat
LOCALE_CHARSET_TESTS = asort asorti fmttest fnarydel fnparydl lc_num1 mbfw1 \
- mbprintf1 mbprintf2 rebt8b2 sort1 sprintfc whiny
+ mbprintf1 mbprintf2 rebt8b2 rtlenmb sort1 sprintfc whiny
# List of the tests which should be run with --lint option:
@@ -1525,6 +1530,22 @@
@head $(srcdir)/rsstart1.in | $(AWK) -f $(srcdir)/rsstart2.awk >_$@
@-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+rtlen::
+ @echo $@
+ @$(srcdir)/$@.sh >_$@ || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+
+rtlen01::
+ @echo $@
+ @$(srcdir)/$@.sh >_$@ || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+
+rtlenmb::
+ @echo $@
+ @GAWKLOCALE=en_US.UTF-8 ; export GAWKLOCALE ; \
+ $(srcdir)/rtlen.sh >_$@ || echo EXIT CODE: $$? >>_$@
+ @-$(CMP) $(srcdir)/rtlen.ok _$@ && rm -f _$@
+
nondec2::
@echo $@
@$(AWK) --non-decimal-data -v a=0x1 -f $(srcdir)/$@.awk >_$@
diff -Nur gawk-3.1.7.dfsg/test/rtlen.ok gawk-3.1.7.dfsg-patch/test/rtlen.ok
--- gawk-3.1.7.dfsg/test/rtlen.ok 1970-01-01 01:00:00.000000000 +0100
+++ gawk-3.1.7.dfsg-patch/test/rtlen.ok 2011-03-26 16:54:14.000000000 +0100
@@ -0,0 +1,3 @@
+3
+5
+2
diff -Nur gawk-3.1.7.dfsg/test/rtlen.sh gawk-3.1.7.dfsg-patch/test/rtlen.sh
--- gawk-3.1.7.dfsg/test/rtlen.sh 1970-01-01 01:00:00.000000000 +0100
+++ gawk-3.1.7.dfsg-patch/test/rtlen.sh 2011-03-26 16:54:49.000000000 +0100
@@ -0,0 +1,5 @@
+#! /bin/sh
+
+AWK=${AWK:-../gawk}
+
+$AWK 'BEGIN {printf "0\n\n\n1\n\n\n\n\n2\n\n"; exit}' | $AWK 'BEGIN {RS=""}; {print length(RT)}'
diff -Nur gawk-3.1.7.dfsg/test/rtlen01.ok gawk-3.1.7.dfsg-patch/test/rtlen01.ok
--- gawk-3.1.7.dfsg/test/rtlen01.ok 1970-01-01 01:00:00.000000000 +0100
+++ gawk-3.1.7.dfsg-patch/test/rtlen01.ok 2011-03-26 14:59:39.000000000 +0100
@@ -0,0 +1,3 @@
+0
+1
+2
diff -Nur gawk-3.1.7.dfsg/test/rtlen01.sh gawk-3.1.7.dfsg-patch/test/rtlen01.sh
--- gawk-3.1.7.dfsg/test/rtlen01.sh 1970-01-01 01:00:00.000000000 +0100
+++ gawk-3.1.7.dfsg-patch/test/rtlen01.sh 2011-03-26 15:20:14.000000000 +0100
@@ -0,0 +1,8 @@
+#! /bin/sh
+
+AWK=${AWK:-../gawk}
+
+$AWK 'BEGIN {printf "0"; exit}' | $AWK 'BEGIN {RS=""}; {print length(RT)}'
+$AWK 'BEGIN {printf "0\n"; exit}' | $AWK 'BEGIN {RS=""}; {print length(RT)}'
+$AWK 'BEGIN {printf "0\n\n"; exit}' | $AWK 'BEGIN {RS=""}; {print length(RT)}'
+