On Sun, 28 May 2023 19:01:52 +0200 Elvira Khabirova <[email protected]> wrote:
> When xmalloc_fgetline was introduced, getline(3) was a GNU extension > and was not necessarily implemented in libcs. Since then, > it's become a part of POSIX.1-2008 and is now implemented in > glibc, uClibc-ng, and musl. > > Previously, xmalloc_fgetline directly called bb_get_chunk_from_file. > The issue with that approach is that bb_get_chunk_from_file stops > both at \n and at \0. This introduces unexpected behavior when tools > are presented with inputs containing \0. For example, in a comparison > of GNU core utils cut with busybox cut: > > % echo -ne '\x00\x01\x00\x03\x04\x05\x06' | cut -b1-3 | hexdump -C > 00000000 00 01 00 0a |....| > 00000004 > % echo -ne '\x00\x01\x00\x03\x04\x05\x06' | ./busybox cut -b1-3 | hexdump -C > 00000000 0a 01 0a 03 04 05 0a |.......| > 00000007 > > Here, due to bb_get_chunk_from_file splitting lines by \n and \0, > busybox cut interprets the input as three lines instead of one. > > Also, since xmalloc_fgetline never returned strings with embedded \0, > cut_file expects strlen of the returned string to match the string's > total length. > > To fix the behavior of the cut utility, introduce xmalloc_fgetline_n, > that fully matches the behavior of xmalloc_fgetline, > but also returns the amount of bytes read. > > Add a configuration option, FEATURE_USE_GETLINE, and enable it > by default. Use getline in xmalloc_fgetline_n if the feature is enabled. > > Change the behavior of cut_file to use the values returned > by the new function instead of calling strlen. > > Call xmalloc_fgetline_n from xmalloc_fgetline. > > Add a test for the previously mentioned case. > > With FEATURE_USE_GETLINE: > > function old new delta > xmalloc_fgetline_n - 173 +173 > xmalloc_fgetline 85 58 -27 > cut_main 1406 1367 -39 > ------------------------------------------------------------------------------ > (add/remove: 1/0 grow/shrink: 0/2 up/down: 173/-66) Total: 107 bytes > > Without FEATURE_USE_GETLINE: > > function old new delta > xmalloc_fgetline_n - 41 +41 > xmalloc_fgetline 85 58 -27 > cut_main 1406 1367 -39 > ------------------------------------------------------------------------------ > (add/remove: 1/0 grow/shrink: 0/2 up/down: 41/-66) Total: -25 bytes > > Fixes: https://bugs.busybox.net/show_bug.cgi?id=15276 > Signed-off-by: Elvira Khabirova <[email protected]> > --- > coreutils/cut.c | 4 ++-- > include/libbb.h | 2 ++ > libbb/Config.src | 11 +++++++++++ > libbb/get_line_from_file.c | 37 ++++++++++++++++++++++++++++++++----- > testsuite/cut.tests | 2 ++ > 5 files changed, 49 insertions(+), 7 deletions(-) > > diff --git a/coreutils/cut.c b/coreutils/cut.c > index 55bdd9386..7c87467ca 100644 > --- a/coreutils/cut.c > +++ b/coreutils/cut.c > @@ -89,6 +89,7 @@ static void cut_file(FILE *file, const char *delim, const > char *odelim, > const struct cut_list *cut_lists, unsigned nlists) > { > char *line; > + size_t linelen = 0; > unsigned linenum = 0; /* keep these zero-based to be consistent */ > regex_t reg; > int spos, shoe = option_mask32 & CUT_OPT_REGEX_FLGS; > @@ -96,10 +97,9 @@ static void cut_file(FILE *file, const char *delim, const > char *odelim, > if (shoe) xregcomp(®, delim, REG_EXTENDED); > > /* go through every line in the file */ > - while ((line = xmalloc_fgetline(file)) != NULL) { > + while ((line = xmalloc_fgetline_n(file, &linelen)) != NULL) { > > /* set up a list so we can keep track of what's been printed */ > - int linelen = strlen(line); > char *printed = xzalloc(linelen + 1); > char *orig_line = line; > unsigned cl_pos = 0; > diff --git a/include/libbb.h b/include/libbb.h > index 6191debb1..73d16647a 100644 > --- a/include/libbb.h > +++ b/include/libbb.h > @@ -1048,6 +1048,8 @@ extern char *xmalloc_fgetline_str(FILE *file, const > char *terminating_string) FA > extern char *xmalloc_fgets(FILE *file) FAST_FUNC RETURNS_MALLOC; > /* Chops off '\n' from the end, unlike fgets: */ > extern char *xmalloc_fgetline(FILE *file) FAST_FUNC RETURNS_MALLOC; > +/* Same as xmalloc_fgetline but returns number of bytes read: */ > +extern char* xmalloc_fgetline_n(FILE *file, size_t *n) FAST_FUNC > RETURNS_MALLOC; > /* Same, but doesn't try to conserve space (may have some slack after the > end) */ > /* extern char *xmalloc_fgetline_fast(FILE *file) FAST_FUNC RETURNS_MALLOC; > */ > > diff --git a/libbb/Config.src b/libbb/Config.src > index b980f19a9..7a37929d6 100644 > --- a/libbb/Config.src > +++ b/libbb/Config.src > @@ -147,6 +147,17 @@ config MONOTONIC_SYSCALL > will be used instead (which gives wrong results if date/time > is reset). > > +config FEATURE_USE_GETLINE > + bool "Use getline library function" > + default y > + help > + When enabled, busybox will use the libc getline() function > + instead of getc loops to read data from files. > + Using getline provides better behavior when utilities > + encounter NUL bytes in their inputs. > + getline() was originally a GNU extension, but now is a part > + of POSIX.1-2008 and is implemented in contemporary libcs. > + > config IOCTL_HEX2STR_ERROR > bool "Use ioctl names rather than hex values in error messages" > default y > diff --git a/libbb/get_line_from_file.c b/libbb/get_line_from_file.c > index 903ff1fb6..096bbd66f 100644 > --- a/libbb/get_line_from_file.c > +++ b/libbb/get_line_from_file.c > @@ -12,9 +12,9 @@ > > char* FAST_FUNC bb_get_chunk_from_file(FILE *file, size_t *end) > { > - int ch; > - size_t idx = 0; > char *linebuf = NULL; > + size_t idx = 0; > + int ch; > > while ((ch = getc(file)) != EOF) { > /* grow the line buffer as necessary */ > @@ -55,10 +55,37 @@ char* FAST_FUNC xmalloc_fgets(FILE *file) > char* FAST_FUNC xmalloc_fgetline(FILE *file) > { > size_t i; > - char *c = bb_get_chunk_from_file(file, &i); > > - if (i && c[--i] == '\n') > - c[i] = '\0'; > + return xmalloc_fgetline_n(file, &i); > +} > + > +/* Get line. Remove trailing \n. Return the number of bytes read. */ > +char* FAST_FUNC xmalloc_fgetline_n(FILE *file, size_t *n) > +{ > + char *c = NULL; > +#if ENABLE_FEATURE_USE_GETLINE > + size_t idx = 0; > + ssize_t nread; > + > + nread = getline(&c, &idx, file); > + if (nread < 0) { > + if (errno == ENOMEM) > + bb_die_memory_exhausted(); > + free(c); > + c = NULL; > + *n = 0; > + } else { > + *n = nread; > + } > +#else > + c = bb_get_chunk_from_file(file, n); > +#endif > + > + if (*n) { > + if (c[*n - 1] == '\n') { > + c[--*n] = '\0'; > + } > + } > > return c; > } > diff --git a/testsuite/cut.tests b/testsuite/cut.tests > index 2458c019c..0fd731688 100755 > --- a/testsuite/cut.tests > +++ b/testsuite/cut.tests > @@ -81,4 +81,6 @@ SKIP= > testing "cut empty field" "cut -d ':' -f 1-3" "a::b\n" "" "a::b\n" > testing "cut empty field 2" "cut -d ':' -f 3-5" "b::c\n" "" "a::b::c:d\n" > > +testing "cut with embedded NUL" "cut -c 1-3" "\x00\x01\x00\n" "" > "\x00\x01\x00\x03\x04\x05\x06" > + > exit $FAILCOUNT Hi, what is this change for? - int ch; - size_t idx = 0; char *linebuf = NULL; + size_t idx = 0; + int ch; Ciao, Tito _______________________________________________ busybox mailing list [email protected] http://lists.busybox.net/mailman/listinfo/busybox
