When xmalloc_fgetline was introduced, getline(3) was a GNU extension and was not necessarily implemented in libcs. Since then, it's become a part of POSIX.1-2008 and is now implemented in glibc, uClibc-ng, and musl.
Previously, xmalloc_fgetline directly called bb_get_chunk_from_file. The issue with that approach is that bb_get_chunk_from_file stops both at \n and at \0. This introduces unexpected behavior when tools are presented with inputs containing \0. For example, in a comparison of GNU core utils cut with busybox cut: % echo -ne '\x00\x01\x00\x03\x04\x05\x06' | cut -b1-3 | hexdump -C 00000000 00 01 00 0a |....| 00000004 % echo -ne '\x00\x01\x00\x03\x04\x05\x06' | ./busybox cut -b1-3 | hexdump -C 00000000 0a 01 0a 03 04 05 0a |.......| 00000007 Here, due to bb_get_chunk_from_file splitting lines by \n and \0, busybox cut interprets the input as three lines instead of one. Also, since xmalloc_fgetline never returned strings with embedded \0, cut_file expects strlen of the returned string to match the string's total length. To fix the behavior of the cut utility, introduce xmalloc_fgetline_n, that fully matches the behavior of xmalloc_fgetline, but also returns the amount of bytes read. Add a configuration option, FEATURE_USE_GETLINE, and enable it by default. Use getline in xmalloc_fgetline_n if the feature is enabled. Change the behavior of cut_file to use the values returned by the new function instead of calling strlen. Call xmalloc_fgetline_n from xmalloc_fgetline. Add a test for the previously mentioned case. With FEATURE_USE_GETLINE: function old new delta xmalloc_fgetline_n - 173 +173 xmalloc_fgetline 85 58 -27 cut_main 1406 1367 -39 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 0/2 up/down: 173/-66) Total: 107 bytes Without FEATURE_USE_GETLINE: function old new delta xmalloc_fgetline_n - 41 +41 xmalloc_fgetline 85 58 -27 cut_main 1406 1367 -39 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 0/2 up/down: 41/-66) Total: -25 bytes Fixes: https://bugs.busybox.net/show_bug.cgi?id=15276 Signed-off-by: Elvira Khabirova <[email protected]> --- coreutils/cut.c | 4 ++-- include/libbb.h | 2 ++ libbb/Config.src | 11 +++++++++++ libbb/get_line_from_file.c | 37 ++++++++++++++++++++++++++++++++----- testsuite/cut.tests | 2 ++ 5 files changed, 49 insertions(+), 7 deletions(-) diff --git a/coreutils/cut.c b/coreutils/cut.c index 55bdd9386..7c87467ca 100644 --- a/coreutils/cut.c +++ b/coreutils/cut.c @@ -89,6 +89,7 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, const struct cut_list *cut_lists, unsigned nlists) { char *line; + size_t linelen = 0; unsigned linenum = 0; /* keep these zero-based to be consistent */ regex_t reg; int spos, shoe = option_mask32 & CUT_OPT_REGEX_FLGS; @@ -96,10 +97,9 @@ static void cut_file(FILE *file, const char *delim, const char *odelim, if (shoe) xregcomp(®, delim, REG_EXTENDED); /* go through every line in the file */ - while ((line = xmalloc_fgetline(file)) != NULL) { + while ((line = xmalloc_fgetline_n(file, &linelen)) != NULL) { /* set up a list so we can keep track of what's been printed */ - int linelen = strlen(line); char *printed = xzalloc(linelen + 1); char *orig_line = line; unsigned cl_pos = 0; diff --git a/include/libbb.h b/include/libbb.h index 6191debb1..73d16647a 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -1048,6 +1048,8 @@ extern char *xmalloc_fgetline_str(FILE *file, const char *terminating_string) FA extern char *xmalloc_fgets(FILE *file) FAST_FUNC RETURNS_MALLOC; /* Chops off '\n' from the end, unlike fgets: */ extern char *xmalloc_fgetline(FILE *file) FAST_FUNC RETURNS_MALLOC; +/* Same as xmalloc_fgetline but returns number of bytes read: */ +extern char* xmalloc_fgetline_n(FILE *file, size_t *n) FAST_FUNC RETURNS_MALLOC; /* Same, but doesn't try to conserve space (may have some slack after the end) */ /* extern char *xmalloc_fgetline_fast(FILE *file) FAST_FUNC RETURNS_MALLOC; */ diff --git a/libbb/Config.src b/libbb/Config.src index b980f19a9..7a37929d6 100644 --- a/libbb/Config.src +++ b/libbb/Config.src @@ -147,6 +147,17 @@ config MONOTONIC_SYSCALL will be used instead (which gives wrong results if date/time is reset). +config FEATURE_USE_GETLINE + bool "Use getline library function" + default y + help + When enabled, busybox will use the libc getline() function + instead of getc loops to read data from files. + Using getline provides better behavior when utilities + encounter NUL bytes in their inputs. + getline() was originally a GNU extension, but now is a part + of POSIX.1-2008 and is implemented in contemporary libcs. + config IOCTL_HEX2STR_ERROR bool "Use ioctl names rather than hex values in error messages" default y diff --git a/libbb/get_line_from_file.c b/libbb/get_line_from_file.c index 903ff1fb6..096bbd66f 100644 --- a/libbb/get_line_from_file.c +++ b/libbb/get_line_from_file.c @@ -12,9 +12,9 @@ char* FAST_FUNC bb_get_chunk_from_file(FILE *file, size_t *end) { - int ch; - size_t idx = 0; char *linebuf = NULL; + size_t idx = 0; + int ch; while ((ch = getc(file)) != EOF) { /* grow the line buffer as necessary */ @@ -55,10 +55,37 @@ char* FAST_FUNC xmalloc_fgets(FILE *file) char* FAST_FUNC xmalloc_fgetline(FILE *file) { size_t i; - char *c = bb_get_chunk_from_file(file, &i); - if (i && c[--i] == '\n') - c[i] = '\0'; + return xmalloc_fgetline_n(file, &i); +} + +/* Get line. Remove trailing \n. Return the number of bytes read. */ +char* FAST_FUNC xmalloc_fgetline_n(FILE *file, size_t *n) +{ + char *c = NULL; +#if ENABLE_FEATURE_USE_GETLINE + size_t idx = 0; + ssize_t nread; + + nread = getline(&c, &idx, file); + if (nread < 0) { + if (errno == ENOMEM) + bb_die_memory_exhausted(); + free(c); + c = NULL; + *n = 0; + } else { + *n = nread; + } +#else + c = bb_get_chunk_from_file(file, n); +#endif + + if (*n) { + if (c[*n - 1] == '\n') { + c[--*n] = '\0'; + } + } return c; } diff --git a/testsuite/cut.tests b/testsuite/cut.tests index 2458c019c..0fd731688 100755 --- a/testsuite/cut.tests +++ b/testsuite/cut.tests @@ -81,4 +81,6 @@ SKIP= testing "cut empty field" "cut -d ':' -f 1-3" "a::b\n" "" "a::b\n" testing "cut empty field 2" "cut -d ':' -f 3-5" "b::c\n" "" "a::b::c:d\n" +testing "cut with embedded NUL" "cut -c 1-3" "\x00\x01\x00\n" "" "\x00\x01\x00\x03\x04\x05\x06" + exit $FAILCOUNT -- 2.34.1 _______________________________________________ busybox mailing list [email protected] http://lists.busybox.net/mailman/listinfo/busybox
