When xmalloc_fgetline was introduced, getline(3) was a GNU extension
and was not necessarily implemented in libcs. Since then,
it's become a part of POSIX.1-2008 and is now implemented in
glibc, uClibc-ng, and musl.

Previously, xmalloc_fgetline directly called bb_get_chunk_from_file.
The issue with that approach is that bb_get_chunk_from_file stops
both at \n and at \0. This introduces unexpected behavior when tools
are presented with inputs containing \0. For example, in a comparison
of GNU core utils cut with busybox cut:

% echo -ne '\x00\x01\x00\x03\x04\x05\x06' | cut -b1-3 | hexdump -C
00000000  00 01 00 0a                                       |....|
00000004
% echo -ne '\x00\x01\x00\x03\x04\x05\x06' | ./busybox cut -b1-3 | hexdump -C
00000000  0a 01 0a 03 04 05 0a                              |.......|
00000007

Here, due to bb_get_chunk_from_file splitting lines by \n and \0,
busybox cut interprets the input as three lines instead of one.

Also, since xmalloc_fgetline never returned strings with embedded \0,
cut_file expects strlen of the returned string to match the string's
total length.

To fix the behavior of the cut utility, introduce xmalloc_fgetline_n,
that fully matches the behavior of xmalloc_fgetline,
but also returns the amount of bytes read.

Add a configuration option, FEATURE_USE_GETLINE, and enable it
by default. Use getline in xmalloc_fgetline_n if the feature is enabled.

Change the behavior of cut_file to use the values returned
by the new function instead of calling strlen.

Call xmalloc_fgetline_n from xmalloc_fgetline.

Add a test for the previously mentioned case.

With FEATURE_USE_GETLINE:

function                                             old     new   delta
xmalloc_fgetline_n                                     -     173    +173
xmalloc_fgetline                                      85      58     -27
cut_main                                            1406    1367     -39
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/2 up/down: 173/-66)           Total: 107 bytes

Without FEATURE_USE_GETLINE:

function                                             old     new   delta
xmalloc_fgetline_n                                     -      41     +41
xmalloc_fgetline                                      85      58     -27
cut_main                                            1406    1367     -39
------------------------------------------------------------------------------
(add/remove: 1/0 grow/shrink: 0/2 up/down: 41/-66)            Total: -25 bytes

Fixes: https://bugs.busybox.net/show_bug.cgi?id=15276
Signed-off-by: Elvira Khabirova <[email protected]>
---
 coreutils/cut.c            |  4 ++--
 include/libbb.h            |  2 ++
 libbb/Config.src           | 11 +++++++++++
 libbb/get_line_from_file.c | 37 ++++++++++++++++++++++++++++++++-----
 testsuite/cut.tests        |  2 ++
 5 files changed, 49 insertions(+), 7 deletions(-)

diff --git a/coreutils/cut.c b/coreutils/cut.c
index 55bdd9386..7c87467ca 100644
--- a/coreutils/cut.c
+++ b/coreutils/cut.c
@@ -89,6 +89,7 @@ static void cut_file(FILE *file, const char *delim, const 
char *odelim,
                const struct cut_list *cut_lists, unsigned nlists)
 {
        char *line;
+       size_t linelen = 0;
        unsigned linenum = 0;   /* keep these zero-based to be consistent */
        regex_t reg;
        int spos, shoe = option_mask32 & CUT_OPT_REGEX_FLGS;
@@ -96,10 +97,9 @@ static void cut_file(FILE *file, const char *delim, const 
char *odelim,
        if (shoe) xregcomp(&reg, delim, REG_EXTENDED);
 
        /* go through every line in the file */
-       while ((line = xmalloc_fgetline(file)) != NULL) {
+       while ((line = xmalloc_fgetline_n(file, &linelen)) != NULL) {
 
                /* set up a list so we can keep track of what's been printed */
-               int linelen = strlen(line);
                char *printed = xzalloc(linelen + 1);
                char *orig_line = line;
                unsigned cl_pos = 0;
diff --git a/include/libbb.h b/include/libbb.h
index 6191debb1..73d16647a 100644
--- a/include/libbb.h
+++ b/include/libbb.h
@@ -1048,6 +1048,8 @@ extern char *xmalloc_fgetline_str(FILE *file, const char 
*terminating_string) FA
 extern char *xmalloc_fgets(FILE *file) FAST_FUNC RETURNS_MALLOC;
 /* Chops off '\n' from the end, unlike fgets: */
 extern char *xmalloc_fgetline(FILE *file) FAST_FUNC RETURNS_MALLOC;
+/* Same as xmalloc_fgetline but returns number of bytes read: */
+extern char* xmalloc_fgetline_n(FILE *file, size_t *n) FAST_FUNC 
RETURNS_MALLOC;
 /* Same, but doesn't try to conserve space (may have some slack after the end) 
*/
 /* extern char *xmalloc_fgetline_fast(FILE *file) FAST_FUNC RETURNS_MALLOC; */
 
diff --git a/libbb/Config.src b/libbb/Config.src
index b980f19a9..7a37929d6 100644
--- a/libbb/Config.src
+++ b/libbb/Config.src
@@ -147,6 +147,17 @@ config MONOTONIC_SYSCALL
        will be used instead (which gives wrong results if date/time
        is reset).
 
+config FEATURE_USE_GETLINE
+       bool "Use getline library function"
+       default y
+       help
+       When enabled, busybox will use the libc getline() function
+       instead of getc loops to read data from files.
+       Using getline provides better behavior when utilities
+       encounter NUL bytes in their inputs.
+       getline() was originally a GNU extension, but now is a part
+       of POSIX.1-2008 and is implemented in contemporary libcs.
+
 config IOCTL_HEX2STR_ERROR
        bool "Use ioctl names rather than hex values in error messages"
        default y
diff --git a/libbb/get_line_from_file.c b/libbb/get_line_from_file.c
index 903ff1fb6..096bbd66f 100644
--- a/libbb/get_line_from_file.c
+++ b/libbb/get_line_from_file.c
@@ -12,9 +12,9 @@
 
 char* FAST_FUNC bb_get_chunk_from_file(FILE *file, size_t *end)
 {
-       int ch;
-       size_t idx = 0;
        char *linebuf = NULL;
+       size_t idx = 0;
+       int ch;
 
        while ((ch = getc(file)) != EOF) {
                /* grow the line buffer as necessary */
@@ -55,10 +55,37 @@ char* FAST_FUNC xmalloc_fgets(FILE *file)
 char* FAST_FUNC xmalloc_fgetline(FILE *file)
 {
        size_t i;
-       char *c = bb_get_chunk_from_file(file, &i);
 
-       if (i && c[--i] == '\n')
-               c[i] = '\0';
+       return xmalloc_fgetline_n(file, &i);
+}
+
+/* Get line.  Remove trailing \n.  Return the number of bytes read. */
+char* FAST_FUNC xmalloc_fgetline_n(FILE *file, size_t *n)
+{
+       char *c = NULL;
+#if ENABLE_FEATURE_USE_GETLINE
+       size_t idx = 0;
+       ssize_t nread;
+
+       nread = getline(&c, &idx, file);
+       if (nread < 0) {
+               if (errno == ENOMEM)
+                       bb_die_memory_exhausted();
+               free(c);
+               c = NULL;
+               *n = 0;
+       } else {
+               *n = nread;
+       }
+#else
+       c = bb_get_chunk_from_file(file, n);
+#endif
+
+       if (*n) {
+               if (c[*n - 1] == '\n') {
+                       c[--*n] = '\0';
+               }
+       }
 
        return c;
 }
diff --git a/testsuite/cut.tests b/testsuite/cut.tests
index 2458c019c..0fd731688 100755
--- a/testsuite/cut.tests
+++ b/testsuite/cut.tests
@@ -81,4 +81,6 @@ SKIP=
 testing "cut empty field" "cut -d ':' -f 1-3" "a::b\n" "" "a::b\n"
 testing "cut empty field 2" "cut -d ':' -f 3-5" "b::c\n" "" "a::b::c:d\n"
 
+testing "cut with embedded NUL" "cut -c 1-3" "\x00\x01\x00\n" "" 
"\x00\x01\x00\x03\x04\x05\x06"
+
 exit $FAILCOUNT
-- 
2.34.1

_______________________________________________
busybox mailing list
[email protected]
http://lists.busybox.net/mailman/listinfo/busybox

Reply via email to