Some context for the attached patch, since this discussion was ~2
months ago. I made changes to getdelim/getline in glibc to null
terminate the buffer upon reading EOF [1]. At the time, this felt more
compliant with POSIX's description and protected callers from using
uninitialized memory when the given file stream was empty.
Eric Blake alerted us that this broke a program which got the last line
of 'du' output like this:
while (getline (&line, &len, fp) != -1)
;
/* Process LINE. */
The old glibc behavior would store the last line in LINE after the
loop. The new behavior would store a NUL in the first byte after the
loop.
I did some testing and these platforms would have the last line in LINE
after the loop [3]:
1. Fedora 43 (glibc 2.42)
2. AIX 7.1.1 and AIX 7.3.3
3. Alpine 3.19.8 (musl 1.2.4_git20230717)
4. OpenBSD 7.7
These platforms would have a NUL in the first byte after the loop:
1. MacOS 12.6
2. Solaris 11
3. FreeBSD 14.3
POSIX made this behavior a bit more clearly undefined (defined as
undefined?), so that both implementations are obviously conformant
[4]. Specifically, by adding this line:
If the return value is -1, the contents of *lineptr are
indeterminate.
In glibc, we decided to null terminate the buffer only after
getdelim/getline does the initial allocation. That is, it protects you
from undefined behavior when FP is an empty stream here:
char *line = NULL;
size_t len = 0;
ssize_t result = getline (&line, &len, fp);
/* LINE is safe to use. */
It does not protect you from undefined behavior when FP is an empty
stream here:
char *line = malloc (1);
size_t len = 1;
ssize_t result = getline (&line, &len, fp);
/* LINE is uninitialized if FP is empty. */
Most of the time getdelim is used like in the first example, so
protecting the caller is nice there. It also does not break applications
which rely on the existing glibc behavior, i.e., being able to use the
last line of the file after getdelim returns -1.
I've attached a (hopefully) mostly complete patch. It has been tested on
glibc 2.42 and glibc 2.43, and works as expected for both. That is, the
functions are replaced on glibc 2.42 and not on glibc 2.43.
The one part I am not sure about is how to document this portability
mess in the manual. Can we just say that only glibc 2.43 and later have
this newly invented behavior? Or should the other two behaviors that
might cause problems be documented as well?
Collin
[1]
https://inbox.sourceware.org/libc-alpha/c66b228db6ee9c6ab54f3580c2d68ac39707aa59.1759979441.git.collin.fu...@gmail.com/
[2] https://lists.gnu.org/archive/html/bug-gnulib/2025-10/msg00096.html
[3] https://inbox.sourceware.org/libc-alpha/[email protected]/
[4] https://www.austingroupbugs.net/bug_view_page.php?bug_id=1953
diff --git a/ChangeLog b/ChangeLog
index 7b4a86c977..1d2540006b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2026-01-21 Collin Funk <[email protected]>
+
+ getdelim, getline: match the behavior of glibc 2.43.
+ * lib/getdelim.c (getdelim): When the initial buffer is allocated by
+ getdelim, null terminate it. Exit immediately upon EOF.
+ * m4/getdelim.m4 (gl_FUNC_GETDELIM): Check for the glibc 2.43 behavior.
+ * m4/getline.m4 (gl_FUNC_GETLINE): Likewise.
+ * tests/test-getdelim.c (main): Check that calling getdelim at EOF will
+ not prevent us from using the previously read line. Check that the
+ buffer is null terminated when getdelim allocates it.
+ * tests/test-getline.c (main): Likewise.
+
2026-01-20 Bruno Haible <[email protected]>
vasnprintf: Optimize when 'long double' is the same as 'double'.
diff --git a/lib/getdelim.c b/lib/getdelim.c
index 21f3abc294..d506c08738 100644
--- a/lib/getdelim.c
+++ b/lib/getdelim.c
@@ -89,9 +89,16 @@ getdelim (char **lineptr, size_t *n, int delimiter, FILE *fp)
result = -1;
goto unlock_return;
}
+ new_lineptr[0] = '\0';
*lineptr = new_lineptr;
}
+ if (feof (fp))
+ {
+ result = -1;
+ goto unlock_return;
+ }
+
{
size_t cur_len = 0;
for (;;)
diff --git a/m4/getdelim.m4 b/m4/getdelim.m4
index 8b6eff47aa..cdadd0cce0 100644
--- a/m4/getdelim.m4
+++ b/m4/getdelim.m4
@@ -1,5 +1,5 @@
# getdelim.m4
-# serial 21
+# serial 22
dnl Copyright (C) 2005-2007, 2009-2026 Free Software Foundation, Inc.
dnl
@@ -37,6 +37,7 @@ AC_DEFUN([gl_FUNC_GETDELIM]
gl_cv_func_working_getdelim=no ;;
*)
echo fooNbarN | tr -d '\012' | tr N '\012' > conftest.data
+ echo a > conftest.oneline
touch conftest.empty
AC_RUN_IFELSE([AC_LANG_SOURCE([[
# include <stdio.h>
@@ -69,19 +70,38 @@ AC_DEFUN([gl_FUNC_GETDELIM]
}
fclose (in);
{
- /* Test that reading EOF as the first character sets the first byte
- in the buffer to NUL. This fails on glibc 2.42 and earlier. */
+ /* glibc 2.43 and later null-terminate the buffer immediately after
+ allocation to avoid uninitialized reads on empty files. */
in = fopen ("./conftest.empty", "r");
if (!in)
return 1;
- char *line = malloc (1);
- line[0] = 'A';
- size_t siz = 1;
- if (getdelim (&line, &siz, '\n', in) != -1 || line[0] != '\0')
+ char *line = NULL;
+ size_t siz = 0;
+ if (!(getdelim (&line, &siz, '\n', in) == -1
+ && 0 < siz && line && line[0] == '\0'))
result |= 8;
free (line);
}
fclose (in);
+ {
+ /* Check that we can access the last line after reaching EOF. This
+ behavior is not required by POSIX, but some programs depend on
+ glibc's behavior in this case. See:
+ <https://sourceware.org/PR28038>. */
+ in = fopen ("./conftest.oneline", "r");
+ if (!in)
+ return 1;
+ char *line = NULL;
+ size_t siz = 0;
+ if (!(getdelim (&line, &siz, '\n', in) == 2 && 0 < siz && line
+ && line[0] == 'a' && line[1] == '\n' && line[2] == '\0'))
+ result |= 16;
+ if (!(getdelim (&line, &siz, '\n', in) == -1 && 0 < siz && line
+ && line[0] == 'a' && line[1] == '\n' && line[2] == '\0'))
+ result |= 16;
+ free (line);
+ }
+ fclose (in);
return result;
}
]])],
@@ -97,7 +117,7 @@ AC_DEFUN([gl_FUNC_GETDELIM]
])
;;
esac
- rm -f conftest.data conftest.empty
+ rm -f conftest.data conftest.empty conftest.oneline
])
case "$gl_cv_func_working_getdelim" in
*yes) ;;
diff --git a/m4/getline.m4 b/m4/getline.m4
index ed32fa10bf..b42bcf5f09 100644
--- a/m4/getline.m4
+++ b/m4/getline.m4
@@ -1,5 +1,5 @@
# getline.m4
-# serial 35
+# serial 36
dnl Copyright (C) 1998-2003, 2005-2007, 2009-2026 Free Software Foundation,
dnl Inc.
@@ -31,6 +31,7 @@ AC_DEFUN([gl_FUNC_GETLINE]
AC_CACHE_CHECK([for working getline function],
[am_cv_func_working_getline],
[echo fooNbarN | tr -d '\012' | tr N '\012' > conftest.data
+ echo a > conftest.oneline
touch conftest.empty
AC_RUN_IFELSE([AC_LANG_SOURCE([[
# include <stdio.h>
@@ -63,19 +64,38 @@ AC_DEFUN([gl_FUNC_GETLINE]
}
fclose (in);
{
- /* Test that reading EOF as the first character sets the first byte
- in the buffer to NUL. This fails on glibc 2.42 and earlier. */
+ /* glibc 2.43 and later null-terminate the buffer immediately after
+ allocation to avoid uninitialized reads on empty files. */
in = fopen ("./conftest.empty", "r");
if (!in)
return 1;
- char *line = malloc (1);
- line[0] = 'A';
- size_t siz = 1;
- if (getline (&line, &siz, in) != -1 || line[0] != '\0')
+ char *line = NULL;
+ size_t siz = 0;
+ if (!(getline (&line, &siz, in) == -1
+ && 0 < siz && line && line[0] == '\0'))
result |= 8;
free (line);
}
fclose (in);
+ {
+ /* Check that we can access the last line after reaching EOF. This
+ behavior is not required by POSIX, but some programs depend on
+ glibc's behavior in this case. See:
+ <https://sourceware.org/PR28038>. */
+ in = fopen ("./conftest.oneline", "r");
+ if (!in)
+ return 1;
+ char *line = NULL;
+ size_t siz = 0;
+ if (!(getline (&line, &siz, in) == 2 && 0 < siz && line
+ && line[0] == 'a' && line[1] == '\n' && line[2] == '\0'))
+ result |= 16;
+ if (!(getline (&line, &siz, in) == -1 && 0 < siz && line
+ && line[0] == 'a' && line[1] == '\n' && line[2] == '\0'))
+ result |= 16;
+ free (line);
+ }
+ fclose (in);
return result;
}
]])],
diff --git a/tests/test-getdelim.c b/tests/test-getdelim.c
index d472285c5e..fc0387f485 100644
--- a/tests/test-getdelim.c
+++ b/tests/test-getdelim.c
@@ -84,12 +84,23 @@ main (void)
ASSERT (memcmp (line, "d\0f", 4) == 0);
ASSERT (3 < len);
- /* Test that reading an EOF will terminate the buffer with a NUL
- character. */
+ /* Test that reading an EOF will not prevent us from using the
+ previously read characters. */
+ result = getdelim (&line, &len, 'n', f);
+ ASSERT (result == -1);
+ ASSERT (memcmp (line, "d\0f", 4) == 0);
+ ASSERT (3 < len);
+
+ /* glibc 2.43 and later immediately places a NUL character in the newly
+ allocated buffer so the caller doesn't accidentally use uninitialized
+ memory when reading from an empty file. */
+ free (line);
+ line = NULL;
+ len = 0;
result = getdelim (&line, &len, 'n', f);
- ASSERT (0 < len);
- ASSERT (line[0] == '\0');
ASSERT (result == -1);
+ ASSERT (line[0] == '\0');
+ ASSERT (0 < len);
free (line);
fclose (f);
diff --git a/tests/test-getline.c b/tests/test-getline.c
index 8572611d65..2839cac735 100644
--- a/tests/test-getline.c
+++ b/tests/test-getline.c
@@ -84,12 +84,23 @@ main (void)
ASSERT (memcmp (line, "d\0f", 4) == 0);
ASSERT (3 < len);
- /* Test that reading an EOF will terminate the buffer with a NUL
- character. */
+ /* Test that reading an EOF will not prevent us from using the
+ previously read characters. */
+ result = getline (&line, &len, f);
+ ASSERT (result == -1);
+ ASSERT (memcmp (line, "d\0f", 4) == 0);
+ ASSERT (3 < len);
+
+ /* glibc 2.43 and later immediately places a NUL character in the newly
+ allocated buffer so the caller doesn't accidentally use uninitialized
+ memory when reading from an empty file. */
+ free (line);
+ line = NULL;
+ len = 0;
result = getline (&line, &len, f);
- ASSERT (0 < len);
- ASSERT (line[0] == '\0');
ASSERT (result == -1);
+ ASSERT (line[0] == '\0');
+ ASSERT (0 < len);
free (line);
fclose (f);