Changes from v1:
 - Fallback to BUFSIZ if fstat fails instead of exiting.
 - Add a comment block to explain why the blksize is used in file_lines()
   (which I hope also clarifies why it is not needed elsewhere).
 - Only use blksize if it is > BUFSIZ (I had regressed to 4K reads, oops).
 - Update comments to reference bufsize instead of BUFSIZ.
 - Update subject because /proc is not affected, and it isn't limited
   to follow mode.
 - Add a test.

* src/tail.c (file_lines): Consider a full blocksize when
searching backwards to find sysfs file content on systems
with large page sizes.
* tests/tail/tail-1-sysfs.sh: Add a new test.
* tests/local.mk: Reference the new test.
Fixes https://bugs.gnu.org/67490
---
 src/tail.c                 | 41 +++++++++++++++++++++++++++-----------
 tests/local.mk             |  1 +
 tests/tail/tail-1-sysfs.sh | 31 ++++++++++++++++++++++++++++
 3 files changed, 61 insertions(+), 12 deletions(-)
 create mode 100755 tests/tail/tail-1-sysfs.sh

diff --git a/src/tail.c b/src/tail.c
index c45f3b65a..f3219757d 100644
--- a/src/tail.c
+++ b/src/tail.c
@@ -518,19 +518,32 @@ static bool
 file_lines (char const *pretty_filename, int fd, uintmax_t n_lines,
             off_t start_pos, off_t end_pos, uintmax_t *read_pos)
 {
-  char buffer[BUFSIZ];
+  char *buffer;
   size_t bytes_read;
+  blksize_t bufsize = BUFSIZ;
   off_t pos = end_pos;
+  bool ok = true;
+  struct stat stats;
 
   if (n_lines == 0)
     return true;
 
+  if (fstat (fd, &stats) == 0)
+    /* sysfs files have a blksize == kernel page size, which can be > BUFSIZ.
+       lseek SEEK_END moves to the next page, which is likely more than
+       BUFSIZ away from the end of the actual data. Use a buffer large enough
+       to hold an entire page, so that the window we search for data is large
+       enough to find it.  */
+    bufsize = MAX(BUFSIZ, ST_BLKSIZE (stats));
+
+  buffer = xmalloc (bufsize);
+
   /* Set 'bytes_read' to the size of the last, probably partial, buffer;
-     0 < 'bytes_read' <= 'BUFSIZ'.  */
-  bytes_read = (pos - start_pos) % BUFSIZ;
+     0 < 'bytes_read' <= 'bufsize'.  */
+  bytes_read = (pos - start_pos) % bufsize;
   if (bytes_read == 0)
-    bytes_read = BUFSIZ;
-  /* Make 'pos' a multiple of 'BUFSIZ' (0 if the file is short), so that all
+    bytes_read = bufsize;
+  /* Make 'pos' a multiple of 'bufsize' (0 if the file is short), so that all
      reads will be on block boundaries, which might increase efficiency.  */
   pos -= bytes_read;
   xlseek (fd, pos, SEEK_SET, pretty_filename);
@@ -538,7 +551,8 @@ file_lines (char const *pretty_filename, int fd, uintmax_t 
n_lines,
   if (bytes_read == SAFE_READ_ERROR)
     {
       error (0, errno, _("error reading %s"), quoteaf (pretty_filename));
-      return false;
+      ok = false;
+      goto free_buffer;
     }
   *read_pos = pos + bytes_read;
 
@@ -565,7 +579,7 @@ file_lines (char const *pretty_filename, int fd, uintmax_t 
n_lines,
               xwrite_stdout (nl + 1, bytes_read - (n + 1));
               *read_pos += dump_remainder (false, pretty_filename, fd,
                                            end_pos - (pos + bytes_read));
-              return true;
+              goto free_buffer;
             }
         }
 
@@ -577,23 +591,26 @@ file_lines (char const *pretty_filename, int fd, 
uintmax_t n_lines,
           xlseek (fd, start_pos, SEEK_SET, pretty_filename);
           *read_pos = start_pos + dump_remainder (false, pretty_filename, fd,
                                                   end_pos);
-          return true;
+          goto free_buffer;
         }
-      pos -= BUFSIZ;
+      pos -= bufsize;
       xlseek (fd, pos, SEEK_SET, pretty_filename);
 
-      bytes_read = safe_read (fd, buffer, BUFSIZ);
+      bytes_read = safe_read (fd, buffer, bufsize);
       if (bytes_read == SAFE_READ_ERROR)
         {
           error (0, errno, _("error reading %s"), quoteaf (pretty_filename));
-          return false;
+          ok = false;
+          goto free_buffer;
         }
 
       *read_pos = pos + bytes_read;
     }
   while (bytes_read > 0);
 
-  return true;
+free_buffer:
+  free (buffer);
+  return ok;
 }
 
 /* Print the last N_LINES lines from the end of the standard input,
diff --git a/tests/local.mk b/tests/local.mk
index a5fb62d96..af41337b1 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -257,6 +257,7 @@ all_tests =                                 \
   tests/seq/seq-precision.sh                   \
   tests/head/head.pl                           \
   tests/head/head-elide-tail.pl                        \
+  tests/tail/tail-1-sysfs.sh                   \
   tests/tail/tail-n0f.sh                       \
   tests/ls/ls-misc.pl                          \
   tests/date/date.pl                           \
diff --git a/tests/tail/tail-1-sysfs.sh b/tests/tail/tail-1-sysfs.sh
new file mode 100755
index 000000000..078366588
--- /dev/null
+++ b/tests/tail/tail-1-sysfs.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+# sysfs files have weird properties that can be influenced by page size
+
+# Copyright 2023 Free Software Foundation, Inc.
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+. "${srcdir=.}/tests/init.sh"; path_prepend_ ./src
+print_ver_ tail
+
+file=/sys/kernel/profiling
+
+if test -r $file; then
+  cp -f $file exp || framework_failure_
+  tail -1 $file > out || fail=1
+
+  compare exp out || fail=1
+fi
+
+Exit $fail
-- 
2.43.0




Reply via email to