bug#34672: [RFC] id --numeric
Hi Bernhard, Bernhard Voelker writes: > Thanks for the patch. > > What is the use case? I mean, if one wants only the numeric numbers, > then this is usually in a script for automatic processing, and then > I thinks it's clearer to have uid and group/groups separated: > > $ id -u > 1000 > > $ id -g > 100 > > $ id -G > 100 454 457 480 492 > > There's even a -z option to separate by NULs instead of white space. > > And, when still needing all in the same output, one could filter like e.g.: > > $ id | sed 's/([^)]*)//g' > uid=1000 gid=100 groups=100,454,457,480,492 > > So I'm currently 40:60 to add it. as I said, it doesn't really add anything. It is just an aesthetic shortcut that I find useful when using Linux user namespaces. In that use case the uid->username lookup doesn't make much sense. Regards, Giuseppe
bug#34672: [RFC] id --numeric
Hi, there are cases where I'd like to see only the numeric values in the `id` output. I could get this information separately, but having a new option and just one command simplifies the task. Are you fine with a change like the following? If you have nothing against it, I will clean it up and send it again. Thanks, Giuseppe diff --git a/src/id.c b/src/id.c index d710caaee..e16c9c625 100644 --- a/src/id.c +++ b/src/id.c @@ -59,6 +59,8 @@ static bool ok = true; static bool multiple_users = false; /* If true, output user/group name instead of ID number. -n */ static bool use_name = false; +/* If true, prints only numbers for the IDs. */ +static bool just_numeric = false; /* The real and effective IDs of the user to print. */ static uid_t ruid, euid; @@ -78,6 +80,7 @@ static struct option const longopts[] = {"group", no_argument, NULL, 'g'}, {"groups", no_argument, NULL, 'G'}, {"name", no_argument, NULL, 'n'}, + {"numeric", no_argument, NULL, 'N'}, {"real", no_argument, NULL, 'r'}, {"user", no_argument, NULL, 'u'}, {"zero", no_argument, NULL, 'z'}, @@ -105,6 +108,7 @@ or (when USER omitted) for the current user.\n\ -g, --groupprint only the effective group ID\n\ -G, --groups print all group IDs\n\ -n, --name print a name instead of a number, for -ugG\n\ + -N, --numeric print only numeric values for IDs\n\ -r, --real print the real ID instead of the effective ID, with -ugG\n\ -u, --user print only the effective user ID\n\ -z, --zero delimit entries with NUL characters, not whitespace;\n\ @@ -137,7 +141,7 @@ main (int argc, char **argv) atexit (close_stdout); - while ((optc = getopt_long (argc, argv, "agnruzGZ", longopts, NULL)) != -1) + while ((optc = getopt_long (argc, argv, "agnruzGZN", longopts, NULL)) != -1) { switch (optc) { @@ -178,6 +182,9 @@ main (int argc, char **argv) case 'G': just_group_list = true; break; +case 'N': + just_numeric = true; + break; case_GETOPT_HELP_CHAR; case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS); default: @@ -361,19 +368,19 @@ print_full_info (const char *username) printf (_("uid=%s"), uidtostr (ruid)); pwd = getpwuid (ruid); - if (pwd) + if (!just_numeric && pwd) printf ("(%s)", pwd->pw_name); printf (_(" gid=%s"), gidtostr (rgid)); grp = getgrgid (rgid); - if (grp) + if (!just_numeric && grp) printf ("(%s)", grp->gr_name); if (euid != ruid) { printf (_(" euid=%s"), uidtostr (euid)); pwd = getpwuid (euid); - if (pwd) + if (!just_numeric && pwd) printf ("(%s)", pwd->pw_name); } @@ -381,7 +388,7 @@ print_full_info (const char *username) { printf (_(" egid=%s"), gidtostr (egid)); grp = getgrgid (egid); - if (grp) + if (!just_numeric && grp) printf ("(%s)", grp->gr_name); } @@ -413,9 +420,12 @@ print_full_info (const char *username) if (i > 0) putchar (','); fputs (gidtostr (groups[i]), stdout); -grp = getgrgid (groups[i]); -if (grp) - printf ("(%s)", grp->gr_name); +if (! just_numeric) + { +grp = getgrgid (groups[i]); +if (grp) + printf ("(%s)", grp->gr_name); + } } free (groups); } Regards, Giuseppe
bug#20029: 'yes' surprisingly slow
Pádraig Brady p...@draigbrady.com writes: The attached should make things more efficient here. thanks, Pádraig. From 7959bbf19307705e98f08cfa32a9dcf67672590c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= p...@draigbrady.com Date: Mon, 9 Mar 2015 19:27:32 + Subject: [PATCH] yes: output data more efficiently yes(1) may be used to generate repeating patterns of text for test inputs etc., so adjust to be more efficient. Profiling the case where yes(1) is outputting small items through stdio (which was the default case), shows the overhead continuously processing small items in main() and in stdio: $ yes /dev/null perf top -p $! 31.02% yes [.] main 27.36% libc-2.20.so [.] _IO_file_xsputn@@GLIBC_2.2.5 14.51% libc-2.20.so [.] fputs_unlocked 13.50% libc-2.20.so [.] strlen 10.66% libc-2.20.so [.] __GI___mempcpy 1.98% yes [.] fputs_unlocked@plta Sending more data per stdio call improves the situation, but still, there is significant stdio overhead due to memory copies, and the repeated string length checking: $ yes `echo {1..1000}` /dev/null perf top -p $! 42.26% libc-2.20.so [.] __GI___mempcpy 17.38% libc-2.20.so [.] strlen 5.21% [kernel] [k] __srcu_read_lock 4.58% [kernel] [k] __srcu_read_unlock 4.27% libc-2.20.so [.] _IO_file_xsputn@@GLIBC_2.2.5 2.50% libc-2.20.so [.] __GI___libc_write 2.45% [kernel] [k] system_call 2.40% [kernel] [k] system_call_after_swapgs 2.27% [kernel] [k] vfs_write 2.09% libc-2.20.so [.] _IO_do_write@@GLIBC_2.2.5 2.01% [kernel] [k] fsnotify 1.95% libc-2.20.so [.] _IO_file_write@@GLIBC_2.2.5 1.44% yes [.] main We can avoid all stdio overhead by building up the buffer _once_ and outputting that, and the profile below shows the bottleneck moved to the kernel: $ src/yes /dev/null perf top -p $! 15.42% [kernel] [k] __srcu_read_lock 12.98% [kernel] [k] __srcu_read_unlock 9.41% libc-2.20.so [.] __GI___libc_write 9.11% [kernel] [k] vfs_write 8.35% [kernel] [k] fsnotify 8.02% [kernel] [k] system_call 5.84% [kernel] [k] system_call_after_swapgs 4.54% [kernel] [k] __fget_light 3.98% [kernel] [k] sys_write 3.65% [kernel] [k] selinux_file_permission 3.44% [kernel] [k] rw_verify_area 2.94% [kernel] [k] __fsnotify_parent 2.76% [kernel] [k] security_file_permission 2.39% yes [.] main 2.17% [kernel] [k] __fdget_pos 2.13% [kernel] [k] sysret_check 0.81% [kernel] [k] write_null 0.36% yes [.] write@plt Note this change also ensures that yes will only write complete lines for lines softer than BUFSIZ. * src/yes.c (main): Build up a BUFSIZ buffer of lines, and output that, rather than having stdio process each item. * tests/misc/yes.sh: Add a new test for various buffer sizes. * tests/local.mk: Reference the new test. Fixes http://bugs.gnu.org/20029 --- src/yes.c | 43 +-- tests/local.mk| 1 + tests/misc/yes.sh | 28 3 files changed, 70 insertions(+), 2 deletions(-) create mode 100755 tests/misc/yes.sh diff --git a/src/yes.c b/src/yes.c index b35b13f..91dea11 100644 --- a/src/yes.c +++ b/src/yes.c @@ -58,6 +58,10 @@ Repeatedly output a line with all specified STRING(s), or 'y'.\n\ int main (int argc, char **argv) { + char buf[BUFSIZ]; + char *pbuf = buf; + int i; + initialize_main (argc, argv); set_program_name (argv[0]); setlocale (LC_ALL, ); @@ -77,9 +81,44 @@ main (int argc, char **argv) argv[argc++] = bad_cast (y); } - while (true) + /* Buffer data locally once, rather than having the + large overhead of stdio buffering each item. */ + for (i = optind; i argc; i++) +{ + size_t len = strlen (argv[i]); + if (BUFSIZ len || BUFSIZ - len = pbuf - buf) +break; + memcpy (pbuf, argv[i], len); + pbuf += len; + *pbuf++ = i == argc - 1 ? '\n' : ' '; +} + if (i argc) +pbuf = NULL; since the buffer is partly filled, wouldn't be better to reuse it? Something like this (barely tested): diff --git a/src/yes.c b/src/yes.c index 91dea11..ac690ce 100644 --- a/src/yes.c +++ b/src/yes.c @@ -92,9 +92,7 @@ main (int argc, char **argv) pbuf += len; *pbuf++ = i == argc - 1 ? '\n' : ' '; } - if (i argc) -pbuf = NULL; - else + if (i == argc) { size_t line_len = pbuf - buf; size_t lines = BUFSIZ / line_len; @@ -106,7 +104,7 @@ main (int argc, char **argv) } /* The normal case is to continuously output the local buffer. */ - while (pbuf) + while (i == argc) { if (write
bug#19681: [PATCH] sync: use syncfs(2) if any argument is specified
Pádraig Brady p...@draigbrady.com writes: thanks! Pádraig. Thanks for the review, I've amended the changes you suggested: From b2babc9838b52892e2cdc46bc4590fa852daa0eb Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscri...@redhat.com Date: Sun, 25 Jan 2015 01:33:45 +0100 Subject: [PATCH] sync: add support for fsync(2), fdatasync(2) and syncfs(2) * AUTHORS: Add myself to sync's authors. * NEWS: Mention the new feature. * m4/jm-macros.m4 (coreutils_MACROS): Check for syncfs. * man/sync.x: Add references to syncfs, fsync and fdatasync. * doc/coreutils.texi (sync invocation): Document the new feature. * src/sync.c: Include quote.h. (AUTHORS): Include myself. (MODE_FILE, MODE_DATA, MODE_FILE_SYSTEM, MODE_SYNC): New enum values. (long_options): Define. (sync_arg): New function. (usage): Describe that arguments are now accepted. (main): Add arguments parsing and add support for fsync(2), fdatasync(2) and syncfs(2). --- AUTHORS| 2 +- NEWS | 3 + doc/coreutils.texi | 20 ++- m4/jm-macros.m4| 1 + man/sync.x | 2 +- src/sync.c | 157 + 6 files changed, 170 insertions(+), 15 deletions(-) diff --git a/AUTHORS b/AUTHORS index 0296830..64c11d7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -83,7 +83,7 @@ stat: Michael Meskes stdbuf: Pádraig Brady stty: David MacKenzie sum: Kayvan Aghaiepour, David MacKenzie -sync: Jim Meyering +sync: Jim Meyering, Giuseppe Scrivano tac: Jay Lepreau, David MacKenzie tail: Paul Rubin, David MacKenzie, Ian Lance Taylor, Jim Meyering tee: Mike Parker, Richard M. Stallman, David MacKenzie diff --git a/NEWS b/NEWS index e0a2893..3d4190b 100644 --- a/NEWS +++ b/NEWS @@ -48,6 +48,9 @@ GNU coreutils NEWS-*- outline -*- split accepts a new --separator option to select a record separator character other than the default newline character. + sync no longer ignores arguments and it uses fsync(2), fdatasync(2) + and syncfs(2) synchronization in addition to sync(2). + ** Changes in behavior df no longer suppresses separate exports of the same remote device, as diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 5a3c31a..a1e664b 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -12053,8 +12053,24 @@ crashes, data may be lost or the file system corrupted as a result. The @command{sync} command ensures everything in memory is written to disk. -Any arguments are ignored, except for a lone @option{--help} or -@option{--version} (@pxref{Common options}). +If any argument is specified then only the specified files will be +synchronized. It uses internally the syscall fsync(2) on each of them. + +If at least one file is specified, it is possible to change the +synchronization policy with the following options. Also see +@ref{Common options}. + +@table @samp +@item --data +@opindex --data +Do not synchronize the file metadata unless it is required to maintain +data integrity. It uses the syscall fdatasync(2). + +@item --file-system +@opindex --file-system +Synchronize all the I/O waiting for the file systems that contain the file. +It uses the syscall syncfs(2). +@end table @exitstatus diff --git a/m4/jm-macros.m4 b/m4/jm-macros.m4 index 58fdd97..79f124b 100644 --- a/m4/jm-macros.m4 +++ b/m4/jm-macros.m4 @@ -89,6 +89,7 @@ AC_DEFUN([coreutils_MACROS], sethostname siginterrupt sync +syncfs sysctl sysinfo tcgetpgrp diff --git a/man/sync.x b/man/sync.x index 7947bb7..6ced07e 100644 --- a/man/sync.x +++ b/man/sync.x @@ -3,4 +3,4 @@ sync \- flush file system buffers [DESCRIPTION] .\ Add any additional description here [SEE ALSO] -sync(2) +fdatasync(2), fsync(2), sync(2), syncfs(2) diff --git a/src/sync.c b/src/sync.c index e9f4d7e..6c4d571 100644 --- a/src/sync.c +++ b/src/sync.c @@ -23,12 +23,31 @@ #include system.h #include error.h -#include long-options.h +#include quote.h /* The official name of this program (e.g., no 'g' prefix). */ #define PROGRAM_NAME sync -#define AUTHORS proper_name (Jim Meyering) +#define AUTHORS \ + proper_name (Jim Meyering), \ + proper_name (Giuseppe Scrivano) + +enum +{ + MODE_FILE, + MODE_DATA, + MODE_FILE_SYSTEM, + MODE_SYNC +}; + +static struct option const long_options[] = +{ + {data, no_argument, NULL, MODE_DATA}, + {file-system, no_argument, NULL, MODE_FILE_SYSTEM}, + {GETOPT_HELP_OPTION_DECL}, + {GETOPT_VERSION_OPTION_DECL}, + {NULL, 0, NULL, 0} +}; void usage (int status) @@ -37,11 +56,21 @@ usage (int status) emit_try_help (); else { - printf (_(Usage: %s [OPTION]\n), program_name); + printf (_(Usage: %s [OPTION] [PATH]...\n), program_name); fputs (_(\ Force changed blocks to disk, update the super block.\n\ \n\ +If one or more file paths are specified, sync only them,\n\ +use --data and --file-system to change the default behavior\n
bug#19681: [PATCH] sync: use syncfs(2) if any argument is specified
Pádraig Brady p...@draigbrady.com writes: On 25/01/15 18:05, Bernhard Voelker wrote: On 01/25/2015 06:41 PM, Pádraig Brady wrote: So we have: fdatasync fsync syncfs sync referring to:: file data, file data + metadata, file system, all file systems [...] I'd be incline to go with the _what_ interface above. Either way, I think it's important to document sync is falling back to the bigger hammer if the smaller failed. ... or shouldn't do sync this? It should fall back where possible. Now there is a difference between the file and file system(s) interfaces in that the former can return EIO error for example, while the latter are specified to always return success. You wouldn't fall back to a syncfs() if an fsync() gave an EIO for example. Also gnulib guarantees that fsync() and fdatasync() are available, so I wouldn't fallback from file - file system interfaces, nor between file interfaces. one risk here is when multiple arguments are specified and the fsync will return EIO more than once, we will fallback to syncfs multiple times. Couldn't in this case a single sync be a better choice? Regards, Giuseppe
bug#19681: [PATCH] sync: use syncfs(2) if any argument is specified
Pádraig Brady p...@draigbrady.com writes: On 26/01/15 08:36, Giuseppe Scrivano wrote: Pádraig Brady p...@draigbrady.com writes: On 25/01/15 18:05, Bernhard Voelker wrote: On 01/25/2015 06:41 PM, Pádraig Brady wrote: So we have: fdatasync fsync syncfs sync referring to:: file data, file data + metadata, file system, all file systems [...] I'd be incline to go with the _what_ interface above. Either way, I think it's important to document sync is falling back to the bigger hammer if the smaller failed. ... or shouldn't do sync this? It should fall back where possible. Now there is a difference between the file and file system(s) interfaces in that the former can return EIO error for example, while the latter are specified to always return success. You wouldn't fall back to a syncfs() if an fsync() gave an EIO for example. Also gnulib guarantees that fsync() and fdatasync() are available, so I wouldn't fallback from file - file system interfaces, nor between file interfaces. one risk here is when multiple arguments are specified and the fsync will return EIO more than once, we will fallback to syncfs multiple times. Couldn't in this case a single sync be a better choice? I was saying we shouldn't fall back from fsync() to syncfs(). Just process each argument. Diagnose any errors and EXIT_FAILURE if there was any error? sorry for misunderstanding that. I've worked out a new version that includes these suggestions, also since now the user can explicitly ask for the sync mechanism to use, I agree with you and we should raise an error if something goes wrong. Since sync is completely different now, I took the freedom to add myself to the AUTHORS, feel free to drop this part if you disagree. Regards, Giuseppe From 0dbc5ce9c78bc97ec5a678803270767ad9980618 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscri...@redhat.com Date: Sun, 25 Jan 2015 01:33:45 +0100 Subject: [PATCH] sync: add support for fsync(2), fdatasync(2) and syncfs(2) * AUTHORS: Add myself to sync's authors. * NEWS: Mention the new feature. * m4/jm-macros.m4 (coreutils_MACROS): Check for syncfs. * doc/coreutils.texi (sync invocation): Document the new feature. * src/sync.c: Include quote.h. (AUTHORS): Include myself. (MODE_FILE, MODE_FILE_DATA, MODE_FILE_SYSTEM): New enum values. (long_options): Define. (usage): Describe that arguments are now accepted. (main): Add arguments parsing and add support for fsync(2), fdatasync(2) and syncfs(2). --- AUTHORS| 2 +- NEWS | 3 ++ doc/coreutils.texi | 20 - m4/jm-macros.m4| 1 + src/sync.c | 116 + 5 files changed, 131 insertions(+), 11 deletions(-) diff --git a/AUTHORS b/AUTHORS index 0296830..64c11d7 100644 --- a/AUTHORS +++ b/AUTHORS @@ -83,7 +83,7 @@ stat: Michael Meskes stdbuf: Pádraig Brady stty: David MacKenzie sum: Kayvan Aghaiepour, David MacKenzie -sync: Jim Meyering +sync: Jim Meyering, Giuseppe Scrivano tac: Jay Lepreau, David MacKenzie tail: Paul Rubin, David MacKenzie, Ian Lance Taylor, Jim Meyering tee: Mike Parker, Richard M. Stallman, David MacKenzie diff --git a/NEWS b/NEWS index e0a2893..3d4190b 100644 --- a/NEWS +++ b/NEWS @@ -48,6 +48,9 @@ GNU coreutils NEWS-*- outline -*- split accepts a new --separator option to select a record separator character other than the default newline character. + sync no longer ignores arguments and it uses fsync(2), fdatasync(2) + and syncfs(2) synchronization in addition to sync(2). + ** Changes in behavior df no longer suppresses separate exports of the same remote device, as diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 5a3c31a..c99b8ed 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -12053,8 +12053,24 @@ crashes, data may be lost or the file system corrupted as a result. The @command{sync} command ensures everything in memory is written to disk. -Any arguments are ignored, except for a lone @option{--help} or -@option{--version} (@pxref{Common options}). +If any argument is specified then only the specified paths will be +synchronized. It uses internally the syscall fsync(2) on each of them. + +If at least one path is specified, it is possible to change the +synchronization policy with the following options. Also see +@ref{Common options}. + +@table @samp +@item --data +@opindex --data +Do not synchronize the file metadata unless it is required to maintain +data integrity. It uses the syscall fdatasync(2). + +@item --file-system +@opindex --file-system +Synchronize all the I/O waiting for the file system that contains the path. +It uses the syscall syncfs(2). +@end table @exitstatus diff --git a/m4/jm-macros.m4 b/m4/jm-macros.m4 index 58fdd97..79f124b 100644 --- a/m4/jm-macros.m4 +++ b/m4/jm-macros.m4 @@ -89,6 +89,7 @@ AC_DEFUN([coreutils_MACROS], sethostname siginterrupt sync
bug#19681: [PATCH] sync: use syncfs(2) if any argument is specified
* configure.ac: Check if syncfs(2) is available. * NEWS: Mention the new feature. * doc/coreutils.texi (sync invocation): Document the new feature. * src/sync.c (usage): Describe that arguments are now accepted. (main): Use syncfs(2) to flush buffers for the file system which contain the specified arguments. Silently fallback to sync(2) on errors. --- I've amended the suggested fixes in this version. NEWS | 3 +++ configure.ac | 2 ++ doc/coreutils.texi | 7 ++- src/sync.c | 37 +++-- 4 files changed, 46 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index e0a2893..611fde6 100644 --- a/NEWS +++ b/NEWS @@ -48,6 +48,9 @@ GNU coreutils NEWS-*- outline -*- split accepts a new --separator option to select a record separator character other than the default newline character. + sync no longer ignores arguments, and now uses syncfs(2) to sync + the file systems associated with each specified path. + ** Changes in behavior df no longer suppresses separate exports of the same remote device, as diff --git a/configure.ac b/configure.ac index 3918f43..8fcfec9 100644 --- a/configure.ac +++ b/configure.ac @@ -328,6 +328,8 @@ if test $ac_cv_func_syslog = no; then done fi +AC_CHECK_FUNCS([syncfs]) + AC_CACHE_CHECK([for 3-argument setpriority function], [utils_cv_func_setpriority], [AC_LINK_IFELSE( diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 5a3c31a..6cc7414 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -12053,6 +12053,10 @@ crashes, data may be lost or the file system corrupted as a result. The @command{sync} command ensures everything in memory is written to disk. +If any argument is specified and the system supports the synfcs(2) +syscall, then only the file systems containing these paths will be +synchronized. If multiple paths point to the same file system, the +syncfs(2) syscall will be invoked for each one of them. Any arguments are ignored, except for a lone @option{--help} or @option{--version} (@pxref{Common options}). @@ -12081,7 +12085,8 @@ If a @var{file} is larger than the specified size, the extra data is lost. If a @var{file} is shorter, it is extended and the extended part (or hole) reads as zero bytes. -The program accepts the following options. Also see @ref{Common options}. +The only options are a lone @option{--help} or @option{--version}. +@xref{Common options}. @table @samp diff --git a/src/sync.c b/src/sync.c index e9f4d7e..c160e54 100644 --- a/src/sync.c +++ b/src/sync.c @@ -37,10 +37,13 @@ usage (int status) emit_try_help (); else { - printf (_(Usage: %s [OPTION]\n), program_name); + printf (_(Usage: %s [OPTION] [PATH]...\n), program_name); fputs (_(\ Force changed blocks to disk, update the super block.\n\ \n\ +If one or more file paths are specified, update only the\n\ +file-systems which contain those files.\n\ +\n\ ), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); @@ -52,6 +55,7 @@ Force changed blocks to disk, update the super block.\n\ int main (int argc, char **argv) { + bool args_specified; initialize_main (argc, argv); set_program_name (argv[0]); setlocale (LC_ALL, ); @@ -65,7 +69,36 @@ main (int argc, char **argv) if (getopt_long (argc, argv, , NULL, NULL) != -1) usage (EXIT_FAILURE); - if (optind argc) + args_specified = optind argc; + +#if HAVE_SYNCFS + /* If arguments are specified, use syncfs on each of them. + On any error, silently fallback to sync. */ + if (args_specified) +{ + while (optind argc) +{ + int fd = open (argv[optind], O_RDONLY); + if (fd 0) +goto sync; + + if (syncfs (fd) 0) +{ + close (fd); + goto sync; +} + + if (close (fd) 0) +goto sync; + + optind++; +} + return EXIT_SUCCESS; +} +#endif + +sync: + if (args_specified) error (0, 0, _(ignoring all arguments)); sync (); -- 2.1.0
bug#19681: [PATCH] sync: use syncfs(2) if any argument is specified
Paul Eggert egg...@cs.ucla.edu writes: If we're adding this sort of option, shouldn't we also give users the ability to invoke fsync and fdatasync on a single file, as opposed to syncfs on an entire file system? Good point. Should we instead add something like --file-system and --data-only, respectively for syncfs and fdatasync and use fsync if no option is specified? Giuseppe
bug#19681: [PATCH] sync: use syncfs(2) if any argument is specified
* configure.ac: Check if syncfs(2) is available. * NEWS: Mention the new feature. * doc/coreutils.texi (sync invocation): Document the new feature. * src/sync.c (usage): Describe that arguments are now accepted. (main): Use syncfs(2) to flush buffers for the file system which contain the specified arguments. Silently fallback to sync(2) on errors. --- NEWS | 3 +++ configure.ac | 2 ++ doc/coreutils.texi | 7 ++- src/sync.c | 31 +-- 4 files changed, 40 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index e0a2893..42bd02f 100644 --- a/NEWS +++ b/NEWS @@ -48,6 +48,9 @@ GNU coreutils NEWS-*- outline -*- split accepts a new --separator option to select a record separator character other than the default newline character. + sync accepts arguments, and if any is specified use syncfs(2) to + flush the buffers for the file systems which cointain these paths. + ** Changes in behavior df no longer suppresses separate exports of the same remote device, as diff --git a/configure.ac b/configure.ac index 3918f43..8fcfec9 100644 --- a/configure.ac +++ b/configure.ac @@ -328,6 +328,8 @@ if test $ac_cv_func_syslog = no; then done fi +AC_CHECK_FUNCS([syncfs]) + AC_CACHE_CHECK([for 3-argument setpriority function], [utils_cv_func_setpriority], [AC_LINK_IFELSE( diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 5a3c31a..6cc7414 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -12053,6 +12053,10 @@ crashes, data may be lost or the file system corrupted as a result. The @command{sync} command ensures everything in memory is written to disk. +If any argument is specified and the system supports the synfcs(2) +syscall, then only the file systems containing these paths will be +synchronized. If multiple paths point to the same file system, the +syncfs(2) syscall will be invoked for each one of them. Any arguments are ignored, except for a lone @option{--help} or @option{--version} (@pxref{Common options}). @@ -12081,7 +12085,8 @@ If a @var{file} is larger than the specified size, the extra data is lost. If a @var{file} is shorter, it is extended and the extended part (or hole) reads as zero bytes. -The program accepts the following options. Also see @ref{Common options}. +The only options are a lone @option{--help} or @option{--version}. +@xref{Common options}. @table @samp diff --git a/src/sync.c b/src/sync.c index e9f4d7e..940836e 100644 --- a/src/sync.c +++ b/src/sync.c @@ -37,10 +37,13 @@ usage (int status) emit_try_help (); else { - printf (_(Usage: %s [OPTION]\n), program_name); + printf (_(Usage: %s [OPTION] [PATH]...\n), program_name); fputs (_(\ Force changed blocks to disk, update the super block.\n\ \n\ +If one or more file paths are specified, update only the\n\ +file-systems which contain those files.\n\ +\n\ ), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); fputs (VERSION_OPTION_DESCRIPTION, stdout); @@ -65,9 +68,33 @@ main (int argc, char **argv) if (getopt_long (argc, argv, , NULL, NULL) != -1) usage (EXIT_FAILURE); +#if HAVE_SYNCFS + /* If arguments are specified, use syncfs on any of them. + On any error, silently fallback to sync. */ if (optind argc) -error (0, 0, _(ignoring all arguments)); +{ + while (optind argc) +{ + int fd = open (argv[optind], O_RDONLY); + if (fd 0) +goto sync; + + if (syncfs (fd) 0) +{ + close (fd); + goto sync; +} + + if (close (fd) 0) +goto sync; + + optind++; +} + return EXIT_SUCCESS; +} +#endif +sync: sync (); return EXIT_SUCCESS; } -- 2.1.0
bug#5804: sort --human
sort supports --human-numeric-sort since coreutils 7.5. Cheers, Giuseppe Phil Dumont p...@solidstatescientific.com writes: It sure would be nice if the sort command had a --human option: du -s --human * | sort --human phil
Re: coreutils patch to multithread md5sum for parallel hashing (ala the HP-UX days)
A similar patch was rejected some months ago: http://lists.gnu.org/archive/html/bug-coreutils/2009-10/msg00143.html As solution, Pádraig suggests to use find(1). You can take advantage of the new utility nproc(1), distributed with recent coreutils versions to get the number of processing units available on your system. Cheers, Giuseppe Brett L. Trotter br...@silcon.com writes: Hello, this is my first post to the list, so I'll say in advance here I'm pleased to meet you all. I've been out of C/C++ land for a while due to the economy, but found myself hashing a bunch of 46GB blu ray images and discs for verification lately and wanted a simple way to cut down the time involved without starting separate terminals, running screen, etc. HP-UX's md5sum had/has(?) a -n option for parallelizing the hashing. I did a quick implementation today, and it's probably nothing like the sort of code you folks write and likely can be optimized quite a bit, but I was sincerely hoping that the feature could make it into coreutils, either based on my code or someone else's. It's a patch against the version in coreutils-5.97-23.el5_4.2.src.rpm on RHEL 5.4. It's been tested lightly, shows a performance -decrease- for small numbers of small files, but in increase for larger files or larger numbers of files. I haven't yet gotten around to making the ptach apply to the makefile.am, so I was manually adding -lpthread to the link lines for the *sum programs in the generated makefile. Again, this is not anywhere near a production ready patch- and I'm aware that output ordering will be potentially out of order when N 1 is used, but I'd love any thoughts, improvements, or reasons why md5sum shouldn't be able to parallel process like the old days. -Brett
Re: echo -n?
Alfred M. Szmidt a...@gnu.org writes: A friend came up with this hack `echo -n\ '; note the space. Which is a bit of a cheat. And `echo -e --\\bn', which alas is not POSIXly. POSIXly correct or not, '--\bn' is not the same as '-n'. Giuseppe
Re: echo -n?
Hey Alfred, you can get it using: /bin/echo -e \\x2Dn Cheers, Giuseppe Alfred M. Szmidt a...@gnu.org writes: Here is a fun one, how does one output `-n' (literal string) (or any other option that echo accepts) using echo? $ /bin/echo --version|head -n1 echo (GNU coreutils) 8.4 $ /bin/echo -- -n -- -n $ /bin/echo - -n - -n $ /bin/echo '-n' $ /bin/echo -n $
Re: Usage of !! , in Unix
it is a bash feature, you can find more information using `info bash'. Anyway, the proper place for bash related questions is the bug-b...@gnu.org mailing list. Cheers, Giuseppe Sameer Kumbhare smrkumbh...@gmail.com writes: Hi, I dont know whether to call this a bug, or a what. But. whenever I go n press !! , the previous command I provided gets enacted. Can u specify the reason please, or is it that it has been puposefully made. If it is purposefully made, I would also suggest to disallow saving of filenames containing such symbols ( !! , * ,etc)... Thanking you in advance. Yours faithfully, Sameer Kumbhare
Re: tee|tee|tee
jida...@jidanni.org writes: The tee(1) documents fail to say what happens when tee is given no arguments. Do say what is going on in $ echo o|tee|tee|tee The `tee' command copies standard input to standard output and also to any files given as arguments. it looks quite clear to me, if you don't specify any file then stdin is copied only to stdout. Cheers, Giuseppe
Re: I: coreutils tests/misc/nproc-avail fails on GNU/Linux without /proc and /sys mounted
when --all fails for any reason, I think we should try with the number of available processing units, at least it is a more accurate value than return 1 (and document this behaviour). Bruno, Jim, what do you think? Cheers, Giuseppe Dmitry V. Levin l...@altlinux.org writes: Hi, The recently introduced nproc utility outputs wrong result when run in --all mode inside a /proc-less /sys-less GNU/Linux chroot on a system with several CPUs. In this environment, nproc --all always outputs 1 while plain nproc outputs correct number of available CPUs. The underlying num_processors() function from gnulib relies on sysconf (_SC_NPROCESSORS_CONF) which always return 1 when neither /sys nor /proc is mounted. I'm not sure whether this limitation is unavoidable (and have to be documented) or it should be treated as a bug. In the first case, tests/misc/nproc-avail also needs to be adjusted to call skip_test_ when the OS is GNU/Linux and neither /sys nor /proc is mounted.
Re: tail aborts while following by name if using inotify
Hello Jim, Jim Meyering j...@meyering.net writes: Rob Wortman wrote: I have noticed a behavioral quirk in versions of tail which use inotify. When following a file by name (using tail -F or tail --follow=name), tail will abort when a file returns after being renamed. I see that this bug was addressed in coreutils-8.1, but I still find the behavior in coreutils-8.2. Here's a first step: add a test to exercise the losing code. The final cat out isn't required, but I found it handy while writing the test, so left it in. this patch makes your test case pass successfully. It must be applied after the three patches you have sent in another thread. Cheers, Giuseppe From e249f9ab639d318d709eed722b57bc232a7657c1 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 29 Dec 2009 14:59:24 +0100 Subject: [PATCH] tail: remove `fdspec' from the hash table before change its key * src/tail.c (tail_forever_inotify): Avoid to modify fdspec-wd until it is contained in the wd_to_name hash table with a different key value. Once it is removed, it can be added using the new `wd' as key for the hash table. --- src/tail.c |9 ++--- 1 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/tail.c b/src/tail.c index 8e6c8ac..83b4253 100644 --- a/src/tail.c +++ b/src/tail.c @@ -1474,15 +1474,18 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, continue; /* It's fine to add the same file more than once. */ - f[j].wd = inotify_add_watch (wd, f[j].name, inotify_wd_mask); - - if (f[j].wd 0) + int new_wd = inotify_add_watch (wd, f[j].name, inotify_wd_mask); + if (new_wd 0) { error (0, errno, _(cannot watch %s), quote (f[j].name)); continue; } fspec = (f[j]); + + /* Remove `fspec' and re-add it using `new_fd' as its key. */ + hash_delete (wd_to_name, fspec); + f[j].wd = new_wd; if (hash_insert (wd_to_name, fspec) == NULL) xalloc_die (); -- 1.6.5.7
Re: tail aborts while following by name if using inotify
Hi Jim, Jim Meyering j...@meyering.net writes: However, while looking at it, I discovered another problem. When tail-F'd files may be renamed, tail may fail to track the target of a rename. good catch and very useful test case! I am going to like the test driven development we we had today :-) This patch should fix the problem, other tests remain green. From f108dc01f86fb8df057172d2bafd8224759be884 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Wed, 30 Dec 2009 00:20:24 +0100 Subject: [PATCH] tail: ensure the wd is not already present in the hash table before add it * src/tail.c (tail_forever_inotify): When a new watch descriptor is added to the `wd_to_name' hash table, check that it is not already present. If it is present then remove the previous element. --- src/tail.c | 15 ++- 1 files changed, 14 insertions(+), 1 deletions(-) diff --git a/src/tail.c b/src/tail.c index 3d5e221..28a0e26 100644 --- a/src/tail.c +++ b/src/tail.c @@ -1486,11 +1486,24 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, /* Remove `fspec' and re-add it using `new_fd' as its key. */ hash_delete (wd_to_name, fspec); fspec-wd = new_wd; + + /* If the file was moved then inotify will use the source file wd for + the destination file. Make sure the key is not present in the + table. */ + struct File_spec *prev = hash_delete (wd_to_name, fspec); + if (prev prev != fspec) +{ + if (follow_mode == Follow_name) +recheck (prev, false); + prev-wd = -1; + close_fd (prev-fd, pretty_name (prev)); +} + if (hash_insert (wd_to_name, fspec) == NULL) xalloc_die (); if (follow_mode == Follow_name) -recheck ((f[j]), false); +recheck (fspec, false); } else { -- 1.6.5 Here's a similar test that fails with inotify-enabled tail, yet passes with ---disable-inotify: +debug='---disable-inotify -s .01' +debug= What do you think about run the test in both modes? Cheers, Giuseppe
Re: tail + inotify over nfs
Hello Pádraig, Pádraig Brady p...@draigbrady.com writes: It doesn't handle the above remount case though as if I mount the parent dir of a file or bind mount the file itself then there are no inotify notifications. This remounting issue is independent of nfs anyway. So can inotify handle this or will we have to periodically check with a select rather than a blocking read? inotify should notify it somehow, but I haven't checked to say it for sure. IMO, a good solution actually is to return from `tail_forever_inotify' as soon as a remote file appears and enter `tail_forever'. The disadvantage is that there is no way to return back to the inotify backend when the file path becomes local again, but I don't really think this scenario is going to happen often. I am also thinking of a tail refactoring and merge the poll based backend with the new inotify backend, having the possibility to handle different files with different mechanisms, it will solve the stdin problem too. I hope to have soon some time to look at it. Cheers, Giuseppe
Re: tail + inotify over nfs
Hello, Jim Meyering j...@meyering.net writes: Pádraig Brady wrote: I've just noticed that `tail -f` will not work over NFS as changes on the remote system will not go through the local VFS and so will not be noticed by inotify. So what to do? I suppose we could statfs(filename) Yes, I think something like that is required. For even less impact, call fstatfs on the file descriptor. When this check should be done? At initialization before enter the tail_forever/tail_forever_inotify loop? In this case, shouldn't we take into account that the underyling FS can be changed when tail -F is used? Like: (sleep 5s; mount -F nfs server:/foo/bar /mnt/) tail -F /mnt/file Cheers, Giuseppe
Re: [PATCH] core-count: A new program to count the number of cpu cores
Hi Pádraig, Pádraig Brady p...@draigbrady.com writes: Well --available and --all are mutually exclusive and related. That fact is obvious if they're parameters to a single option. But I do take your point that --count is a bit redundant, and I don't see nproc getting many other options, so OK leave them as separate options. I'll hope to commit this soon. I amended this change and the bug in the texinfo documentation reported by Paolo. I hope it is fine now. Cheers, Giuseppe From 3e639852488e44c88c040d8b993dade4a3e81407 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 31 Oct 2009 18:59:50 +0100 Subject: [PATCH] nproc: A new program to count the available processors * AUTHORS: Add my name. * NEWS: Mention it. * README: Likewise. * bootstrap.conf (gnulib_modules): Add nproc. * doc/coreutils.texi (nproc invocation): Add nproc info. * man/.gitignore: Exclude nproc.1. * po/POTFILES.in: Add src/nproc.c. * src/.gitignore: Exclude nproc. * src/Makefile.am (EXTRA_PROGRAMS): Add nproc. * src/nproc.c: New file. * tests/Makefile.am (TESTS): Add nproc/{avail, cpuinfo, positive}. * tests/nproc/avail: New file. * tests/nproc/cpuinfo: New file. * tests/nproc/positive: New file. --- AUTHORS |1 + NEWS |4 ++ README |2 +- bootstrap.conf |1 + doc/coreutils.texi | 44 + man/.gitignore |1 + po/POTFILES.in |1 + src/.gitignore |1 + src/Makefile.am |3 + src/nproc.c | 126 ++ tests/Makefile.am|3 + tests/nproc/avail| 31 tests/nproc/cpuinfo | 33 + tests/nproc/positive | 39 +++ 14 files changed, 289 insertions(+), 1 deletions(-) create mode 100644 src/nproc.c create mode 100755 tests/nproc/avail create mode 100755 tests/nproc/cpuinfo create mode 100755 tests/nproc/positive diff --git a/AUTHORS b/AUTHORS index 59a0dfa..0c15472 100644 --- a/AUTHORS +++ b/AUTHORS @@ -51,6 +51,7 @@ mv: Mike Parker, David MacKenzie, Jim Meyering nice: David MacKenzie nl: Scott Bartram, David MacKenzie nohup: Jim Meyering +nproc: Giuseppe Scrivano od: Jim Meyering paste: David M. Ihnat, David MacKenzie pathchk: Paul Eggert, David MacKenzie, Jim Meyering diff --git a/NEWS b/NEWS index 5b75dbb..8c1614e 100644 --- a/NEWS +++ b/NEWS @@ -77,6 +77,10 @@ GNU coreutils NEWS-*- outline -*- touch now accepts the option --no-dereference (-h), as a means to change symlink timestamps on platforms with enough support. +** New programs + + nproc: A new program to print the number of processors. + * Noteworthy changes in release 8.0 (2009-10-06) [beta] diff --git a/README b/README index 7545eab..0951b62 100644 --- a/README +++ b/README @@ -11,7 +11,7 @@ The programs that can be built with this package are: csplit cut date dd df dir dircolors dirname du echo env expand expr factor false fmt fold groups head hostid hostname id install join kill link ln logname ls md5sum mkdir mkfifo mknod mktemp mv nice nl nohup - od paste pathchk pinky pr printenv printf ptx pwd readlink rm rmdir + nproc od paste pathchk pinky pr printenv printf ptx pwd readlink rm rmdir runcon seq sha1sum sha224sum sha256sum sha384sum sha512sum shred shuf sleep sort split stat stdbuf stty su sum sync tac tail tee test timeout touch tr true truncate tsort tty uname unexpand uniq unlink uptime users diff --git a/bootstrap.conf b/bootstrap.conf index b3a82e0..53d3824 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -158,6 +158,7 @@ gnulib_modules= modechange mountlist mpsort + nproc obstack pathmax perl diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 227014c..f998a6e 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -84,6 +84,7 @@ * nice: (coreutils)nice invocation. Modify niceness. * nl: (coreutils)nl invocation. Number lines and write files. * nohup: (coreutils)nohup invocation. Immunize to hangups. +* nproc: (coreutils)nproc invocation. Print the number of processors. * od: (coreutils)od invocation. Dump files in octal, etc. * paste: (coreutils)paste invocation. Merge lines of files. * pathchk: (coreutils)pathchk invocation. Check file name portability. @@ -410,6 +411,7 @@ System context * arch invocation:: Print machine hardware name * date invocation:: Print or set system date and time +* nproc invocation:: Print the number of processors * uname invocation:: Print system information * hostname invocation:: Print or set system name * hostid invocation::Print numeric host identifier @@ -13407,6 +13409,7 @@ information. @menu * date invocation:: Print or set system date and time. * arch invocation:: Print
Re: [PATCH] core-count: A new program to count the number of cpu cores
Hello, Erik Auerswald auers...@unix-ag.uni-kl.de writes: Why have an option for the default operation at all? If --available is the same as specifying no option and the only other mode of operation is --all, only the --all option should be recognised. There is no need for --available. it is not very common case but it may be useful to override an alias: $ OMP_NUM_THREADS=1 nproc 1 $ alias nproc=nproc --all $ OMP_NUM_THREADS=1 nproc 2 $ OMP_NUM_THREADS=1 nproc --available 1 Beside this one, I don't see other cases. Cheers, Giuseppe
Re: [PATCH] core-count: A new program to count the number of cpu cores
Bruno Haible br...@clisp.org writes: There were no further comments except Pádraig's one, so I committed the change: 2009-11-04 Bruno Haible br...@clisp.org Make num_processors more flexible and consistent. * lib/nproc.h (enum nproc_query): New type. (num_processors): Add a 'query' argument. * lib/nproc.c: Include stdlib.h, sched.h, c-ctype.h. (num_processors): Add a 'query' argument. Test the value of the OMP_NUM_THREADS environment variable if requested. On Linux, NetBSD, mingw, count the number of CPUs available for the current process. * m4/nproc.m4 (gl_PREREQ_NPROC): Require AC_USE_SYSTEM_EXTENSIONS. Check for sched_getaffinity and sched_getaffinity_np. * modules/nproc (Depends-on): Add c-ctype, extensions. * NEWS: Mention the change. I have updated the new nproc program to use this change in gnulib. Thanks to Bruno, now nproc has not any logic inside but it is a mere wrapper around the gnulib module. I used as arguments to the new program the same names used by the `nproc_query' enum, except using --overridable instead of --current-overridable, avoiding the same prefix allows to use the shorter forms $(nproc --c) and $(nproc --o). What do you think? Giuseppe From d07e645265b38c5648e47467a5ffd829bbe966f2 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 31 Oct 2009 18:59:50 +0100 Subject: [PATCH] nproc: A new program to count the number of processors * AUTHORS: Add my name. * NEWS: Mention it. * README: Likewise. * bootstrap.conf (gnulib_modules): Add nproc. * doc/coreutils.texi (nproc invocation): Add nproc info. * po/POTFILES.in: Add src/nproc.c. * src/Makefile.am (EXTRA_PROGRAMS): Add nproc. * src/nproc.c: New file. * tests/Makefile.am (TESTS): Add nproc/{avail, cpuinfo, positive}. * tests/nproc/avail: New file. * tests/nproc/cpuinfo: New file. * tests/nproc/positive: New file. --- AUTHORS |1 + NEWS |4 ++ README |2 +- bootstrap.conf |1 + doc/coreutils.texi | 45 +++ po/POTFILES.in |1 + src/Makefile.am |3 + src/nproc.c | 120 ++ tests/Makefile.am|3 + tests/nproc/avail| 36 +++ tests/nproc/cpuinfo | 33 ++ tests/nproc/positive | 38 12 files changed, 286 insertions(+), 1 deletions(-) create mode 100644 src/nproc.c create mode 100755 tests/nproc/avail create mode 100755 tests/nproc/cpuinfo create mode 100755 tests/nproc/positive diff --git a/AUTHORS b/AUTHORS index 7095db0..3855622 100644 --- a/AUTHORS +++ b/AUTHORS @@ -51,6 +51,7 @@ mv: Mike Parker, David MacKenzie, Jim Meyering nice: David MacKenzie nl: Scott Bartram, David MacKenzie nohup: Jim Meyering +nproc: Giuseppe Scrivano od: Jim Meyering paste: David M. Ihnat, David MacKenzie pathchk: Paul Eggert, David MacKenzie, Jim Meyering diff --git a/NEWS b/NEWS index 0760775..8155807 100644 --- a/NEWS +++ b/NEWS @@ -60,6 +60,10 @@ GNU coreutils NEWS-*- outline -*- touch now accepts the option --no-dereference (-h), as a means to change symlink timestamps on platforms with enough support. +** New programs + + nproc: A new program to print the number of processors. + * Noteworthy changes in release 8.0 (2009-10-06) [beta] diff --git a/README b/README index 7545eab..0951b62 100644 --- a/README +++ b/README @@ -11,7 +11,7 @@ The programs that can be built with this package are: csplit cut date dd df dir dircolors dirname du echo env expand expr factor false fmt fold groups head hostid hostname id install join kill link ln logname ls md5sum mkdir mkfifo mknod mktemp mv nice nl nohup - od paste pathchk pinky pr printenv printf ptx pwd readlink rm rmdir + nproc od paste pathchk pinky pr printenv printf ptx pwd readlink rm rmdir runcon seq sha1sum sha224sum sha256sum sha384sum sha512sum shred shuf sleep sort split stat stdbuf stty su sum sync tac tail tee test timeout touch tr true truncate tsort tty uname unexpand uniq unlink uptime users diff --git a/bootstrap.conf b/bootstrap.conf index 4c0f4c7..0bfc101 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -158,6 +158,7 @@ gnulib_modules= modechange mountlist mpsort + nproc obstack pathmax perl diff --git a/doc/coreutils.texi b/doc/coreutils.texi index ec5bcfb..203b3bc 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -84,6 +84,7 @@ * nice: (coreutils)nice invocation. Modify niceness. * nl: (coreutils)nl invocation. Number lines and write files. * nohup: (coreutils)nohup invocation. Immunize to hangups. +* nproc: (coreutils)nproc invocation. Print the number of processors. * od: (coreutils)od invocation. Dump files in octal, etc. * paste: (coreutils)paste invocation
Re: [PATCH] core-count: A new program to count the number of cpu cores
Hi, I included what we have discussed into my patch. I renamed the new program to `nproc', now it accepts two options: --available and --installed. By default --available is used, if --available is not know then --installed is used. I added another test to ensure nproc --available = nproc --installed. Any comment? Cheers, Giuseppe From 4665e1801f73eeba98cad9988c5d5829bad03a37 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sun, 25 Oct 2009 19:04:41 +0100 Subject: [PATCH] nproc: A new program to count the number of processors * AUTHORS: Add my name. * NEWS: Mention it. * bootstrap.conf (gnulib_modules): Add nproc and pthread. * doc/coreutils.texi (nproc invocation): Add nproc info. * src/Makefile.am (EXTRA_PROGRAMS): Add nproc. * src/nproc.c: New file. * tests/Makefile.am (TESTS): Add nproc/{avail, cpuinfo, positive}. * tests/nproc/avail: New file. * tests/nproc/cpuinfo: New file. * tests/nproc/positive: New file. --- AUTHORS |1 + NEWS |4 + bootstrap.conf |2 + doc/coreutils.texi | 39 + src/Makefile.am |3 + src/nproc.c | 155 ++ tests/Makefile.am|3 + tests/nproc/avail| 33 +++ tests/nproc/cpuinfo | 34 +++ tests/nproc/positive | 33 +++ 10 files changed, 307 insertions(+), 0 deletions(-) create mode 100644 src/nproc.c create mode 100644 tests/nproc/avail create mode 100755 tests/nproc/cpuinfo create mode 100755 tests/nproc/positive diff --git a/AUTHORS b/AUTHORS index 7095db0..3855622 100644 --- a/AUTHORS +++ b/AUTHORS @@ -51,6 +51,7 @@ mv: Mike Parker, David MacKenzie, Jim Meyering nice: David MacKenzie nl: Scott Bartram, David MacKenzie nohup: Jim Meyering +nproc: Giuseppe Scrivano od: Jim Meyering paste: David M. Ihnat, David MacKenzie pathchk: Paul Eggert, David MacKenzie, Jim Meyering diff --git a/NEWS b/NEWS index 1ed577f..4caa747 100644 --- a/NEWS +++ b/NEWS @@ -32,6 +32,10 @@ GNU coreutils NEWS-*- outline -*- with the invoked command failing with status 1. Likewise, nohup fails with status 125 instead of 127. +** New programs + + nproc: A new program to get the number of processors. + ** New features md5sum --check now also accepts openssl-style checksums. diff --git a/bootstrap.conf b/bootstrap.conf index 1857489..67e2672 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -157,6 +157,7 @@ gnulib_modules= modechange mountlist mpsort + nproc obstack pathmax perl @@ -167,6 +168,7 @@ gnulib_modules= priv-set progname propername + pthread putenv quote quotearg diff --git a/doc/coreutils.texi b/doc/coreutils.texi index c54ffb8..588150b 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -84,6 +84,7 @@ * nice: (coreutils)nice invocation. Modify niceness. * nl: (coreutils)nl invocation. Number lines and write files. * nohup: (coreutils)nohup invocation. Immunize to hangups. +* nproc: (coreutils)nproc invocation. Print the number of processors. * od: (coreutils)od invocation. Dump files in octal, etc. * paste: (coreutils)paste invocation. Merge lines of files. * pathchk: (coreutils)pathchk invocation. Check file name portability. @@ -409,6 +410,7 @@ System context * arch invocation:: Print machine hardware name * date invocation:: Print or set system date and time +* nproc invocation:: Print the number of processors * uname invocation:: Print system information * hostname invocation:: Print or set system name * hostid invocation::Print numeric host identifier @@ -13221,6 +13223,7 @@ information. @menu * date invocation:: Print or set system date and time. * arch invocation:: Print machine hardware name. +* nproc invocation::Print the number of processors. * uname invocation::Print system information. * hostname invocation:: Print or set system name. * hostid invocation:: Print numeric host identifier. @@ -13880,6 +13883,42 @@ The program accepts the @ref{Common options} only. @exitstatus +...@node nproc invocation +...@section @command{nproc}: Print the number of processors + +...@pindex nproc +...@cindex Print the number of processors +...@cindex system information, printing + +...@command{nproc} prints the number of processors. It is not a hardware +inspection tool but a portable way to get how many processes +potentially can be executed in parallel. The result is guaranteed to +be a non-zero positive number. Synopsis: + +...@example +nproc [...@var{option}] +...@end example + +The program accepts the following options. Also see @ref{Common options}. + +...@table @samp + +...@item --available +...@opindex --available +Print the number
Re: [PATCH] core-count: A new program to count the number of cpu cores
Hi Jim, thanks for your quick review. Jim Meyering j...@meyering.net writes: Giuseppe Scrivano wrote: I included what we have discussed into my patch. I renamed the new program to `nproc', now it accepts two options: --available and --installed. By default --available is used, if --available is not know then --installed is used. I added another test to ensure nproc --available = nproc --installed. Any comment? Sure. That didn't apply via git am FILE, so I applied via patch. Please rebase against latest, before posting. I promise to be more careful next time, though it is a good thing that NEWS was changed in the last week :-) Here's a quick and superficial review. Sorry, I had to catch these problems before post. Regards, Giuseppe From d1dd83a6a4130ee8b8be47d5d5db461fc60e166a Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 31 Oct 2009 18:59:50 +0100 Subject: [PATCH] nproc: A new program to count the number of processors * AUTHORS: Add my name. * NEWS: Mention it. * README: Likewise. * bootstrap.conf (gnulib_modules): Add nproc and pthread. * doc/coreutils.texi (nproc invocation): Add nproc info. * po/POTFILES.in: Add src/nproc.c. * src/Makefile.am (EXTRA_PROGRAMS): Add nproc. * src/nproc.c: New file. * tests/Makefile.am (TESTS): Add nproc/{avail, cpuinfo, positive}. * tests/nproc/avail: New file. * tests/nproc/cpuinfo: New file. * tests/nproc/positive: New file. --- AUTHORS |1 + NEWS |4 + README |4 +- bootstrap.conf |2 + doc/coreutils.texi | 39 + po/POTFILES.in |1 + src/Makefile.am |3 + src/nproc.c | 155 ++ tests/Makefile.am|3 + tests/nproc/avail| 31 ++ tests/nproc/cpuinfo | 33 +++ tests/nproc/positive | 31 ++ 12 files changed, 305 insertions(+), 2 deletions(-) create mode 100644 src/nproc.c create mode 100755 tests/nproc/avail create mode 100755 tests/nproc/cpuinfo create mode 100755 tests/nproc/positive diff --git a/AUTHORS b/AUTHORS index 7095db0..3855622 100644 --- a/AUTHORS +++ b/AUTHORS @@ -51,6 +51,7 @@ mv: Mike Parker, David MacKenzie, Jim Meyering nice: David MacKenzie nl: Scott Bartram, David MacKenzie nohup: Jim Meyering +nproc: Giuseppe Scrivano od: Jim Meyering paste: David M. Ihnat, David MacKenzie pathchk: Paul Eggert, David MacKenzie, Jim Meyering diff --git a/NEWS b/NEWS index 0760775..6b8f6b3 100644 --- a/NEWS +++ b/NEWS @@ -60,6 +60,10 @@ GNU coreutils NEWS-*- outline -*- touch now accepts the option --no-dereference (-h), as a means to change symlink timestamps on platforms with enough support. +** New programs + + nproc: A new program to get the number of processors. + * Noteworthy changes in release 8.0 (2009-10-06) [beta] diff --git a/README b/README index 7545eab..b48661b 100644 --- a/README +++ b/README @@ -1,4 +1,4 @@ -These are the GNU core utilities. This package is the union of +hese are the GNU core utilities. This package is the union of the GNU fileutils, sh-utils, and textutils packages. Most of these programs have significant advantages over their Unix @@ -11,7 +11,7 @@ The programs that can be built with this package are: csplit cut date dd df dir dircolors dirname du echo env expand expr factor false fmt fold groups head hostid hostname id install join kill link ln logname ls md5sum mkdir mkfifo mknod mktemp mv nice nl nohup - od paste pathchk pinky pr printenv printf ptx pwd readlink rm rmdir + nproc od paste pathchk pinky pr printenv printf ptx pwd readlink rm rmdir runcon seq sha1sum sha224sum sha256sum sha384sum sha512sum shred shuf sleep sort split stat stdbuf stty su sum sync tac tail tee test timeout touch tr true truncate tsort tty uname unexpand uniq unlink uptime users diff --git a/bootstrap.conf b/bootstrap.conf index 4c0f4c7..1f230df 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -158,6 +158,7 @@ gnulib_modules= modechange mountlist mpsort + nproc obstack pathmax perl @@ -168,6 +169,7 @@ gnulib_modules= priv-set progname propername + pthread putenv quote quotearg diff --git a/doc/coreutils.texi b/doc/coreutils.texi index ec5bcfb..edd2e4f 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -84,6 +84,7 @@ * nice: (coreutils)nice invocation. Modify niceness. * nl: (coreutils)nl invocation. Number lines and write files. * nohup: (coreutils)nohup invocation. Immunize to hangups. +* nproc: (coreutils)nproc invocation. Print the number of processors. * od: (coreutils)od invocation. Dump files in octal, etc. * paste: (coreutils)paste invocation. Merge lines of files. * pathchk: (coreutils)pathchk invocation. Check file name portability. @@ -409,6 +410,7 @@ System context
Re: [PATCH] core-count: A new program to count the number of cpu cores
Hi Pádraig, Pádraig Brady p...@draigbrady.com writes: I do wonder though whether it would be better to have num_processors() try to return this by default? num_processors is going to be used by programs as nproc will be used by scripts; all considerations we made for nproc can be applied to num_processors. I.E. can you think of a use case where someone would want to use the --installed option? BTW installed differs from the terms discussed here, and perhaps online would be better (if we did want to expose it at all). I don't see any common use case, except a quick way to see how many processors can potentially be available. Also I'm wondering why you used the pthread interface to this? I didn't notice pthread_getaffinity_np() in POSIX for example (is that what the _np represents?), so why not call sched_getaffinity directly without needing to link with the pthread library. Thanks, I am attaching a new version that uses sched instead of pthread. The _np suffix means non portable. Also perhaps we should be comparing to /proc/stat just in case /proc/cpuinfo was not showing the online processors: grep '^cpu[0-9]' /proc/stat | wc -l From what I can see, /proc/cpuinfo shows the number of online processors: # grep '^proc' /proc/cpuinfo | wc -l 2 # echo 0 /sys/devices/system/cpu/cpu1/online # grep '^proc' /proc/cpuinfo | wc -l 1 # echo 1 /sys/devices/system/cpu/cpu1/online # grep '^proc' /proc/cpuinfo | wc -l 2 Regards, Giuseppe From 31b047ef9f0e83b7f6387bdd7e628cbb17f24079 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 31 Oct 2009 18:59:50 +0100 Subject: [PATCH] nproc: A new program to count the number of processors * AUTHORS: Add my name. * NEWS: Mention it. * README: Likewise. * bootstrap.conf (gnulib_modules): Add nproc and sched. * doc/coreutils.texi (nproc invocation): Add nproc info. * po/POTFILES.in: Add src/nproc.c. * src/Makefile.am (EXTRA_PROGRAMS): Add nproc. * src/nproc.c: New file. * tests/Makefile.am (TESTS): Add nproc/{avail, cpuinfo, positive}. * tests/nproc/avail: New file. * tests/nproc/cpuinfo: New file. * tests/nproc/positive: New file. --- AUTHORS |1 + NEWS |4 + README |2 +- bootstrap.conf |2 + doc/coreutils.texi | 39 + po/POTFILES.in |1 + src/Makefile.am |3 + src/nproc.c | 155 ++ tests/Makefile.am|3 + tests/nproc/avail| 31 ++ tests/nproc/cpuinfo | 33 +++ tests/nproc/positive | 31 ++ 12 files changed, 304 insertions(+), 1 deletions(-) create mode 100644 src/nproc.c create mode 100755 tests/nproc/avail create mode 100755 tests/nproc/cpuinfo create mode 100755 tests/nproc/positive diff --git a/AUTHORS b/AUTHORS index 7095db0..3855622 100644 --- a/AUTHORS +++ b/AUTHORS @@ -51,6 +51,7 @@ mv: Mike Parker, David MacKenzie, Jim Meyering nice: David MacKenzie nl: Scott Bartram, David MacKenzie nohup: Jim Meyering +nproc: Giuseppe Scrivano od: Jim Meyering paste: David M. Ihnat, David MacKenzie pathchk: Paul Eggert, David MacKenzie, Jim Meyering diff --git a/NEWS b/NEWS index 0760775..8155807 100644 --- a/NEWS +++ b/NEWS @@ -60,6 +60,10 @@ GNU coreutils NEWS-*- outline -*- touch now accepts the option --no-dereference (-h), as a means to change symlink timestamps on platforms with enough support. +** New programs + + nproc: A new program to print the number of processors. + * Noteworthy changes in release 8.0 (2009-10-06) [beta] diff --git a/README b/README index 7545eab..0951b62 100644 --- a/README +++ b/README @@ -11,7 +11,7 @@ The programs that can be built with this package are: csplit cut date dd df dir dircolors dirname du echo env expand expr factor false fmt fold groups head hostid hostname id install join kill link ln logname ls md5sum mkdir mkfifo mknod mktemp mv nice nl nohup - od paste pathchk pinky pr printenv printf ptx pwd readlink rm rmdir + nproc od paste pathchk pinky pr printenv printf ptx pwd readlink rm rmdir runcon seq sha1sum sha224sum sha256sum sha384sum sha512sum shred shuf sleep sort split stat stdbuf stty su sum sync tac tail tee test timeout touch tr true truncate tsort tty uname unexpand uniq unlink uptime users diff --git a/bootstrap.conf b/bootstrap.conf index 4c0f4c7..c273065 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -158,6 +158,7 @@ gnulib_modules= modechange mountlist mpsort + nproc obstack pathmax perl @@ -190,6 +191,7 @@ gnulib_modules= save-cwd savedir savewd + sched selinux-at settime sig2str diff --git a/doc/coreutils.texi b/doc/coreutils.texi index ec5bcfb..d703520 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -84,6 +84,7 @@ * nice: (coreutils)nice invocation. Modify niceness. * nl: (coreutils)nl invocation
Re: [PATCH] core-count: A new program to count the number of cpu cores
Hi Bruno, Bruno Haible br...@clisp.org writes: No, it should not be a hardware inspection tool but a portable tool to help shell scripts to have an idea of how many processes can be executed at the same time. If we get too much into details then we loose portability Good. This is important info; IMO it belongs in the coreutils.texi documentation. Thanks, I will add this note to the documentation. Yes, and for the intended use that you described above the number of available cores should the result without an option, whereas the installed cores would require an option. (I would not want to use the terms real and effective here because that brings up wrong associations with user ids and group ids.) Ok, that makes sense. What about leave to the user the decisione to use less threads/processes than core-count reports? This is precisely what can be achieved with the OMP_NUM_THREADS variable. Say, he has a CPU with 4 cores, wants to reserve 1 for his GUI and 1 for s...@home, then he will launch the s...@home process with OMP_NUM_THREADS=1 and all other processes with OMP_NUM_THREADS=2. For example, assuming that `sort' will soon get the --threads option and an user decides to use all cores except one to sort a file, then it can be done as: sort --threads=$(($(core-count) - 1)) huge_file Or possibly by: env OMP_NUM_THREADS=$(($(core-count) - 1)) sort huge_file no? Some programs, like 'msgmerge' from GNU gettext, already pay attention to the OMP_NUM_THREADS variable - a convention shared by all programs that rely on OpenMP. Can you make the 'sort' program use the same convention? I am not working on the multi-threaded sort, but if somebody asks I can make it read OMP_NUM_THREADS. If is is already used by other GNU programs, then it seems a good idea to use this value when --threads is not specified on the command line. Regards, Giuseppe
Re: [PATCH] core-count: A new program to count the number of cpu cores
Bruno Haible br...@clisp.org writes: Pádraig Brady wrote: Of course this should only apply if its effect is not externally observable; if I have a very small file B and a very large file A, and I can get $ md5sum --threads A B abcdabcdabcdabcdabcdabcdabcdabcd B 12341234123412341234123412341234 A Then the option would be necessary. Good point Paolo. I think that's another argument for splitting separate files to different threads. Grr. An argument for _not_ splitting. Huh? You can make md5sum handle each file in parallel and still present the output in the order given on the command line. To achieve this, each thread will process one file and submit the result to the main thread before exiting. The main thread will collect the results and output the result for argument k only after the results for 1...k-1 have been collected and output. The implementation of --threads for md5sum, that I posted some days ago, collects data for any file before flush it, still it can be improved to flush immediately when 1..k files are ready. Cheers, Giuseppe
Re: [PATCH] core-count: A new program to count the number of cpucores
Pádraig Brady p...@draigbrady.com writes: Hmm it's a bit surprising that min()/max() are not available as $((shell arithmetic)) or in `expr`. Consequently I agree that adding the option you suggest is useful. What will we call it though? I remember a recent discussion about adding min/max to sort. Is still this feature desired? Maybe: --ignore=NIf possible reduce the count by N Even if it looks a bit ugly, IMO this new option can help to put less logic in every shell script using it. Cheers, Giuseppe
Re: [PATCH] md5: accepts a new --threads option
Jim Meyering j...@meyering.net writes: As far as I know, there is nothing portable. Want to add another program to coreutils? With what Bruno has just added to gnulib's nproc module, we should have most platforms covered. If nobody is already working on it, I can start doing it. What about the name? ncores or ncpus are fine? Cheers, Giuseppe
[PATCH] core-count: A new program to count the number of cpu cores (was: [PATCH] md5: accepts a new --threads option)
Jim Meyering j...@meyering.net writes: If nobody is already working on it, I can start doing it. What about the name? ncores or ncpus are fine? Here are some longer candidates: count-cores count-cpus cpu-count core-count Actually, cpu seems too ambiguous, so let's rule those out. That leaves: ncores count-cores core-count Any others? Preferences? I went for `core-count'. This is the first version of the new program, it is a simple wrapper around the gnulib nproc module, thanks to Bruno to have done the real work. Any comment? Cheers, Giuseppe From e3bff6a4dd2fe9560c7922e877ab57081a083c58 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sun, 25 Oct 2009 19:04:41 +0100 Subject: [PATCH] core-count: A new program to count the number of cpu cores * AUTHORS: Add my name. * NEWS: Mention it. * bootstrap.conf (gnulib_modules): Add nproc. * doc/coreutils.texi (core-count invocation): Add core-count info. * src/Makefile.am (EXTRA_PROGRAMS): Add core-count. * src/core_count.c: New file. * tests/Makefile.am (TESTS): Add core-count/{cpuinfo, positive}. * tests/core-count/cpuinfo: New file. * tests/core-count/positive: New file. --- AUTHORS |1 + NEWS |4 ++ bootstrap.conf|1 + doc/coreutils.texi| 22 +++ src/Makefile.am |3 ++ src/core_count.c | 89 + tests/Makefile.am |2 + tests/core-count/cpuinfo | 34 + tests/core-count/positive | 31 9 files changed, 187 insertions(+), 0 deletions(-) create mode 100644 src/core_count.c create mode 100755 tests/core-count/cpuinfo create mode 100755 tests/core-count/positive diff --git a/AUTHORS b/AUTHORS index 7095db0..0b09f6d 100644 --- a/AUTHORS +++ b/AUTHORS @@ -12,6 +12,7 @@ chown: David MacKenzie, Jim Meyering chroot: Roland McGrath cksum: Q. Frank Xia comm: Richard M. Stallman, David MacKenzie +core-count: Giuseppe Scrivano cp: Torbjörn Granlund, David MacKenzie, Jim Meyering csplit: Stuart Kemp, David MacKenzie cut: David M. Ihnat, David MacKenzie, Jim Meyering diff --git a/NEWS b/NEWS index 1ed577f..017cdec 100644 --- a/NEWS +++ b/NEWS @@ -32,6 +32,10 @@ GNU coreutils NEWS-*- outline -*- with the invoked command failing with status 1. Likewise, nohup fails with status 125 instead of 127. +** New programs + + core-count: A new program to get the number of cpu cores. + ** New features md5sum --check now also accepts openssl-style checksums. diff --git a/bootstrap.conf b/bootstrap.conf index 1857489..5a5d76d 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -157,6 +157,7 @@ gnulib_modules= modechange mountlist mpsort + nproc obstack pathmax perl diff --git a/doc/coreutils.texi b/doc/coreutils.texi index c54ffb8..5a8167a 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -46,6 +46,7 @@ * chroot: (coreutils)chroot invocation. Specify the root directory. * cksum: (coreutils)cksum invocation. Print POSIX CRC checksum. * comm: (coreutils)comm invocation. Compare sorted files by line. +* core-count: (coreutils)core-count invocation. Print the number of cpu cores. * cp: (coreutils)cp invocation. Copy files. * csplit: (coreutils)csplit invocation. Split by context. * cut: (coreutils)cut invocation. Print selected parts of lines. @@ -408,6 +409,7 @@ User information System context * arch invocation:: Print machine hardware name +* core-count invocation::Print the number of cpu cores * date invocation:: Print or set system date and time * uname invocation:: Print system information * hostname invocation:: Print or set system name @@ -13221,6 +13223,7 @@ information. @menu * date invocation:: Print or set system date and time. * arch invocation:: Print machine hardware name. +* core-count invocation::Print the number of cpu cores * uname invocation::Print system information. * hostname invocation:: Print or set system name. * hostid invocation:: Print numeric host identifier. @@ -13880,6 +13883,25 @@ The program accepts the @ref{Common options} only. @exitstatus +...@node core-count invocation +...@section @command{core-count}: Print the number of cpu cores + +...@pindex core-count +...@cindex Print the number of cpu cores +...@cindex system information, printing + +...@command{core-count} prints the number of cpu cores. +Synopsis: + +...@example +core-count [...@var{option}] +...@end example + +The program accepts the @ref{Common options} only. + +...@exitstatus + + @node uname invocation @section @command{uname}: Print system information diff --git a/src/Makefile.am b/src/Makefile.am index 67c29cc..8f722a1 100644
Re: [PATCH] core-count: A new program to count the number of cpu cores
Bruno Haible br...@clisp.org writes: This program (and the underlying gnulib 'nproc' module) is IMO too simplistic. First of all, is the program meant to be a hardware inspection tool (like hwinfo --cpu)? Or is meant to be an auxiliary program for helping shell scripts that want to dispatch tasks onto a maximum number of processors? No, it should not be a hardware inspection tool but a portable tool to help shell scripts to have an idea of how many processes can be executed at the same time. If we get too much into details then we loose portability (at least at a cheap price). If it is meant as a tool for helping the parallelization of tasks at the shell script level, then it needs to take into account 1) the fact that the current process may be limited to a certain subset of the available CPUs. See the Linux/NetBSD function pthread_setaffinity_np [5][6] and the IRIX notion of a CPU that is available only to root processes [7]. Probably it is better to add an option, making a difference between the number of real and effective cores. Where it is not possible to know correctly the latter, they'll coincide. 2) the wish of users to not use all processors at once. Users may want to save 1 CPU for their GUI interactions. This can most comfortably be done through an environment variable, such as OMP_NUM_THREADS. [8] What about leave to the user the decisione to use less threads/processes than core-count reports? For example, assuming that `sort' will soon get the --threads option and an user decides to use all cores except one to sort a file, then it can be done as: sort --threads=$(($(core-count) - 1)) huge_file Cheers, Giuseppe
Re: [PATCH] tail: fix a race condition
Hello, I wrote a simple test case, taking some ideas from the old commit 11b626c20fbc65c668081996bfb9d1118f475eda. The test has a race (it is funny, considering that it checks for a race) and I used quite long time periods to minimize it, probably under high loads it can cause a false positive. Cheers, Giuseppe From e86c16342e856bec744ea01b8a2f8ab1b8695d63 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 20 Oct 2009 10:49:17 +0200 Subject: [PATCH] tests: add a new test that checks for a possible `tail' race. If new data is available between the initial read and tail registers the inotify watch descriptors, ensure that it is read before a new event happens on the file. * tests/Makefile.am (TESTS): Add tail-2/inotify-race. * tests/tail-2/inotify-race: New file. --- tests/Makefile.am |1 + tests/tail-2/inotify-race | 77 + 2 files changed, 78 insertions(+), 0 deletions(-) create mode 100755 tests/tail-2/inotify-race diff --git a/tests/Makefile.am b/tests/Makefile.am index 751db1c..bb11c23 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -74,6 +74,7 @@ EXTRA_DIST += $(TESTS) TESTS =\ misc/help-version\ + tail-2/inotify-race \ misc/invalid-opt \ rm/ext3-perf \ rm/cycle \ diff --git a/tests/tail-2/inotify-race b/tests/tail-2/inotify-race new file mode 100755 index 000..32ebfcb --- /dev/null +++ b/tests/tail-2/inotify-race @@ -0,0 +1,77 @@ +#!/bin/sh +# Ensure that when open creates a destination file, +# that file has properly restrictive permissions. +# Before coreutils-6.7, there was an interval in which +# a just-created file would have too-generous permissions. + +# Copyright (C) 2006, 2009 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. + +if test $VERBOSE = yes; then + set -x + tail --version +fi + +. $srcdir/test-lib.sh + +fail=0 + +umask 2 +touch file || framework_failure + +( gdb --version ) gdb.out 21 +case `cat gdb.out` in + 'GNU gdb'*) ;; + *) echo $0: can't run gdb. Skipping this test. 12; + (exit 77); exit 77;; +esac + + +# Use `xnanosleep' to sleep the debugged process for a specified +# amount of time. `xnanosleep' is used by tail and the symbol is +# exported, so we can use it without problems. + +gdb -nx --batch-silent \ +--eval-command='break tail_forever_inotify'\ +--eval-command='run -f file'\ +--eval-command='print xnanosleep (0.01)' \ +--eval-command='quit'\ +$abs_top_builddir/src/tail /dev/null gdb.out 21 + +if test -s gdb.out; then + cat EOF 12 +$0: can't set breakpoints or use xnanosleep in tail. Skipping this test. +EOF + (exit 77); exit 77 +fi + +(sleep 1s; echo hello working tail file) + +gdb -nx --batch-silent \ +--eval-command='break tail_forever_inotify'\ +--eval-command='run -f file'\ +--eval-command='print xnanosleep (2.0)' \ +--eval-command='continue' \ +--eval-command='quit'\ +$abs_top_builddir/src/tail /dev/null gdb.out + +sleep 3s +kill $! + +test -s gdb.out || fail=1 + +Exit $fail -- 1.6.5
Re: [PATCH] tail: fix a race condition
Hi Jim, Jim Meyering j...@meyering.net writes: Thanks for doing that. A couple suggestions: Thanks for the suggestions! I am using a 1.5 seconds time interval before kill the debugged process, I hope it is enough when there are many parallel tests. Giuseppe From e5532992b1e7c018e5754c7082c2d9ac256cee3d Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 20 Oct 2009 10:49:17 +0200 Subject: [PATCH] tests: add a new test that checks for a possible `tail' race. If new data is available between the initial read and tail registers the inotify watch descriptors, ensure that it is read before a new event happens on the file. * tests/Makefile.am (TESTS): Add tail-2/inotify-race. * tests/tail-2/inotify-race: New file. --- tests/Makefile.am |1 + tests/tail-2/inotify-race | 72 + 2 files changed, 73 insertions(+), 0 deletions(-) create mode 100755 tests/tail-2/inotify-race diff --git a/tests/Makefile.am b/tests/Makefile.am index 751db1c..bb11c23 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -74,6 +74,7 @@ EXTRA_DIST += $(TESTS) TESTS =\ misc/help-version\ + tail-2/inotify-race \ misc/invalid-opt \ rm/ext3-perf \ rm/cycle \ diff --git a/tests/tail-2/inotify-race b/tests/tail-2/inotify-race new file mode 100755 index 000..304 --- /dev/null +++ b/tests/tail-2/inotify-race @@ -0,0 +1,72 @@ +#!/bin/sh +# Copyright (C) 2006, 2009 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. + +if test $VERBOSE = yes; then + set -x + tail --version +fi + +. $srcdir/test-lib.sh + +fail=0 + +umask 2 +touch file || framework_failure + +( gdb --version ) gdb.out 21 +case $(cat gdb.out) in + 'GNU gdb'*) ;; + *) echo $0: can't run gdb. Skipping this test. 12; + (exit 77); exit 77;; +esac + + +# Use `xnanosleep' to sleep the debugged process for a specified +# amount of time. `xnanosleep' is used by tail and the symbol is +# exported, so we can use it without problems. + +gdb -nx --batch-silent \ +--eval-command='break tail_forever_inotify'\ +--eval-command='run -f file'\ +--eval-command='quit'\ +$abs_top_builddir/src/tail /dev/null gdb.out 21 + +if test -s gdb.out; then + cat EOF 12 +$0: can't set breakpoints in tail. Skipping this test. +EOF + exit 77 +fi + +gdb -nx --batch-silent \ +--eval-command='break tail_forever_inotify'\ +--eval-command='run -f file'\ +--eval-command=shell echo never-seen-with-tail-7.5 file \ +--eval-command='continue' \ +--eval-command='quit'\ +$abs_top_builddir/src/tail /dev/null gdb.out + +pid=$! + +sleep 1.5s + +kill $pid + +test -s gdb.out || fail=1 + +Exit $fail -- 1.6.5
Re: [PATCH] tail: fix a race condition
Jim Meyering j...@meyering.net writes: Thanks again for your notes. I amended them into the patch. Cheers, Giuseppe From 3f0f5744899afc15e69554220be836f673b1dad3 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 20 Oct 2009 10:49:17 +0200 Subject: [PATCH] tests: add a new test that checks for a possible `tail' race. If new data is available between the initial read and tail registers the inotify watch descriptors, ensure that it is read before a new event happens on the file. * tests/Makefile.am (TESTS): Add tail-2/inotify-race. * tests/tail-2/inotify-race: New file. --- tests/Makefile.am |1 + tests/tail-2/inotify-race | 69 + 2 files changed, 70 insertions(+), 0 deletions(-) create mode 100755 tests/tail-2/inotify-race diff --git a/tests/Makefile.am b/tests/Makefile.am index 751db1c..bb11c23 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -74,6 +74,7 @@ EXTRA_DIST += $(TESTS) TESTS =\ misc/help-version\ + tail-2/inotify-race \ misc/invalid-opt \ rm/ext3-perf \ rm/cycle \ diff --git a/tests/tail-2/inotify-race b/tests/tail-2/inotify-race new file mode 100755 index 000..47129db --- /dev/null +++ b/tests/tail-2/inotify-race @@ -0,0 +1,69 @@ +#!/bin/sh +# Copyright (C) 2006, 2009 Free Software Foundation, Inc. + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +# 02110-1301, USA. + + +# Ensure that tail does not ignore data that is appended to a tailed-forever +# file between tail's initial read-to-EOF, and when the inotify watches +# are established in tail_forever_inotify. That data could be ignored +# indefinitely if no *other* data is appended, but it would be printed as +# soon as any additional appended data is detected. + +if test $VERBOSE = yes; then + set -x + tail --version +fi + +. $srcdir/test-lib.sh + +fail=0 + +touch file || framework_failure +touch tail.out || framework_failure + +( gdb --version ) gdb.out 21 +case $(cat gdb.out) in +'GNU gdb'*) ;; +*) skip_test_ can't run gdb;; +esac + + +# See if gdb works: + +gdb -nx --batch-silent \ +--eval-command='break tail_forever_inotify'\ +--eval-command='run -f file'\ +--eval-command='quit'\ +$abs_top_builddir/src/tail /dev/null gdb.out 21 + +test -s gdb.out skip_test_ can't set breakpoints in tail + + +timeout 10s gdb -nx --batch-silent\ +--eval-command='break tail_forever_inotify'\ +--eval-command='run -f file tail.out'\ +--eval-command=shell echo never-seen-with-tail-7.5 file \ +--eval-command='continue' \ +--eval-command='quit'\ +$abs_top_builddir/src/tail /dev/null +pid=$! + +tail --pid=$pid -f tail.out | (read; kill $pid) + +test -s tail.out || fail=1 + +Exit $fail -- 1.6.3.3
Re: [PATCH] md5: accepts a new --threads option
Jim Meyering j...@meyering.net writes: Hmm, I could see it being useful to specify NCPUs-1 also. I wonder is there a general external method to determine the number of CPUs. As far as I know, there is nothing portable. Want to add another program to coreutils? With what Bruno has just added to gnulib's nproc module, we should have most platforms covered. What about a new switch to `arch'? Giuseppe
Re: [PATCH] md5: accepts a new --threads option
Pádraig Brady p...@draigbrady.com writes: With -n4 only 1 process would be started. Could you repeat with -n1 please for comparison. One would only need to increase -n for large numbers of files. Sprry the mistake. In this case results are equal. $ time ./sha1sum --threads=2 /tmp/test_files/* a91f3339c20255db34c8489440dcc43c004d41cb /tmp/test_files/a 89b1e89850b377e74b02c7cb15e480ba6311 /tmp/test_files/b a54bc57f943632adb9982f492b145a61229c2721 /tmp/test_files/c e1a9de5dc7f97cc18cade55d04ea0b3dd52ac4f0 /tmp/test_files/d real0m1.821s $ time find /tmp/test_files/* | xargs -n2 -P2 ./sha1sum a54bc57f943632adb9982f492b145a61229c2721 /tmp/test_files/c e1a9de5dc7f97cc18cade55d04ea0b3dd52ac4f0 /tmp/test_files/d a91f3339c20255db34c8489440dcc43c004d41cb /tmp/test_files/a 89b1e89850b377e74b02c7cb15e480ba6311 /tmp/test_files/b real0m3.389s $ time ls /tmp/test_files/* | xargs -n1 -P2 ./sha1sum 89b1e89850b377e74b02c7cb15e480ba6311 /tmp/test_files/b a54bc57f943632adb9982f492b145a61229c2721 /tmp/test_files/c e1a9de5dc7f97cc18cade55d04ea0b3dd52ac4f0 /tmp/test_files/d a91f3339c20255db34c8489440dcc43c004d41cb /tmp/test_files/a real0m1.844s Giuseppe
Re: [PATCH] md5: accepts a new --threads option
Jim Meyering j...@meyering.net writes: What about a new switch to `arch'? Sorry, no. arch is not installed by default, for portability reasons. Does uname have the same problem? It already has a --processor option and IMHO it would be better to get similar information using the same tool. By the way, under GNU/Linux `uname --processor' returns unknown. Do you think it is a good idea to read this information from /proc? Cheers, Giuseppe
[PATCH] md5: accepts a new --threads option
Hello, inspired by the attempt to make `sort' multi-threaded, I added threads support to md5sum and the sha* programs family. It has effect only when multiple files are specified. Any comment? Cheers, Giuseppe From 1e4ed081f41ac0955542d3a0f1ad143047b8ac25 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sun, 18 Oct 2009 00:19:25 +0200 Subject: [PATCH] md5: accepts a new --threads option * NEWS: Mention it. * bootstrap.conf: Use the `nproc' and `pthread' modules from gnulib. * doc/coreutils.texi: Document the new feature. * src/Makefile.am (md5sum, sha1sum, sha224, sha256, sha384, sha512): Link to the pthread library. * src/md5sum.c (main): Add --threads and move some code into new functions. (long_options, usage): Add --threads. (do_file): New function. (thread_start): New function. (check_files): New function. * tests/misc/md5sum: Test the new --threads option. * tests/misc/sha1sum: Ditto. * tests/misc/sha224sum: Ditto. * tests/misc/sha256sum: Ditto. * tests/misc/sha384sum: Ditto. * tests/misc/sha512sum: Ditto. --- NEWS |3 + bootstrap.conf |2 + doc/coreutils.texi |8 ++ src/Makefile.am | 12 ++-- src/md5sum.c | 234 +- tests/misc/md5sum|6 ++ tests/misc/sha1sum |6 ++ tests/misc/sha224sum |6 ++ tests/misc/sha256sum |6 ++ tests/misc/sha384sum |6 ++ tests/misc/sha512sum |6 ++ 11 files changed, 230 insertions(+), 65 deletions(-) diff --git a/NEWS b/NEWS index f8269fc..70af0b3 100644 --- a/NEWS +++ b/NEWS @@ -17,6 +17,9 @@ GNU coreutils NEWS-*- outline -*- md5sum --check now also accepts openssl-style checksums. So do sha1sum, sha224sum, sha384sum and sha512sum. + md5sum, sha1sum, sha224sum, sha384sum and sha512sum accept a new option + --threads to improve parallelism when multiple files are specified. + * Noteworthy changes in release 8.0 (2009-10-06) [beta] diff --git a/bootstrap.conf b/bootstrap.conf index e9b198c..fb3304d 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -155,6 +155,7 @@ gnulib_modules= mktime modechange mountlist + nproc mpsort obstack pathmax @@ -166,6 +167,7 @@ gnulib_modules= priv-set progname propername + pthread putenv quote quotearg diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 5026e76..b81cb81 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -3496,6 +3496,14 @@ distinguish between binary and text files. On other systems, it is the default for reading standard input when standard input is a terminal. +...@itemx --threa...@var{n} +...@opindex --threads +...@cindex verifying MD5 checksums +Use up to @var{n} threads when multiple files are specified. If a +value is not specified then the number of processors is used. The +number of threads used is limited by the number of specified files +thus in any case are not created more threads than files. + @item -w @itemx --warn @opindex -w diff --git a/src/Makefile.am b/src/Makefile.am index 915ea81..33d2563 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -220,7 +220,7 @@ link_LDADD = $(LDADD) ln_LDADD = $(LDADD) logname_LDADD = $(LDADD) ls_LDADD = $(LDADD) -md5sum_LDADD = $(LDADD) +md5sum_LDADD = $(LDADD) $(LIB_PTHREAD) mkdir_LDADD = $(LDADD) mkfifo_LDADD = $(LDADD) mknod_LDADD = $(LDADD) @@ -244,11 +244,11 @@ rmdir_LDADD = $(LDADD) runcon_LDADD = $(LDADD) seq_LDADD = $(LDADD) setuidgid_LDADD = $(LDADD) -sha1sum_LDADD = $(LDADD) -sha224sum_LDADD = $(LDADD) -sha256sum_LDADD = $(LDADD) -sha384sum_LDADD = $(LDADD) -sha512sum_LDADD = $(LDADD) +sha1sum_LDADD = $(LDADD) $(LIB_PTHREAD) +sha224sum_LDADD = $(LDADD) $(LIB_PTHREAD) +sha256sum_LDADD = $(LDADD) $(LIB_PTHREAD) +sha384sum_LDADD = $(LDADD) $(LIB_PTHREAD) +sha512sum_LDADD = $(LDADD) $(LIB_PTHREAD) shred_LDADD = $(LDADD) shuf_LDADD = $(LDADD) sleep_LDADD = $(LDADD) diff --git a/src/md5sum.c b/src/md5sum.c index aa2a144..161f1a6 100644 --- a/src/md5sum.c +++ b/src/md5sum.c @@ -20,8 +20,11 @@ #include getopt.h #include sys/types.h +#include pthread.h #include system.h +#include nproc.h +#include xstrtol.h #if HASH_ALGO_MD5 # include md5.h @@ -126,7 +129,8 @@ static bool quiet = false; enum { STATUS_OPTION = CHAR_MAX + 1, - QUIET_OPTION + QUIET_OPTION, + THREADS_OPTION }; static struct option const long_options[] = @@ -136,12 +140,28 @@ static struct option const long_options[] = { quiet, no_argument, NULL, QUIET_OPTION }, { status, no_argument, NULL, STATUS_OPTION }, { text, no_argument, NULL, 't' }, + { threads, optional_argument, NULL, THREADS_OPTION}, { warn, no_argument, NULL, 'w' }, { GETOPT_HELP_OPTION_DECL }, { GETOPT_VERSION_OPTION_DECL }, { NULL, 0, NULL, 0 } }; + +struct thread_arg +{ + char **files; + int n_files; + unsigned char **bin_buffer; + bool *res; + int *file_is_binary; + bool do_check; + bool *busy
Re: [PATCH] md5: accepts a new --threads option
Hi Pádraig, How does it compare to: files_per_process=10 cpus=4 find files | xargs -n$files_per_process -P$cpus md5sum I would expect it to be a bit better as file_per_process could be very large, thus having less overhead in starting processes. Though is the benefit worth the extra implementation complexity and new less general interface for users? The main advantage is not to save processes startup overhead but to keep the CPUs always busy when it is possible. If the files have similar length then the results are approximately the same. It is very different when the files don't have similar length and in this case a processor remains sleeping. I created four files: `a' and `b' are 150Mb, differently `c' and `d' are few Kb. These are the results I get: $ time ./sha1sum --threads /tmp/test_files/* 09e00486b4fb88805f7261fac1dd4c7f0ee7640e /tmp/test_files/a 203c0607c7ebff14ecf23b37005a714f2dc19b0b /tmp/test_files/b 3f786850e387550fdab836ed7e6dc881de23001b /tmp/test_files/c d7c8127a20a396cff08af086a1c695b0636f0c29 /tmp/test_files/d real0m1.804s $ time find /tmp/test_files/* | xargs -n4 -P2 ./sha1sum 09e00486b4fb88805f7261fac1dd4c7f0ee7640e /tmp/test_files/a 203c0607c7ebff14ecf23b37005a714f2dc19b0b /tmp/test_files/b 3f786850e387550fdab836ed7e6dc881de23001b /tmp/test_files/c d7c8127a20a396cff08af086a1c695b0636f0c29 /tmp/test_files/d real0m3.288s Cheers, Giuseppe
Re: [PATCH] tail: fix a race condition
Hi Jim, Jim Meyering j...@meyering.net writes: I haven't reviewed this yet, but will do so today or tomorrow. I know triggering race conditions can be hard, but can you write a script that demonstrates the failure? This might be a good excuse to experiment with the now-usable-as-non-root system tap (stap) tool. It's fine for test scripts to rely on bleeding-edge technology -- just skip the test if the tool is not available; most developers will have it. I didn't know about stap, I'll take a look at it asap. I tried to write a test but I didn't know how cause this condition. BTW, did you notice an actual failure, or did you spot this via inspection? I found it via inspection. Cheers, Giuseppe
Re: Bug#545422: coreutils: tail -f - fails
Hi Jim, have you considered this patch for inclusion? I don't see a clearer way to avoid polling without inotify fd support. Regards, Giuseppe Giuseppe Scrivano gscriv...@gnu.org writes: This patch changes `tail' to handle stdin separately from inotify events, similar to what we are already doing when a --pid is specified. Regards, Giuseppe From f3010bebf9e25be9a83868b4ad9db2cc6cb6613f Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Mon, 7 Sep 2009 16:35:16 +0200 Subject: [PATCH] tail: handle - properly * src/tail.c (tail_forever_inotify): Handle stdin (i.e., -, but not /dev/stdin) separately from inotify. * tests/tail-2/wait: Ensure that when a stdin is watched, tail does not raise errors. --- src/tail.c| 176 ++--- tests/tail-2/wait |6 ++ 2 files changed, 119 insertions(+), 63 deletions(-) diff --git a/src/tail.c b/src/tail.c index e3b9529..b817ecb 100644 --- a/src/tail.c +++ b/src/tail.c @@ -134,7 +134,7 @@ struct File_spec int errnum; #if HAVE_INOTIFY - /* The watch descriptor used by inotify. */ + /* The watch descriptor used by inotify, -1 on error, -2 if stdin. */ int wd; /* The parent directory watch descriptor. It is used only @@ -1184,6 +1184,7 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, char *evbuf; size_t evbuf_off = 0; size_t len = 0; + struct File_spec *stdin_spec = NULL; wd_table = hash_initialize (n_files, NULL, wd_hasher, wd_comparator, NULL); if (! wd_table) @@ -1196,6 +1197,34 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, { if (!f[i].ignore) { + if (STREQ (f[i].name, -)) +{ + int old_flags = fcntl (f[i].fd, F_GETFL); + int new_flags = old_flags | O_NONBLOCK; + + stdin_spec = f[i]; + found_watchable = true; + + if (old_flags 0 + || (new_flags != old_flags + fcntl (f[i].fd, F_SETFL, new_flags) == -1)) +{ + /* Don't update f[i].blocking if fcntl fails. */ + if (S_ISREG (f[i].mode) errno == EPERM) +{ + /* This happens when using tail -f on a file with + the append-only attribute. */ +} + else +error (EXIT_FAILURE, errno, + _(%s: cannot change stdin nonblocking mode)); +} + f[i].blocking = false; + f[i].wd = -2; + prev_wd = f[i].wd; + continue; +} + size_t fnlen = strlen (f[i].name); if (evlen fnlen) evlen = fnlen; @@ -1235,6 +1264,8 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, continue; } + prev_wd = f[i].wd; + if (hash_insert (wd_table, (f[i])) == NULL) xalloc_die (); @@ -1245,8 +1276,6 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, if (follow_mode == Follow_descriptor !found_watchable) return; - prev_wd = f[n_files - 1].wd; - evlen += sizeof (struct inotify_event) + 1; evbuf = xmalloc (evlen); @@ -1259,12 +1288,12 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, struct File_spec *fspec; uintmax_t bytes_read; struct stat stats; - + bool check_stdin = false; struct inotify_event *ev; - /* When watching a PID, ensure that a read from WD will not block - indefinetely. */ - if (pid) + /* When watching a PID or stdin, ensure that a read from WD will not block + indefinitely. */ + if (pid || stdin_spec) { fd_set rfd; struct timeval select_timeout; @@ -1284,78 +1313,92 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, if (n_descriptors == 0) { - /* See if the process we are monitoring is still alive. */ - if (kill (pid, 0) != 0 errno != EPERM) -exit (EXIT_SUCCESS); + if (stdin_spec) +check_stdin = true; + if (pid) +{ + /* See if the process we are monitoring is still alive. */ + if (kill (pid, 0) != 0 errno != EPERM) +exit (EXIT_SUCCESS); - continue; + if (!check_stdin) +continue; +} } } - if (len = evbuf_off) + if (check_stdin) { - len = safe_read (wd, evbuf, evlen); - evbuf_off = 0; - - /* For kernels prior to 2.6.21
Re: Bug#545422: coreutils: tail -f - fails
Jim Meyering j...@meyering.net writes: Considering the amount of complexity it adds to already-dense code (in spite of the fact that some is just due to indentation changes), for so little gain (who will use tail -f on stdin and care whether tail is sleep-based or inotify-based?), I'm reluctant to use it at all. Is there a good reason to want to avoid the sleep-based code in this corner case? In my opinion, it is desiderable that tail works approximately in the same way when stdin is specified, as we are already doing with --pid. If we decide to handle this too, there will not be any known case that the inotify back-end doesn't support. Summarizing what this patch does: it changes e.g., tail -f - F1 F2 F3 not to revert to the sleep-based implementation solely due to the presence of an unnamed (stdin) file, -. Instead, the files F1, F2, F3 would still be tracked efficiently via inotify, and stdin would be tracked via a select-based wait. Exactly. Giuseppe
Re: extend inotify to support file descriptors in addition to paths
Eric Paris epa...@parisplace.org writes: On Tue, Sep 8, 2009 at 8:21 PM, Giuseppe Scrivano gscriv...@gnu.org wrote: at the moment inotify permits to add new files to be watched using their path. There are situations where the file path is not know but a descriptor is available. It would be desiderable to have the possibility to use the inotify system even in these (rare) cases. I don't think specifying the inode in question by fd is fundamentally a bad idea. It is the reason I decided to use fd's when registering event's in the upcoming fanotify rather than pathnames. I do however question if we really want to add yet another syscall for inotify. We've already seen that inotify is very hard to expand. The fixed message length, lack of information a number of users want, and difficultly in extending those things make me reticent to support more extentions. Personally I'd rather see us/you move to fanotify which is (I hope) extensible forever. If only I could get networking people to review it. Have you looked at fanotify? I'm going to repost the series in a couple minutes, maybe you could tell me if fanotify might work for you? Sure, I'll take a look at it. I have few questions before look at details: is it an attempt to replace inotify? Does fanotify use only fd? Have the possibility to watch a file by its path, like inotify does, is not a bad idea when the file is not already opened. Thanks, Giuseppe
extend inotify to support file descriptors in addition to paths
Hello, at the moment inotify permits to add new files to be watched using their path. There are situations where the file path is not know but a descriptor is available. It would be desiderable to have the possibility to use the inotify system even in these (rare) cases. A concrete example of application that can take advantage of this possibility is GNU tail. GNU tail, since version 7.5, is using inotify to watch changes on files. This command will make `tail' sleeps until some events are catched: tail -qf foo bar baz Inotify can't be used when stdin is watched too: tail -qf boo bar - baz While the user gets the same result, internally it is a completely different thing because polling is done on stdin instead of graciously sleep until something is ready. I don't know if it is possible to obtain the former result when the fd is know but the path isn't. My proposal is to extend inotify adding a new syscall: long sys_inotify_add_watch_fd(int wd, int fd, u32 mask); in this way the same mechanism can be used indifferently either a descriptor or on a path is provided. What do you think about this idea? Is there another way to get the same result? Regards, Giuseppe P.S. In the attached patch I changed the syscalls table only for x86. From 6f3340347fb5d26000bec3a0a9cd8ef59c710922 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 8 Sep 2009 23:40:03 +0200 Subject: [PATCH] Add new syscall \`inotify_add_watch_fd'. It makes possible to register a file to be watched by inotify using its file descriptor instead of its path. --- arch/x86/kernel/syscall_table_32.S |1 + fs/notify/inotify/inotify_user.c | 54 ++-- include/linux/syscalls.h |3 ++ 3 files changed, 55 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index d51321d..46eb4ac 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S @@ -336,3 +336,4 @@ ENTRY(sys_call_table) .long sys_pwritev .long sys_rt_tgsigqueueinfo /* 335 */ .long sys_perf_counter_open + .long sys_inotify_add_watch_fd diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index dcd2040..53f3c41 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -747,16 +747,64 @@ SYSCALL_DEFINE3(inotify_add_watch, int, fd, const char __user *, pathname, /* create/update an inode mark */ ret = inotify_update_watch(group, inode, mask); - if (unlikely(ret)) - goto path_put_and_out; -path_put_and_out: path_put(path); fput_and_out: fput_light(filp, fput_needed); return ret; } +SYSCALL_DEFINE3(inotify_add_watch_fd, int, wd, int, fd,u32, mask) +{ + struct fsnotify_group *group; + struct inode *inode; + struct path path; + struct file *filp; + struct file *filp2; + int ret, fput_needed, fput2_needed; + unsigned flags = 0; + + filp = fget_light(wd, fput_needed); + if (unlikely(!filp)) + return -EBADF; + + /* verify that this is indeed an inotify instance */ + if (unlikely(filp-f_op != inotify_fops)) { + ret = -EINVAL; + goto fput_and_out; + } + + if (!(mask IN_DONT_FOLLOW)) + flags |= LOOKUP_FOLLOW; + if (mask IN_ONLYDIR) + flags |= LOOKUP_DIRECTORY; + + filp2 = fget_light(fd, fput2_needed); + + if (unlikely(!filp2)){ + ret = -EBADF; + goto fput_and_out; + } + + /* you can only watch an inode if you have read permissions on it */ + ret = inode_permission(filp2-f_path.dentry-d_inode, MAY_READ); + if (ret) + goto fput2_and_out; + + /* inode held in place by reference to path; group by fget on fd */ + inode = filp2-f_path.dentry-d_inode; + group = filp-private_data; + + /* create/update an inode mark */ + ret = inotify_update_watch(group, inode, mask); + +fput2_and_out: + fput_light(filp2, fput2_needed); +fput_and_out: + fput_light(filp, fput_needed); + return ret; +} + SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) { struct fsnotify_group *group; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 80de700..0833b1d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -761,4 +761,7 @@ int kernel_execve(const char *filename, char *const argv[], char *const envp[]); asmlinkage long sys_perf_counter_open( struct perf_counter_attr __user *attr_uptr, pid_t pid, int cpu, int group_fd, unsigned long flags); + + +asmlinkage long sys_inotify_add_watch_fd(int wd, int fd, u32 mask); #endif -- 1.6.3.3
Re: Bug#545422: coreutils: tail -f - fails
Hi Jim, what do you think about the following solution? It avoids to revert to the old polling mechanism using /dev/stdin instead of - to inotify_add_watch. Cheers, Giuseppe diff --git a/src/tail.c b/src/tail.c index e3b9529..016b712 100644 --- a/src/tail.c +++ b/src/tail.c @@ -1152,6 +1152,12 @@ tail_forever (struct File_spec *f, size_t n_files, double sleep_interval) #if HAVE_INOTIFY +static char const * +map_inotify_fname (char const *name) +{ + return STREQ (name, -) ? /dev/stdin : name; +} + static size_t wd_hasher (const void *entry, size_t tabsize) { @@ -1226,7 +1232,8 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, } } - f[i].wd = inotify_add_watch (wd, f[i].name, inotify_wd_mask); + f[i].wd = inotify_add_watch (wd, map_inotify_fname (f[i].name), + inotify_wd_mask); if (f[i].wd 0) { @@ -1330,7 +1337,8 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, if (i == n_files) continue; - f[i].wd = inotify_add_watch (wd, f[i].name, inotify_wd_mask); + f[i].wd = inotify_add_watch (wd, map_inotify_fname (f[i].name), + inotify_wd_mask); if (f[i].wd 0) { Jim Meyering j...@meyering.net writes: Bill Brelsford wrote: Package: coreutils Version: 7.5-3 Severity: normal tail -f no longer works with stdin. E.g. commands such as somecommand | tail -f - somecommand | tail -f tail -f /var/log/kern fail with the message: tail: cannot watch `-': No such file or directory Worked under 7.4-2 and previous versions. Thanks for the report. I'm fixing it like this, upstream. Test coming momentarily. From 30269c9ca38c06b31a7c764c192562e3b0268725 Mon Sep 17 00:00:00 2001 From: Jim Meyering meyer...@redhat.com Date: Mon, 7 Sep 2009 08:37:08 +0200 Subject: [PATCH] tail -f: work on - once again * src/tail.c (main) [HAVE_INOTIFY]: When stdin (i.e., -, but not /dev/stdin) is specified on the command line, don't use inotify. Reported by Bill Brelsford in http://bugs.debian.org/545422. * NEWS (Bug fixes): Mention it. This bug was introduced in coreutils-7.5 via commit ae494d4b, 2009-06-02, tail: use inotify if it is available. --- NEWS |9 + src/tail.c | 14 +- 2 files changed, 22 insertions(+), 1 deletions(-) diff --git a/NEWS b/NEWS index b02d2da..5c7fb82 100644 --- a/NEWS +++ b/NEWS @@ -31,6 +31,15 @@ GNU coreutils NEWS-*- outline -*- Before, this would print nothing and wait: stdbuf -o 4K tail -f /etc/passwd Note that this bug affects tail -f only when its standard output is buffered, which is relatively unusual. + [bug introduced in coreutils-7.5] + + tail -f once again works with standard input. inotify-enabled tail -f + would fail when operating on a nameless stdin. I.e., tail -f /etc/passwd + would say tail: cannot watch `-': No such file or directory, yet the + relatively baroque tail -f /dev/stdin /etc/passwd would work. Now, the + offending usage causes tail to revert to its conventional sleep-based + (i.e., not inotify-based) implementation. + [bug introduced in coreutils-7.5] ** New features diff --git a/src/tail.c b/src/tail.c index e3b9529..c53df9e 100644 --- a/src/tail.c +++ b/src/tail.c @@ -1982,7 +1982,19 @@ main (int argc, char **argv) if (forever) { #if HAVE_INOTIFY - if (!disable_inotify) + /* If the user specifies stdin via a command line argument of -, + or implicitly by providing no arguments, we won't use inotify. + Technically, on systems with a working /dev/stdin, we *could*, + but would it be worth it? Verifying that it's a real device + and hooked up to stdin is not trivial, while reverting to + non-inotify-based tail_forever is easy and portable. */ + bool stdin_cmdline_arg = false; + + for (i = 0; i n_files; i++) +if (STREQ (file[i], -)) + stdin_cmdline_arg = true; + + if (!disable_inotify !stdin_cmdline_arg) { int wd = inotify_init (); if (wd 0) -- 1.6.4.2.419.gab238
Re: Bug#545422: coreutils: tail -f - fails
Jim Meyering j...@meyering.net writes: I considered that and discussed the trade-off in the comment I committed. There have been systems and configurations with nonexistent and unusable /dev/stdin files. sorry, I didn't read you comment. This patch changes `tail' to handle stdin separately from inotify events, similar to what we are already doing when a --pid is specified. Regards, Giuseppe From f3010bebf9e25be9a83868b4ad9db2cc6cb6613f Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Mon, 7 Sep 2009 16:35:16 +0200 Subject: [PATCH] tail: handle - properly * src/tail.c (tail_forever_inotify): Handle stdin (i.e., -, but not /dev/stdin) separately from inotify. * tests/tail-2/wait: Ensure that when a stdin is watched, tail does not raise errors. --- src/tail.c| 176 ++--- tests/tail-2/wait |6 ++ 2 files changed, 119 insertions(+), 63 deletions(-) diff --git a/src/tail.c b/src/tail.c index e3b9529..b817ecb 100644 --- a/src/tail.c +++ b/src/tail.c @@ -134,7 +134,7 @@ struct File_spec int errnum; #if HAVE_INOTIFY - /* The watch descriptor used by inotify. */ + /* The watch descriptor used by inotify, -1 on error, -2 if stdin. */ int wd; /* The parent directory watch descriptor. It is used only @@ -1184,6 +1184,7 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, char *evbuf; size_t evbuf_off = 0; size_t len = 0; + struct File_spec *stdin_spec = NULL; wd_table = hash_initialize (n_files, NULL, wd_hasher, wd_comparator, NULL); if (! wd_table) @@ -1196,6 +1197,34 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, { if (!f[i].ignore) { + if (STREQ (f[i].name, -)) +{ + int old_flags = fcntl (f[i].fd, F_GETFL); + int new_flags = old_flags | O_NONBLOCK; + + stdin_spec = f[i]; + found_watchable = true; + + if (old_flags 0 + || (new_flags != old_flags + fcntl (f[i].fd, F_SETFL, new_flags) == -1)) +{ + /* Don't update f[i].blocking if fcntl fails. */ + if (S_ISREG (f[i].mode) errno == EPERM) +{ + /* This happens when using tail -f on a file with + the append-only attribute. */ +} + else +error (EXIT_FAILURE, errno, + _(%s: cannot change stdin nonblocking mode)); +} + f[i].blocking = false; + f[i].wd = -2; + prev_wd = f[i].wd; + continue; +} + size_t fnlen = strlen (f[i].name); if (evlen fnlen) evlen = fnlen; @@ -1235,6 +1264,8 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, continue; } + prev_wd = f[i].wd; + if (hash_insert (wd_table, (f[i])) == NULL) xalloc_die (); @@ -1245,8 +1276,6 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, if (follow_mode == Follow_descriptor !found_watchable) return; - prev_wd = f[n_files - 1].wd; - evlen += sizeof (struct inotify_event) + 1; evbuf = xmalloc (evlen); @@ -1259,12 +1288,12 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, struct File_spec *fspec; uintmax_t bytes_read; struct stat stats; - + bool check_stdin = false; struct inotify_event *ev; - /* When watching a PID, ensure that a read from WD will not block - indefinetely. */ - if (pid) + /* When watching a PID or stdin, ensure that a read from WD will not block + indefinitely. */ + if (pid || stdin_spec) { fd_set rfd; struct timeval select_timeout; @@ -1284,78 +1313,92 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, if (n_descriptors == 0) { - /* See if the process we are monitoring is still alive. */ - if (kill (pid, 0) != 0 errno != EPERM) -exit (EXIT_SUCCESS); + if (stdin_spec) +check_stdin = true; + if (pid) +{ + /* See if the process we are monitoring is still alive. */ + if (kill (pid, 0) != 0 errno != EPERM) +exit (EXIT_SUCCESS); - continue; + if (!check_stdin) +continue; +} } } - if (len = evbuf_off) + if (check_stdin) { - len = safe_read (wd, evbuf, evlen); - evbuf_off = 0; - - /* For kernels prior to 2.6.21, read returns 0 when the buffer - is too small. */ - if ((len == 0
Re: Bug report
Hello, can you please tell us where is it documented to ask APT related questions to this mailing list? It is not the first time Ubuntu questions are directed here and in case this documentation should be fixed. Thanks, Giuseppe Gil Miller millgi...@yahoo.com writes: E: type 'sudo' is not known on line 58 in sources.list /etc/apt/sources.list. E: The list of sources could not be read.millgi...@yahoo.com
[PATCH]: nl: deprecate --page-increment in favour of --line-increment
Hello, --page-increment seems the wrong name for this option, --line-increment is clearer. what do you think of this change? Cheers, Giuseppe From e71bee2c6731fe65c07744ac95e1e4058eea773c Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 18 Aug 2009 12:22:37 +0200 Subject: [PATCH] nl: deprecate --page-increment in favour of --line-increment * NEWS: Mention the change. * doc/coreutils.texi: Document the --line-increment option. * src/nl.c (struct option): Add --line-increment, (usage): Describe it, (main): Use it. --- NEWS |5 + doc/coreutils.texi |4 ++-- src/nl.c |9 +++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index 85cc82b..d485e44 100644 --- a/NEWS +++ b/NEWS @@ -40,6 +40,11 @@ GNU coreutils NEWS-*- outline -*- were renamed from 'HARDLINK' and 'hl' which were available since coreutils-7.1 when this feature was introduced. +** Deprecated options + + nl --page-increment: deprecated in favour of --line-increment, the new option + maintains the previous semantic and the same short option, -i. + ** New features chroot now accepts the options --userspec and --groups. diff --git a/doc/coreutils.texi b/doc/coreutils.texi index bb1d87a..8e1b73d 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -1688,9 +1688,9 @@ Analogous to @option{--body-numbering}. Analogous to @option{--body-numbering}. @item -i @var{number} -...@itemx --page-increme...@var{number} +...@itemx --line-increme...@var{number} @opindex -i -...@opindex --page-increment +...@opindex --line-increment Increment line numbers by @var{number} (default 1). @item -l @var{number} diff --git a/src/nl.c b/src/nl.c index 2deb314..ea7ebe6 100644 --- a/src/nl.c +++ b/src/nl.c @@ -150,7 +150,9 @@ static struct option const longopts[] = {body-numbering, required_argument, NULL, 'b'}, {footer-numbering, required_argument, NULL, 'f'}, {starting-line-number, required_argument, NULL, 'v'}, - {page-increment, required_argument, NULL, 'i'}, + {line-increment, required_argument, NULL, 'i'}, + /* FIXME: page-increment is deprecated, remove in dec-2011. */ + {page-increment, required_argument, NULL, 'I'}, {no-renumber, no_argument, NULL, 'p'}, {join-blank-lines, required_argument, NULL, 'l'}, {number-separator, required_argument, NULL, 's'}, @@ -191,7 +193,7 @@ Mandatory arguments to long options are mandatory for short options too.\n\ ), stdout); fputs (_(\ -h, --header-numbering=STYLEuse STYLE for numbering header lines\n\ - -i, --page-increment=NUMBER line number increment at each line\n\ + -i, --line-increment=NUMBER line number increment at each line\n\ -l, --join-blank-lines=NUMBER group of NUMBER empty lines counted as one\n\ -n, --number-format=FORMAT insert line numbers according to FORMAT\n\ -p, --no-renumber do not reset line numbers at logical pages\n\ @@ -504,6 +506,9 @@ main (int argc, char **argv) ok = false; } break; + case 'I': +error (0, 0, _(\ +--page-increment is deprecated, use --line-increment instead.)); case 'i': if (! (xstrtoimax (optarg, NULL, 10, page_incr, ) == LONGINT_OK 0 page_incr)) -- 1.6.3.3
Re: [PATCH]: nl: deprecate --page-increment in favour of --line-increment
Thanks Kamil, yes, CHAR_MAX + 1 looks like a better choice. Are there other comments? Giuseppe Kamil Dudka kdu...@redhat.com writes: Hello Giuseppe, On Tue August 18 2009 12:47:06 Giuseppe Scrivano wrote: diff --git a/src/nl.c b/src/nl.c index 2deb314..ea7ebe6 100644 --- a/src/nl.c +++ b/src/nl.c @@ -150,7 +150,9 @@ static struct option const longopts[] = {body-numbering, required_argument, NULL, 'b'}, {footer-numbering, required_argument, NULL, 'f'}, {starting-line-number, required_argument, NULL, 'v'}, - {page-increment, required_argument, NULL, 'i'}, + {line-increment, required_argument, NULL, 'i'}, + /* FIXME: page-increment is deprecated, remove in dec-2011. */ + {page-increment, required_argument, NULL, 'I'}, what about use of CHAR_MAX + 1 instead of 'I'? Kamil
Re: [PATCH]: nl: deprecate --page-increment in favour of --line-increment
Hello, Jim Meyering j...@meyering.net writes: + nl --page-increment: deprecated in favour of --line-increment, the new option + maintains the previous semantic and the same short option, -i. s/semantic/semantics/ ... Please don't use I. Use something like PAGE_INCREMENT_OPTION_DEPRECATED, as is done in install.c for PRESERVE_CONTEXT_OPTION_DEPRECATED. I.e., define this, enum { PAGE_INCREMENT_OPTION_DEPRECATED = CHAR_MAX + 1, }; Then handle it in the same way install.c handles its deprecated long option: case PRESERVE_CONTEXT_OPTION_DEPRECATED: error (0, 0, _(WARNING: --preserve_context is deprecated; use --preserve-context instead)); /* fall through */ case PRESERVE_CONTEXT_OPTION: ... thanks for the comments. This is the cleaned version of the patch. Cheers, Giuseppe From 8dc27341e8ed57e790a71d4b61df44e908bc73cd Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 18 Aug 2009 12:22:37 +0200 Subject: [PATCH] nl: deprecate --page-increment in favour of --line-increment * NEWS: Mention the change. * doc/coreutils.texi: Document the --line-increment option. * src/nl.c (struct option): Add --line-increment, (usage): Describe it, (main): Use it. --- NEWS |5 + doc/coreutils.texi |4 ++-- src/nl.c | 15 +-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index 85cc82b..679576f 100644 --- a/NEWS +++ b/NEWS @@ -40,6 +40,11 @@ GNU coreutils NEWS-*- outline -*- were renamed from 'HARDLINK' and 'hl' which were available since coreutils-7.1 when this feature was introduced. +** Deprecated options + + nl --page-increment: deprecated in favour of --line-increment, the new option + maintains the previous semantics and the same short option, -i. + ** New features chroot now accepts the options --userspec and --groups. diff --git a/doc/coreutils.texi b/doc/coreutils.texi index bb1d87a..8e1b73d 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -1688,9 +1688,9 @@ Analogous to @option{--body-numbering}. Analogous to @option{--body-numbering}. @item -i @var{number} -...@itemx --page-increme...@var{number} +...@itemx --line-increme...@var{number} @opindex -i -...@opindex --page-increment +...@opindex --line-increment Increment line numbers by @var{number} (default 1). @item -l @var{number} diff --git a/src/nl.c b/src/nl.c index 2deb314..67b79d9 100644 --- a/src/nl.c +++ b/src/nl.c @@ -144,13 +144,20 @@ static intmax_t line_no; /* True if we have ever read standard input. */ static bool have_read_stdin; +enum +{ + PAGE_INCREMENT_OPTION_DEPRECATED = CHAR_MAX + 1 +}; + static struct option const longopts[] = { {header-numbering, required_argument, NULL, 'h'}, {body-numbering, required_argument, NULL, 'b'}, {footer-numbering, required_argument, NULL, 'f'}, {starting-line-number, required_argument, NULL, 'v'}, - {page-increment, required_argument, NULL, 'i'}, + {line-increment, required_argument, NULL, 'i'}, + /* FIXME: page-increment is deprecated, remove in dec-2011. */ + {page-increment, required_argument, NULL, PAGE_INCREMENT_OPTION_DEPRECATED}, {no-renumber, no_argument, NULL, 'p'}, {join-blank-lines, required_argument, NULL, 'l'}, {number-separator, required_argument, NULL, 's'}, @@ -191,7 +198,7 @@ Mandatory arguments to long options are mandatory for short options too.\n\ ), stdout); fputs (_(\ -h, --header-numbering=STYLEuse STYLE for numbering header lines\n\ - -i, --page-increment=NUMBER line number increment at each line\n\ + -i, --line-increment=NUMBER line number increment at each line\n\ -l, --join-blank-lines=NUMBER group of NUMBER empty lines counted as one\n\ -n, --number-format=FORMAT insert line numbers according to FORMAT\n\ -p, --no-renumber do not reset line numbers at logical pages\n\ @@ -504,6 +511,10 @@ main (int argc, char **argv) ok = false; } break; + case PAGE_INCREMENT_OPTION_DEPRECATED: +error (0, 0, _(WARNING: --preserve_context is deprecated; + use --preserve-context instead)); +/* fall through */ case 'i': if (! (xstrtoimax (optarg, NULL, 10, page_incr, ) == LONGINT_OK 0 page_incr)) -- 1.6.3.3
Re: Linus' sha1 is much faster!
Pádraig Brady p...@draigbrady.com writes: -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m32 -march=i586 -mtune=generic -fasynchronous-unwind-tables -D_GNU_SOURCE=1 thanks. I did again all tests on my machine using these same options. I repeated each test 6 times and I took the median without consider the first result. Except the first run that it is not considered, I didn't report a big variance on results of the same test. gcc 4.3.3 gnulib sha1:real0m2.543s gnulib sha1 lookup: real0m1.906s (-25%) linus's sha1: real0m2.468s (-3%) linus's sha1 no asm:real0m2.289s (-9%) gcc 4.4.1 gnulib sha1:real0m3.386s gnulib sha1 lookup: real0m3.110s (-8%) linus's sha1: real0m1.701s (-49%) linus's sha1 no asm:real0m1.284s (-62%) I don't see such big differences in asm generated by gcc 4.4.1 and gcc 4.3.3 to explain this performance difference, what I noticed immediately is that in the gcc-4.4 generated asm there are more lea instructions (+30%), but I doubt this is the reason of these poor results. Anyway, I haven't yet looked much in details. Cheers, Giuseppe
Re: Linus' sha1 is much faster!
Hi, These are the results I reported (median of 5 plus an additional not considered first run) on the Steve Reid's SHA1 implementation using the same flags to the compiler that I used for previous tests. GCC 4.3.3: real0m2.627s GCC 4.4.1: real0m3.742s In both cases it showed to be slower than other implementations I have already tried. Additional note: as for gnulib SHA1, GCC 4.4.1 produced slower code than GCC 4.3.3. Cheers, Giuseppe Steven Noonan ste...@uplinklabs.net writes: Interesting. I compared Linus' implementation to the public domain one by Steve Reid[1], which is used in OpenLDAP and a few other projects. Anyone with some experience testing these kinds of things in a statistically sound manner want to try it out? In my tests, I got this: (average of 5 runs) Linus' sha1: 283MB/s Steve Reid's sha1: 305MB/s - Steven [1] http://gpl.nas-central.org/SYNOLOGY/x07-series/514_UNTARED/source/openldap-2.3.11/libraries/liblutil/sha1.c
Re: Linus' sha1 is much faster!
Hi Pádraig, I tried to reproduce your results but I wasn't able to do it. The biggest difference on a 300MB file I noticed was approximately 15% using on both implementations -O2, and 5% using -O3. My GCC version is gcc (Debian 4.3.3-14) 4.3.3 and the CPU is: Intel(R) Pentium(R) D CPU 3.20GHz. I also spent some time trying to improve the gnulib SHA1 implementation and it seems a lookup table can improve things a bit. Can you please try the patch that I have attached and tell me which performance difference (if any) you get? Thanks, Giuseppe From b975a5e0849eaa46e5cf410c5bf6e2308f044d61 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sun, 16 Aug 2009 20:53:54 +0200 Subject: [PATCH] SHA1: use a lookup table for faster hashing * lib/sha1.c (struct sha1_pre): New member. * lib/sha1.c (sha1_process_block): Use the lookup table to quickly find indices to use in the current round. --- lib/sha1.c | 160 ++- 1 files changed, 92 insertions(+), 68 deletions(-) diff --git a/lib/sha1.c b/lib/sha1.c index 9c6c7ae..ec18ba7 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -283,6 +283,32 @@ sha1_process_bytes (const void *buffer, size_t len, struct sha1_ctx *ctx) #define F3(B,C,D) ( ( B C ) | ( D ( B | C ) ) ) #define F4(B,C,D) (B ^ C ^ D) +struct lookup_t +{ + unsigned char l1 : 4; + unsigned char l2 : 4; + unsigned char l3 : 4; + unsigned char l4 : 4; +}; + +const static struct lookup_t +sha1_pre[16] = {{(0 - 3) 0x0f, (0 - 8) 0x0f, (0 - 14) 0x0f}, +{(1 - 3) 0x0f, (1 - 8) 0x0f, (1 - 14) 0x0f}, +{(2 - 3) 0x0f, (2 - 8) 0x0f, (2 - 14) 0x0f}, +{(3 - 3) 0x0f, (3 - 8) 0x0f, (3 - 14) 0x0f}, +{(4 - 3) 0x0f, (4 - 8) 0x0f, (4 - 14) 0x0f}, +{(5 - 3) 0x0f, (5 - 8) 0x0f, (5 - 14) 0x0f}, +{(6 - 3) 0x0f, (6 - 8) 0x0f, (6 - 14) 0x0f}, +{(7 - 3) 0x0f, (7 - 8) 0x0f, (7 - 14) 0x0f}, +{(8 - 3) 0x0f, (8 - 8) 0x0f, (8 - 14) 0x0f}, +{(9 - 3) 0x0f, (9 - 8) 0x0f, (9 - 14) 0x0f}, +{(10 - 3) 0x0f, (10 - 8) 0x0f, (10 - 14) 0x0f}, +{(11 - 3) 0x0f, (11 - 8) 0x0f, (11 - 14) 0x0f}, +{(12 - 3) 0x0f, (12 - 8) 0x0f, (12 - 14) 0x0f}, +{(13 - 3) 0x0f, (13 - 8) 0x0f, (13 - 14) 0x0f}, +{(14 - 3) 0x0f, (14 - 8) 0x0f, (14 - 14) 0x0f}, +{(15 - 3) 0x0f, (15 - 8) 0x0f, (15 - 14) 0x0f}}; + /* Process LEN bytes of BUFFER, accumulating context into CTX. It is assumed that LEN % 64 == 0. Most of this code comes from GnuPG's cipher/sha1.c. */ @@ -309,9 +335,8 @@ sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx) #define rol(x, n) (((x) (n)) | ((uint32_t) (x) (32 - (n -#define M(I) ( tm = x[I0x0f] ^ x[(I-14)0x0f] \ - ^ x[(I-8)0x0f] ^ x[(I-3)0x0f] \ - , (x[I0x0f] = rol(tm, 1)) ) +#define M(I) (x[I] = rol (x[sha1_pre[I].l1] ^ x[sha1_pre[I].l2] \ + ^ x[sha1_pre[I].l3] ^ x[I], 1)) #define R(A,B,C,D,E,F,K,M) do { E += rol( A, 5 ) \ + F( B, C, D ) \ @@ -322,7 +347,6 @@ sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx) while (words endp) { - uint32_t tm; int t; for (t = 0; t 16; t++) { @@ -346,70 +370,70 @@ sha1_process_block (const void *buffer, size_t len, struct sha1_ctx *ctx) R( c, d, e, a, b, F1, K1, x[13] ); R( b, c, d, e, a, F1, K1, x[14] ); R( a, b, c, d, e, F1, K1, x[15] ); - R( e, a, b, c, d, F1, K1, M(16) ); - R( d, e, a, b, c, F1, K1, M(17) ); - R( c, d, e, a, b, F1, K1, M(18) ); - R( b, c, d, e, a, F1, K1, M(19) ); - R( a, b, c, d, e, F2, K2, M(20) ); - R( e, a, b, c, d, F2, K2, M(21) ); - R( d, e, a, b, c, F2, K2, M(22) ); - R( c, d, e, a, b, F2, K2, M(23) ); - R( b, c, d, e, a, F2, K2, M(24) ); - R( a, b, c, d, e, F2, K2, M(25) ); - R( e, a, b, c, d, F2, K2, M(26) ); - R( d, e, a, b, c, F2, K2, M(27) ); - R( c, d, e, a, b, F2, K2, M(28) ); - R( b, c, d, e, a, F2, K2, M(29) ); - R( a, b, c, d, e, F2, K2, M(30) ); - R( e, a, b, c, d, F2, K2, M(31) ); - R( d, e, a, b, c, F2, K2, M(32) ); - R( c, d, e, a, b, F2, K2, M(33) ); - R( b, c, d, e, a, F2, K2, M(34) ); - R( a, b, c, d, e, F2, K2, M(35) ); - R( e, a, b, c, d, F2, K2, M(36) ); - R( d, e, a, b, c, F2, K2, M(37) ); - R( c, d, e, a, b, F2, K2, M(38) ); - R( b, c, d, e, a, F2, K2, M(39) ); - R( a, b, c, d, e, F3, K3, M(40) ); - R( e, a, b, c, d, F3, K3, M(41) ); - R( d, e, a, b, c, F3, K3, M(42) ); - R( c, d, e, a, b, F3, K3, M(43) ); - R( b, c, d, e, a, F3, K3, M(44) ); - R( a, b, c, d, e, F3, K3, M(45) ); - R( e, a, b, c, d, F3, K3, M(46) ); - R( d, e, a, b, c, F3, K3, M(47) ); - R( c, d, e
Re: Linus' sha1 is much faster!
Linus Torvalds torva...@linux-foundation.org writes: I pretty much can guarantee you that it improves things only because it makes gcc generate crap code, which then hides some of the P4 issues. I'd also suggest you try gcc-4.4, since that apparently fixes some of the oddest spill issues. Thanks for the hint. I tried gcc-4.4 and it produces slower code than 4.3 on the gnulib SHA1 implementation and my patch makes it even more! I noticed that on my machine your implementation is ~30-40% faster using SHA_ROT for rol/ror instructions than inline assembly, at least with the test-case Pádraig wrote. Am I the only one reporting it? Cheers, Giuseppe
Re: BTRFS file clone support for cp
Jim Meyering j...@meyering.net writes: + if (clone_file (dest_desc, source_desc)) +{ + error (0, errno, _(cannot fstat %s), quote (dst_name)); I prefer this diagnostic ;-) error (0, errno, _(failed to clone %s), quote (dst_name)); Too wild copypaste :) I included your notes in the following patch. Cheers, Giuseppe From 63c0a1840f236eebb9ba3a28d8f1e6242a7c5898 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 1 Aug 2009 19:36:48 +0200 Subject: [PATCH] cp: accept the --reflink option * NEWS: Mention it. * doc/coreutils.texi: Likewise. * src/copy.h (struct cp_options): New member reflink. * src/copy.c (usage): Likewise. (copy_reg): If reflink is true try to clone the file. (main): Check for --reflink. (cp_option_init): By default set reflink to false. * src/install.c (cp_option_init): By default set reflink to false. * src/mv.c (cp_option_init): By default set reflink to false. * tests/cp/sparse: Add a new test case. --- NEWS |5 +++-- doc/coreutils.texi |9 + src/copy.c | 16 ++-- src/copy.h |3 +++ src/cp.c | 16 +++- src/install.c |1 + src/mv.c |1 + tests/cp/sparse|4 8 files changed, 46 insertions(+), 9 deletions(-) diff --git a/NEWS b/NEWS index 80c60e2..94511b7 100644 --- a/NEWS +++ b/NEWS @@ -35,8 +35,9 @@ GNU coreutils NEWS-*- outline -*- chroot now accepts the options --userspec and --groups. - cp, install, mv: take advantage of btrfs' O(1) copy-on-write feature - when both the source and destination are on the same btrfs partition. + cp accepts a new option, --reflink: create a lightweight copy + using copy-on-write (COW). This is currently supported only on + btrfs file systems. sort accepts a new option, --human-numeric-sort (-h): sort numbers while honoring human readable suffixes like KiB and MB etc. diff --git a/doc/coreutils.texi b/doc/coreutils.texi index acec76e..a300d56 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -7543,6 +7543,15 @@ Also, it is not portable to use @option{-R} to copy symbolic links unless you also specify @option{-P}, as @acronym{POSIX} allows implementations that dereference symbolic links by default. +...@item --reflink +...@opindex --reflink +Perform a lightweight, copy-on-write (COW) copy. +Copying with this option can succeed only on some relatively new file systems. +Once it has succeeded, beware that the source and destination files +share the same disk data blocks as long as they remain unmodified. +Thus, if a disk I/O error affects data blocks of one of the files, +the other suffers the exact same fate. + @item --remove-destination @opindex --remove-destination Remove each existing destination file before attempting to open it diff --git a/src/copy.c b/src/copy.c index bbed336..3e83de3 100644 --- a/src/copy.c +++ b/src/copy.c @@ -610,13 +610,16 @@ copy_reg (char const *src_name, char const *dst_name, goto close_src_and_dst_desc; } - /* If --sparse=auto is in effect, attempt a btrfs clone operation. - If the operation is not supported or it fails then copy the file - in the usual way. */ - bool copied = (x-sparse_mode == SPARSE_AUTO - clone_file (dest_desc, source_desc) == 0); + if (x-reflink) +{ + if (clone_file (dest_desc, source_desc)) +{ + error (0, errno, _(failed to clone %s), quote (dst_name)); + return_val = false; +} + goto close_src_and_dst_desc; +} - if (!copied) { typedef uintptr_t word; off_t n_read_total = 0; @@ -,6 +2225,7 @@ valid_options (const struct cp_options *co) assert (VALID_BACKUP_TYPE (co-backup_type)); assert (VALID_SPARSE_MODE (co-sparse_mode)); assert (!(co-hard_link co-symbolic_link)); + assert (!(co-reflink co-sparse_mode != SPARSE_AUTO)); return true; } diff --git a/src/copy.h b/src/copy.h index 8e0b408..ddf4f4e 100644 --- a/src/copy.h +++ b/src/copy.h @@ -219,6 +219,9 @@ struct cp_options such a symlink) and returns false. */ bool open_dangling_dest_symlink; + /* If true, attempt to clone the file instead of copying it. */ + bool reflink; + /* This is a set of destination name/inode/dev triples. Each such triple represents a file we have created corresponding to a source file name that was specified on the command line. Use it to avoid clobbering diff --git a/src/cp.c b/src/cp.c index 8785076..635c7c7 100644 --- a/src/cp.c +++ b/src/cp.c @@ -78,7 +78,8 @@ enum PRESERVE_ATTRIBUTES_OPTION, SPARSE_OPTION, STRIP_TRAILING_SLASHES_OPTION, - UNLINK_DEST_BEFORE_OPENING + UNLINK_DEST_BEFORE_OPENING, + REFLINK_OPTION }; /* True if the kernel is SELinux enabled. */ @@ -121,6 +122,7 @@ static struct option const long_opts[] = {recursive, no_argument, NULL, 'R'}, {remove
Re: BTRFS file clone support for cp
Hi Jim, I attached the cleaned patch, according to your advises. If there are other things, I'll fix them. Regards, Giuseppe Jim Meyering j...@meyering.net writes: How about this for NEWS: cp accepts a new option, --reflink: create a lightweight copy using copy-on-write (COW). This is currently supported only on btrfs file systems. Typically, with a feature like this, if a tool is unable to provide the functionality implied by the new option, it gives a diagnostic and exits nonzero. Otherwise, it would be far more work for an application to determine whether the option was honored. Deciding whether we also want an option saying copy-via-COW-if-possible-and-ignore-any-failure can wait, but I'm leaning away from it for now. With your change, the new reflink member may be used uninitialized via install and mv. To avoid that, just initialize it to false in each cp_option_init. Also, please make cp give a diagnostic when --reflink is used with --sparse=never or --sparse=always. Then, add this assertion in copy.c: assert ( ! (x-reflink x-sparse_mode != SPARSE_AUTO)); From 75610aed5c325d14a3ef43fb2c319ace12f36c57 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 1 Aug 2009 19:36:48 +0200 Subject: [PATCH] cp: accept the --reflink option * NEWS: Mention it. * doc/coreutils.texi: Likewise. * src/copy.h (struct cp_options): New member reflink. * src/copy.c (usage): Likewise. (copy_reg): If reflink is true try to clone the file. (main): Check for --reflink. (cp_option_init): By default set reflink to false. * src/install.c (cp_option_init): By default set reflink to false. * src/mv.c (cp_option_init): By default set reflink to false. * tests/cp/sparse: Add a new test case. --- NEWS |5 +++-- doc/coreutils.texi |9 + src/copy.c | 16 ++-- src/copy.h |3 +++ src/cp.c | 16 +++- src/install.c |1 + src/mv.c |1 + tests/cp/sparse|4 8 files changed, 46 insertions(+), 9 deletions(-) diff --git a/NEWS b/NEWS index 80c60e2..94511b7 100644 --- a/NEWS +++ b/NEWS @@ -35,8 +35,9 @@ GNU coreutils NEWS-*- outline -*- chroot now accepts the options --userspec and --groups. - cp, install, mv: take advantage of btrfs' O(1) copy-on-write feature - when both the source and destination are on the same btrfs partition. + cp accepts a new option, --reflink: create a lightweight copy + using copy-on-write (COW). This is currently supported only on + btrfs file systems. sort accepts a new option, --human-numeric-sort (-h): sort numbers while honoring human readable suffixes like KiB and MB etc. diff --git a/doc/coreutils.texi b/doc/coreutils.texi index acec76e..03c9eb7 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -7543,6 +7543,15 @@ Also, it is not portable to use @option{-R} to copy symbolic links unless you also specify @option{-P}, as @acronym{POSIX} allows implementations that dereference symbolic links by default. +...@item --reflink +...@opindex --reflink +Attempt a O(1) copy-on-write (COW) when the underlying file system +supports this operation instead of really copying the file. +The source and destination files share the same disk data blocks until +they are equal. Changes done in a file are not visible in the other +one because shared blocks will be duplicated before these changes are +stored. + @item --remove-destination @opindex --remove-destination Remove each existing destination file before attempting to open it diff --git a/src/copy.c b/src/copy.c index bbed336..5eae9f4 100644 --- a/src/copy.c +++ b/src/copy.c @@ -610,13 +610,16 @@ copy_reg (char const *src_name, char const *dst_name, goto close_src_and_dst_desc; } - /* If --sparse=auto is in effect, attempt a btrfs clone operation. - If the operation is not supported or it fails then copy the file - in the usual way. */ - bool copied = (x-sparse_mode == SPARSE_AUTO - clone_file (dest_desc, source_desc) == 0); + if (x-reflink) +{ + if (clone_file (dest_desc, source_desc)) +{ + error (0, errno, _(cannot fstat %s), quote (dst_name)); + return_val = false; +} + goto close_src_and_dst_desc; +} - if (!copied) { typedef uintptr_t word; off_t n_read_total = 0; @@ -,6 +2225,7 @@ valid_options (const struct cp_options *co) assert (VALID_BACKUP_TYPE (co-backup_type)); assert (VALID_SPARSE_MODE (co-sparse_mode)); assert (!(co-hard_link co-symbolic_link)); + assert (!(co-reflink co-sparse_mode != SPARSE_AUTO)); return true; } diff --git a/src/copy.h b/src/copy.h index 8e0b408..ddf4f4e 100644 --- a/src/copy.h +++ b/src/copy.h @@ -219,6 +219,9 @@ struct cp_options such a symlink) and returns false. */ bool open_dangling_dest_symlink
Re: BTRFS file clone support for cp
Jim Meyering j...@meyering.net writes: I am now convinced that cp's new behavior belongs on a separate option, --reflink (i.e., it should not be the default). Giuseppe, do you feel like adding that option and adjusting your test accordingly? sure. Things to adjust, other than copy.c and the test: NEWS cp --help doc/coreutils.texi For now, let's continue to use the ioctl, but eventually we'll use the reflinkat syscall. Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: BTRFS file clone support for cp
Jim Meyering j...@meyering.net writes: I am now convinced that cp's new behavior belongs on a separate option, --reflink (i.e., it should not be the default). Giuseppe, do you feel like adding that option and adjusting your test accordingly? I attached two separate patches, --reflink option and file-clone test. Last versions of btrfs have a bug (I asked on #btrfs and they confirmed it), btrfs doesn't use correctly all the free space available. In fact I get ENOSPC while in reality only 54% is used. Probably it is better to postpone the second patch inclusion after the bug is fixed. Another note, I changed this line in the NEWS file: - when both the source and destination are on the same btrfs partition. considering that BTRFS supports multiple devices I am not convinced that it is always true, I guess source and destination could be on different partitions, though I couldn't find a clear answer on the btrfs wiki to this question. Any comment? Thanks, Giuseppe From d110badaf7583acf957477bc7eda2e212b404343 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 1 Aug 2009 19:36:48 +0200 Subject: [PATCH 1/2] cp: accept the --reflink option * NEWS: Mention it. * doc/coreutils.texi: Likewise. * src/copy.h (struct cp_options): New member reflink. * src/copy.c (usage): Likewise. (copy_reg): If reflink is true try to clone the file. (main): Check for --reflink. (cp_option_init): By default set reflink to false. --- NEWS |4 ++-- doc/coreutils.texi |9 + src/copy.c |5 +++-- src/copy.h |3 +++ src/cp.c | 10 +- 5 files changed, 26 insertions(+), 5 deletions(-) diff --git a/NEWS b/NEWS index 80c60e2..8d6d7a6 100644 --- a/NEWS +++ b/NEWS @@ -35,8 +35,8 @@ GNU coreutils NEWS-*- outline -*- chroot now accepts the options --userspec and --groups. - cp, install, mv: take advantage of btrfs' O(1) copy-on-write feature - when both the source and destination are on the same btrfs partition. + cp accepts a new option, --reflink: attempt a copy-on-write (COW) + when the file system supports it. sort accepts a new option, --human-numeric-sort (-h): sort numbers while honoring human readable suffixes like KiB and MB etc. diff --git a/doc/coreutils.texi b/doc/coreutils.texi index acec76e..03c9eb7 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -7543,6 +7543,15 @@ Also, it is not portable to use @option{-R} to copy symbolic links unless you also specify @option{-P}, as @acronym{POSIX} allows implementations that dereference symbolic links by default. +...@item --reflink +...@opindex --reflink +Attempt a O(1) copy-on-write (COW) when the underlying file system +supports this operation instead of really copying the file. +The source and destination files share the same disk data blocks until +they are equal. Changes done in a file are not visible in the other +one because shared blocks will be duplicated before these changes are +stored. + @item --remove-destination @opindex --remove-destination Remove each existing destination file before attempting to open it diff --git a/src/copy.c b/src/copy.c index bbed336..02d36f3 100644 --- a/src/copy.c +++ b/src/copy.c @@ -610,10 +610,11 @@ copy_reg (char const *src_name, char const *dst_name, goto close_src_and_dst_desc; } - /* If --sparse=auto is in effect, attempt a btrfs clone operation. + /* Attempt a clone operation. It is possible only when --sparse=auto + is in effect. If the operation is not supported or it fails then copy the file in the usual way. */ - bool copied = (x-sparse_mode == SPARSE_AUTO + bool copied = (x-reflink x-sparse_mode == SPARSE_AUTO clone_file (dest_desc, source_desc) == 0); if (!copied) diff --git a/src/copy.h b/src/copy.h index 8e0b408..ddf4f4e 100644 --- a/src/copy.h +++ b/src/copy.h @@ -219,6 +219,9 @@ struct cp_options such a symlink) and returns false. */ bool open_dangling_dest_symlink; + /* If true, attempt to clone the file instead of copying it. */ + bool reflink; + /* This is a set of destination name/inode/dev triples. Each such triple represents a file we have created corresponding to a source file name that was specified on the command line. Use it to avoid clobbering diff --git a/src/cp.c b/src/cp.c index 8785076..63c07d4 100644 --- a/src/cp.c +++ b/src/cp.c @@ -78,7 +78,8 @@ enum PRESERVE_ATTRIBUTES_OPTION, SPARSE_OPTION, STRIP_TRAILING_SLASHES_OPTION, - UNLINK_DEST_BEFORE_OPENING + UNLINK_DEST_BEFORE_OPENING, + REFLINK_OPTION }; /* True if the kernel is SELinux enabled. */ @@ -121,6 +122,7 @@ static struct option const long_opts[] = {recursive, no_argument, NULL, 'R'}, {remove-destination, no_argument, NULL, UNLINK_DEST_BEFORE_OPENING}, {sparse, required_argument, NULL, SPARSE_OPTION}, + {reflink, no_argument, NULL, REFLINK_OPTION
Re: BTRFS file clone support for cp
Pádraig Brady p...@draigbrady.com writes: +#expensive_ That comment is just for testing I presume? Note you can run a single expensive test like: (cd tests make check TESTS=cp/file-clone VERBOSE=yes RUN_EXPENSIVE_TESTS=yes) sorry, yes I commented it out only for testing purpose. If you think the rest is fine and want to push it, can you please amend my commit? Thanks, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: [RFC] fallocate utility
Hello, Pádraig Brady p...@draigbrady.com writes: Tilman Schmidt wrote: Pádraig Brady schrieb: I don't see a problem in extending the meaning of the truncate command. Now truncate isn't the best name for the command but that name already existed in BSD and so I thought it best to align with that. So what about also having an fallocate command in coreutils? Well it would benefit from all the existing options of the truncate command, I.E. would share most of the code, so I'm not convinced. Why not make it, in the best Unix tradition, a single executable whose action depends on the name it is run as? Hmm. Good idea. There is precedent for that already in coreutils. What do you think about having two separate executables? Considering fallocate and truncate will share almost all code, these differences can be separed at compilation-time. It seems that the same approach is already used by md5sum and shaXXXsum. IMHO, it is a bit cleaner than depend from the argv[0] value at run-time (Tilman, is it what you meant?). Cheers, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: BTRFS file clone support for cp
Hi Pádraig, thanks for the comments. Pádraig Brady p...@draigbrady.com writes: # 300MB seems to be the minimum size for a btrfs with default parameters. Actually, it seems the minimum space required is 256MB. Using a 255MB image I get: device btrfs.img is too small (must be at least 256 MB) # FIXME: use `truncate --allocate` when it becomes available, which # may allow unmarking this as an expensive test. Are you sure that this feature will make the test less expensive? Still the test files must be written there, so in the best case (considering the fallocate done in 0s) only the dd cost will be saved but still it looks like an expensive test. In the version I attached, I am using a sparse file (truncate --size) and it seems to work fine. Is it correct or am I missing something? I haven't looked yet but probably there are other tests that can take advantage of sparse files instead of using dd. I am also considering the Jim's note doing the umount in the cleanup_ function. Cheers, Giuseppe From 7add4b337b7db0a63bca0dd0fe0f146f175163f8 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Wed, 29 Jul 2009 20:31:20 +0200 Subject: [PATCH] tests: add a test for btrfs' copy-on-write file clone operation * tests/Makefile.am: Consider the new test. * tests/cp/file-clone: New file. --- tests/Makefile.am |1 + tests/cp/file-clone | 58 +++ 2 files changed, 59 insertions(+), 0 deletions(-) create mode 100755 tests/cp/file-clone diff --git a/tests/Makefile.am b/tests/Makefile.am index 59737a0..9841aa3 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -20,6 +20,7 @@ EXTRA_DIST = \ root_tests = \ chown/basic \ + cp/file-clone\ cp/cp-a-selinux \ cp/preserve-gid \ cp/special-bits \ diff --git a/tests/cp/file-clone b/tests/cp/file-clone new file mode 100755 index 000..c65b9cb --- /dev/null +++ b/tests/cp/file-clone @@ -0,0 +1,58 @@ +#!/bin/sh +# Make sure file-clone on a btrfs file system works properly. + +# Copyright (C) 2009 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see http://www.gnu.org/licenses/. + + +if test $VERBOSE = yes; then + set -x + cp --version +fi + +. $srcdir/test-lib.sh + +require_root_ +require_sparse_support_ +#expensive_ + +cleanup_(){ umount btrfs; } + +fail=0 + +mkfs.btrfs --version || skip_test_ btrfs userland tools not installed + +# 256MB seems to be the minimum size for a btrfs with default parameters. +truncate --size=256M btrfs.img || framework_failure + +mkfs.btrfs btrfs.img || framework_failure + +mkdir btrfs || framework_failure + +mount -t btrfs -o loop btrfs.img btrfs || framework_failure + +dd bs=1M count=200 if=/dev/zero of=btrfs/alloc.test || framework_failure + +# If the file is cloned, only additional space for metadata is required. +# Two 200MB files can be present even if the total file system space is 256MB. +cp btrfs/alloc.test btrfs/clone.test || fail=1 +rm btrfs/clone.test + +# When --sparse={always,never} is used, the file is copied without any cloning. +# Use --sparse=never to be sure the file is copied without holes and it is not +# possible since there is not enough free space. +cp --sparse=never btrfs/alloc.test btrfs/clone.test fail=1 + +Exit $fail -- 1.6.3.3 ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: BTRFS file clone support for cp
Hi Pádraig, Pádraig Brady p...@draigbrady.com writes: How different exactly? OK I tried this myself on F11 with inconclusive results. I can't replicate it now, all tests I am doing report that blocks used before and after the clone are the same. Probably yesterday the difference I noticed was in reality the original file flushed to the disk. The above suggests that the clone does actually allocate space but btrfs isn't reporting it through statvfs correctly? The same message appeared here too some days ago, though I cloned only few Kb files, not much to fill the entire partition. If the clone does allocate space, then how can one clone without allocation which could be very useful for snapshotting for example? I don't know if snapshotting is handled in the same way as a clone, but in this case it seems more obvious to me that no additional space should be reported. Also I tried the above twice and both times got: http://www.kerneloops.org/submitresult.php?number=578993 I didn't get these errors. I am using the btrfs git version. Regards, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: BTRFS file clone support for cp
Jim Meyering j...@meyering.net writes: Another possible issue with this I can think of is depending on the modification pattern of the COW files, the modification processes could fragment the file or more seriously be given ENOSPC errors. I hope btrfs takes care of this behind the scene. How does the clone work wrt to space consumed, a la df? If copying a 1GB file this way does not update usage stats to reflect the additional 1GB of space used, ... I tried to clone a big file and df reported a different used blocks stat that it was before the clone operation. Cheers, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: tail -f: --pid *and* inotify
Jim Meyering j...@meyering.net writes: A couple of points: Please move these declarations down into the scope where they are used. It would be better not to perform the kill test after every single select call when actively tailing files. Considering how --pid is documented (in the texinfo manual), it should be ok to call kill only when select times out (returns 0). Of course, that means a constantly-running tail -f will never check for process death, but that is consistent with the documentation. Thanks for the advises. I cleaned it following them. I was checking for the pid every time exactly to avoid the problem that it is never checked and can run indefinitely even if the process is already not running. Maybe we can add a real clock controlling how much time passed since last check or even simpler, a counter like don't exit without check more than N consecutive times. What do you think? Regards, Giuseppe From 65f2737fa6e2519fbccbad7d285ca8923a893057 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sun, 26 Jul 2009 13:22:57 +0200 Subject: [PATCH] tail: use the inotify backend when a PID is specified. * src/tail.c (tail_forever_inotify): When a PID is specified avoid read to sleep indefinitely. Check if the specified PID if it is still alive, otherwise exit from tail. (main): Use the new tail_forever_inotify interface. --- src/tail.c | 52 +--- 1 files changed, 41 insertions(+), 11 deletions(-) diff --git a/src/tail.c b/src/tail.c index fd44e22..1474b06 100644 --- a/src/tail.c +++ b/src/tail.c @@ -50,6 +50,8 @@ #if HAVE_INOTIFY # include hash.h # include sys/inotify.h +/* `select' is used by tail_forever_inotify. */ +# include sys/select.h #endif /* The official name of this program (e.g., no `g' prefix). */ @@ -1162,7 +1164,8 @@ wd_comparator (const void *e1, const void *e2) Check modifications using the inotify events system. */ static void -tail_forever_inotify (int wd, struct File_spec *f, size_t n_files) +tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, int pid, + double sleep_interval) { size_t i; unsigned int max_realloc = 3; @@ -1253,6 +1256,36 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files) struct inotify_event *ev; + /* If a process is watched be sure that read from wd will not block + indefinetely. */ + if (pid) +{ + fd_set rfd; + struct timeval select_timeout; + int n_descriptors; + + FD_ZERO (rfd); + FD_SET (wd, rfd); + + select_timeout.tv_sec = (time_t) sleep_interval; + select_timeout.tv_usec = 10 * (sleep_interval + - select_timeout.tv_sec); + + n_descriptors = select (wd + 1, rfd, NULL, NULL, select_timeout); + + if (n_descriptors == -1) +error (EXIT_FAILURE, errno, _(error monitoring inotify event)); + + if (n_descriptors == 0) +{ + /* Check if the process we are monitoring is still alive. */ + if (kill (pid, 0) != 0 errno != EPERM) +break; + + continue; +} +} + if (len = evbuf_off) { len = safe_read (wd, evbuf, evlen); @@ -1940,18 +1973,15 @@ main (int argc, char **argv) if (forever) { #if HAVE_INOTIFY - if (pid == 0) + int wd = inotify_init (); + if (wd 0) +error (0, errno, _(inotify cannot be used, reverting to polling)); + else { - int wd = inotify_init (); - if (wd 0) -error (0, errno, _(inotify cannot be used, reverting to polling)); - else -{ - tail_forever_inotify (wd, F, n_files); + tail_forever_inotify (wd, F, n_files, pid, sleep_interval); - /* The only way the above returns is upon failure. */ - exit (EXIT_FAILURE); -} + /* The only way the above returns is upon failure. */ + exit (EXIT_FAILURE); } #endif tail_forever (F, n_files, sleep_interval); -- 1.6.3.3 ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: BTRFS file clone support for cp
Jim Meyering j...@meyering.net writes: Adding this optimization should not change the meaning of --sparse=always. So do you want to use it only when --sparse=auto is used? Precisely. I cleaned the patch a bit, the clone operation is done only when --sparse=auto is used. Regards, Giuseppe From 747c96980acc25220cc436210403cdcaed6239c9 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 25 Jul 2009 16:35:27 +0200 Subject: [PATCH] cp: support the btrfs file system clone operation. * src/copy.c(copy_reg): Use the btrfs clone operation if it is possible. --- src/copy.c | 15 +++ 1 files changed, 15 insertions(+), 0 deletions(-) diff --git a/src/copy.c b/src/copy.c index 4c8c432..e0ddec5 100644 --- a/src/copy.c +++ b/src/copy.c @@ -61,6 +61,11 @@ # include verror.h #endif +#ifdef __linux__ +# include sys/ioctl.h +# define BTRFS_IOC_CLONE 1074041865 +#endif + #ifndef HAVE_FCHOWN # define HAVE_FCHOWN false # define fchown(fd, uid, gid) (-1) @@ -444,6 +449,7 @@ copy_reg (char const *src_name, char const *dst_name, struct stat sb; struct stat src_open_sb; bool return_val = true; + bool copied = false; source_desc = open (src_name, (O_RDONLY | O_BINARY @@ -589,6 +595,15 @@ copy_reg (char const *src_name, char const *dst_name, goto close_src_and_dst_desc; } +#ifdef __linux__ + /* Try a btrfs clone operation. If the operation is not supported + or it fails then copy the file in the usual way. */ + if ((x-sparse_mode == SPARSE_AUTO) !ioctl (dest_desc, BTRFS_IOC_CLONE, + source_desc)) +copied = true; +#endif + + if (!copied) { typedef uintptr_t word; off_t n_read_total = 0; -- 1.6.3.3 ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: tail -f: --pid *and* inotify
Hi Jim, Jim Meyering j...@meyering.net writes: Hi Giuseppe, I've realized that there is a good way to remove the ugly exclusion that currently disables inotify-based tail -f when --pid is specified. Instead of the existing while-1-loop around code that reads the inotify FD, we can use a loop that polls that single FD with a 1-2-second timeout. Then, it can periodically run the usual PID-alive check. Are you interested in doing that? Between this, the ls -1U bug, and the btrfs O(1) copy, I'm now expecting to defer the release by at least a week. I modified tail as you suggested. Regards, Giuseppe From 862c9d934dc2ee188e9fe29985e463e2ec4d16ca Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sun, 26 Jul 2009 13:22:57 +0200 Subject: [PATCH] tail: use the inotify backend when a PID is specified. * src/tail.c (tail_forever_inotify): When a PID is specified avoid read to sleep indefinitely. Check if the specified PID if it is still alive, otherwise exit from tail. (main): Use the new tail_forever_inotify interface. --- src/tail.c | 49 ++--- 1 files changed, 38 insertions(+), 11 deletions(-) diff --git a/src/tail.c b/src/tail.c index fd44e22..74bda55 100644 --- a/src/tail.c +++ b/src/tail.c @@ -50,6 +50,8 @@ #if HAVE_INOTIFY # include hash.h # include sys/inotify.h +/* `select' is used by tail_forever_inotify. */ +# include sys/select.h #endif /* The official name of this program (e.g., no `g' prefix). */ @@ -1162,7 +1164,8 @@ wd_comparator (const void *e1, const void *e2) Check modifications using the inotify events system. */ static void -tail_forever_inotify (int wd, struct File_spec *f, size_t n_files) +tail_forever_inotify (int wd, struct File_spec *f, size_t n_files, int pid, + double sleep_interval) { size_t i; unsigned int max_realloc = 3; @@ -1174,6 +1177,8 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files) char *evbuf; size_t evbuf_off = 0; size_t len = 0; + fd_set rfd; + struct timeval select_timeout; wd_table = hash_initialize (n_files, NULL, wd_hasher, wd_comparator, NULL); if (! wd_table) @@ -1253,6 +1258,31 @@ tail_forever_inotify (int wd, struct File_spec *f, size_t n_files) struct inotify_event *ev; + /* If a process is watched be sure that read from wd will not block + indefinetely. */ + if (pid) +{ + int n_descriptors; + FD_ZERO (rfd); + FD_SET (wd, rfd); + + select_timeout.tv_sec = (time_t) sleep_interval; + select_timeout.tv_usec = 10 * (sleep_interval + - select_timeout.tv_sec); + + n_descriptors = select (wd + 1, rfd, NULL, NULL, select_timeout); + + if (n_descriptors == -1) +error (EXIT_FAILURE, errno, _(error monitoring inotify event)); + + /* Check if the process we are monitoring is still alive. */ + if (kill (pid, 0) != 0 errno != EPERM) +break; + + if (n_descriptors == 0) +continue; +} + if (len = evbuf_off) { len = safe_read (wd, evbuf, evlen); @@ -1940,18 +1970,15 @@ main (int argc, char **argv) if (forever) { #if HAVE_INOTIFY - if (pid == 0) + int wd = inotify_init (); + if (wd 0) +error (0, errno, _(inotify cannot be used, reverting to polling)); + else { - int wd = inotify_init (); - if (wd 0) -error (0, errno, _(inotify cannot be used, reverting to polling)); - else -{ - tail_forever_inotify (wd, F, n_files); + tail_forever_inotify (wd, F, n_files, pid, sleep_interval); - /* The only way the above returns is upon failure. */ - exit (EXIT_FAILURE); -} + /* The only way the above returns is upon failure. */ + exit (EXIT_FAILURE); } #endif tail_forever (F, n_files, sleep_interval); -- 1.6.3.3 ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
BTRFS file clone support for cp
Hello, the BTRFS file system, avaiable on Linux since its 2.6.29 version, supports file cloning. This simple patch adds the support for this feature to the cp utility. Is there an easy and quick way to determine which file system is used by a file? Probably it would be safer to add a guard around the ioctl call. This simple test shows the huge performance boost copying a file on an BTRFS partition. $ du -b foo 51200 foo $ env time cp foo bar 0.00user 1.75system 0:34.97elapsed 5%CPU (0avgtext+0avgdata 0maxresident)k 1000192inputs+124outputs (2major+273minor)pagefaults 0swaps $ env time ./cp foo baz 0.00user 0.00system 0:00.40elapsed 0%CPU (0avgtext+0avgdata 0maxresident)k 5744inputs+32outputs (1major+253minor)pagefaults 0swaps Cheers, Giuseppe From deea0ee0c2a521aae5a89d8613f937707d8f0e7b Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 25 Jul 2009 16:35:27 +0200 Subject: [PATCH] cp: support the btrfs file system clone operation. * src/copy.c(copy_reg): Use the btrfs clone operation if it is possible. --- src/copy.c | 13 + 1 files changed, 13 insertions(+), 0 deletions(-) diff --git a/src/copy.c b/src/copy.c index 4c8c432..7779df4 100644 --- a/src/copy.c +++ b/src/copy.c @@ -61,6 +61,10 @@ # include verror.h #endif +#ifdef __linux__ +# include sys/ioctl.h +#endif + #ifndef HAVE_FCHOWN # define HAVE_FCHOWN false # define fchown(fd, uid, gid) (-1) @@ -444,6 +448,7 @@ copy_reg (char const *src_name, char const *dst_name, struct stat sb; struct stat src_open_sb; bool return_val = true; + bool copied = false; source_desc = open (src_name, (O_RDONLY | O_BINARY @@ -589,6 +594,14 @@ copy_reg (char const *src_name, char const *dst_name, goto close_src_and_dst_desc; } +#ifdef __linux__ + /* Try a btrfs clone operation. If the operation is not supported + or it fails then copy the file in the usual way. */ + if (!ioctl (dest_desc, 1074041865, source_desc)) +copied = true; +#endif + + if (!copied) { typedef uintptr_t word; off_t n_read_total = 0; -- 1.6.3.3 ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: BTRFS file clone support for cp
Jim Meyering j...@meyering.net writes: Doesn't that constant, 1074041865, have a symbolic name? Maybe BTRFS_IOC_CLONE? Yes, it is exactly BTRFS_IOC_CLONE and it is defined in the fs/btrfs/ioctl.h file. Is there an easy and quick way to determine which file system is used by a file? Probably it would be safer to add a guard around the ioctl call. Before thinking about that, I'd like to know the approximate cost of the failing ioctl, e.g., on a kernel with btrfs support and on one without, in case that makes a difference. I.e., what impact would it have if left unprotected? Does it make a measurable difference when copying 20K empty files on a tmpfs file system? If necessary, we can avoid almost all of the per-file ioctl calls via a map that associates each distinct device number encountered with a boolean is btrfs file system. gnulib's fts does something similar, but its goal is to determine whether a different FS-specific performance optimization is likely to help. I didn't notice any difference using a patched and an un-patched cp on tmpfs. To go more in detail I used this simple test: int src = open (foo, O_RDONLY); int dest = open (bar, O_WRONLY | O_CREAT, 0777); assert (src); assert (dest); for (i = 0; i ITERATIONS; i++) if (!ioctl (dest, 1074041865, src)) error (1, 0, cannot be successful!); close (src); close (dest); On BTRFS, it raises correctly the cannot be successful! message. Either on EXT3 and TMPFS the result is similar. With 10 iterations: 0.01user 0.00system 0:00.01elapsed 118%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+120minor)pagefaults 0swaps With 1000 iterations: 0.68user 0.82system 0:01.50elapsed 99%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+119minor)pagefaults 0swaps Regards, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: BTRFS file clone support for cp
Hi Jim, Jim Meyering j...@meyering.net writes: However, but what about cp's --sparse option? btrfs supports sparse files, so this new code will have to honor that. The trouble is that there is currently no option to say preserve precisely and only whatever holes are present in src, which is the behavior I would expect from that ioctl. Yes, sparse files are maintained. $ dd if=/dev/zero of=a bs=1 seek=200M count=0 $ stat a File: `a' Size: 209715200 Blocks: 0 IO Block: 4096 regular file ... $ ./cp a b $ stat b File: `b' Size: 209715200 Blocks: 0 IO Block: 4096 regular file ... The special case to handle differently is --sparse=never. I am inclined to extend the definition of --sparse=auto (the default) to mean make as faithful a copy as possible (btrfs clone), and failing that, use the sparse-preserving heuristic. Then we can use the new ioctl in the default case. The down side of such a move is that it would induce a subtle change in behavior: whereas before, a partially-sparse file would be copied (assuming btrfs FS src and dest) to a fully-sparse destination, now, you'll get a mirror image, partially-sparse copy. Right, it will behave differently on different file systems. What about --sparse=always? But seeing as how btrfs is so new, there is little legacy to worry about. And besides, for a command whose job it to copy, a feature to permit more faithful copying is hard to turn down or relegate to non-default status. Moreover the principal reason to use sparse files is to use less space on the file system, a cloned partially-sparse file on btrfs takes less space than a fully-sparse copied file. IMHO use the btrfs clone method is to prefer in both cases: --sparse=auto and --sparse=always. I think that in any case, considering a file system could not support sparse files, --sparse should be considered just a suggestion and not mandatory, and the final decision left to cp. Regards, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: two new chroot bugs
Hi Jim, Jim Meyering j...@meyering.net writes: From cfe8e7a0001a10d4deceb6065134fe8c402d0368 Mon Sep 17 00:00:00 2001 From: Jim Meyering meyer...@redhat.com Date: Thu, 28 May 2009 22:36:05 +0200 Subject: [PATCH 1/4] tests: use nobody as the default group name in chroot test the nobody group is not present on all systems, for example on Debian it is nogroup so IMHO it is better to find it at runtime using the nobody user's group. The second patch is just a small refactoring. Cheers, Giuseppe From a089ec8770afa349f1e0b6e9d090d739f0afe6dd Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 4 Jul 2009 10:14:31 +0200 Subject: [PATCH 1/2] tests: use the nobody user's group as the default group id test * tests/chroot/credentials: Use the group id, not its name. * tests/test-lib.sh (NON_ROOT_GROUP): Use the nobody user's group in place of nogroup. --- tests/chroot/credentials |2 +- tests/test-lib.sh|2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/chroot/credentials b/tests/chroot/credentials index 58c098f..f200f14 100755 --- a/tests/chroot/credentials +++ b/tests/chroot/credentials @@ -37,7 +37,7 @@ test $(chroot --userspec=$NON_ROOT_USERNAME:$NON_ROOT_GROUP / whoami) != root || fail=1 # Verify that there are no additional groups. -test $(chroot --userspec=$NON_ROOT_USERNAME:$NON_ROOT_GROUP --groups=$NON_ROOT_GROUP / id -nG)\ +test $(chroot --userspec=$NON_ROOT_USERNAME:$NON_ROOT_GROUP --groups=$NON_ROOT_GROUP / id -G)\ = $NON_ROOT_GROUP || fail=1 # Verify that when specifying only the user name we get the current diff --git a/tests/test-lib.sh b/tests/test-lib.sh index d99e3a9..9797b55 100644 --- a/tests/test-lib.sh +++ b/tests/test-lib.sh @@ -209,7 +209,7 @@ require_root_() { uid_is_privileged_ || skip_test_ must be run as root NON_ROOT_USERNAME=${NON_ROOT_USERNAME=nobody} - NON_ROOT_GROUP=${NON_ROOT_GROUP=nobody} + NON_ROOT_GROUP=${NON_ROOT_GROUP=$(id -g $NON_ROOT_USERNAME)} } skip_if_root_() { uid_is_privileged_ skip_test_ must be run as non-root; } -- 1.6.3.3 From e892d0d2155dab81ded428e0c0d04d91febe9b3c Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 4 Jul 2009 10:25:03 +0200 Subject: [PATCH 2/2] tests: refactor code to use require_proc_pid_status_ * tests/tail-2/tail-n0f: Read the process status using the test-lib.sh require_proc_pid_status_ function. --- tests/tail-2/tail-n0f |4 +--- 1 files changed, 1 insertions(+), 3 deletions(-) diff --git a/tests/tail-2/tail-n0f b/tests/tail-2/tail-n0f index 322fddc..fce7ed1 100755 --- a/tests/tail-2/tail-n0f +++ b/tests/tail-2/tail-n0f @@ -40,9 +40,7 @@ for file in empty nonempty; do tail --sleep=4 -${c_or_n} 0 -f $file pid=$! sleep .5 -set _ `sed -n '/^State:[]*\([^ ]\)/s//\1/p' /proc/$pid/status` -shift # Remove the leading `_'. -state=$1 +state=$(get_process_status_ $pid) case $state in S*) ;; *) echo $0: process in unexpected state: $state 12; fail=1 ;; -- 1.6.3.3 ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: [PATCH] tail: add comments noting potential inotify-related problems
Hi Jim, Jim Meyering j...@meyering.net writes: I don't (yet?) see why a tree would be the preferred data structure. ... Because inotify doesn't add recursive watchers. For example, you want to follow by name `/var/foo/bar', and `/var/foo' doesn't exist yet. To catch the event for the `bar' file creation, you will need to register a watcher on the /var directory and when the foo subdirectory is created finally watch the file. Since different files in different directories can be watched, I thought to use a tree to propagate events to children nodes. Though, if you prefer a hash table, approximately the same mechanism can be adapted without problems. Regards, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: [PATCH] tail: add comments noting potential inotify-related problems
Hi Jim, Jim Meyering j...@meyering.net writes: I'm not convinced that adding a lot of new code just to make tail -f handle a far-fetched case like that is worthwhile. But that's just my opinion, and if someone can present a use-case that makes it seem the additional code would be put to good use, I'll keep an open mind ;) So you don't want tail -F to handle the case that the parent directory is removed and after re-created? tail will not open again the watched file in this case, or in the case the parent directory was created after tail initialization. As any parent directory up to '/' can be removed, if we decide to handle the case the parent can be removed, it is not a bad idea to do in a more generic way. I think the difference, in lines of code, between handle just the parent or the full hierarchy is not much, it is just a generalization. On the other hand, as user I rarely use -F and -f is enough, I really don't remember any case where the parent directory could be removed or renamed and don't handle the parent directory will not be a real problem. Cheers, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: [PATCH] tail: add comments noting potential inotify-related problems
Hi Jim, I took a look at the problems you reported. The first one is fixed with the first attached patch. I have tested it under Linux 2.6.18-6-xen-686. tail -F works until the parent directory is not removed and it is very related to the second problem you showed. At this point I think the best way is to find a solution to both, using a tree instead of a hash map. What do you think? Thanks, Giuseppe Jim Meyering j...@meyering.net writes: Hi Giuseppe, I noticed two potential problems. The first appears to affects only kernels 2.6.13..2.6.20. The second one doesn't have to be fixed before the upcoming release. From 8fbe1d2d1f666a0428f41d03831e18d4d1b56e89 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Thu, 2 Jul 2009 23:38:46 +0200 Subject: [PATCH 1/2] tail: avoid a problem for kernels prior to 2.6.21 * src/tail.c (tail_forever_inotify): Handle the special case that the inotify watcher returns zero bytes. --- src/tail.c |7 --- 1 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/tail.c b/src/tail.c index 89c43b8..a99091a 100644 --- a/src/tail.c +++ b/src/tail.c @@ -1259,8 +1259,9 @@ tail_forever_inotify (int wd, struct File_spec *f, int n_files) evbuf_off = 0; /* For kernels prior to 2.6.21, read returns 0 when the buffer - is too small. FIXME: handle that. */ - if (len == SAFE_READ_ERROR errno == EINVAL max_realloc--) + is too small. */ + if ((len == 0 || (len == SAFE_READ_ERROR errno == EINVAL)) + max_realloc--) { len = 0; evlen *= 2; @@ -1268,7 +1269,7 @@ tail_forever_inotify (int wd, struct File_spec *f, int n_files) continue; } - if (len == SAFE_READ_ERROR) + if (len == SAFE_READ_ERROR || len == 0) error (EXIT_FAILURE, errno, _(error reading inotify event)); } -- 1.6.3.3 From bfbd6a82055326ea45882664890a5e77aa3bb2a1 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Thu, 2 Jul 2009 23:40:40 +0200 Subject: [PATCH 2/2] tail: fixed a test case * tests/tail-2/wait: Be sure the `not_accessible' file is really not accessible before try to tail -f it. --- tests/tail-2/wait | 23 +-- 1 files changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/tail-2/wait b/tests/tail-2/wait index 7eee8b1..8f2f610 100755 --- a/tests/tail-2/wait +++ b/tests/tail-2/wait @@ -45,17 +45,20 @@ if test -n $state; then kill $pid fi -tail -s0.1 -f not_accessible -pid=$! -sleep .5 -state=$(get_process_status_ $pid) - -if test -n $state; then - case $state in -S*) echo $0: process still active 12; fail=1 ;; -*) ;; - esac - kill $pid +# Check if the file is really not accessible before use it. +if ! cat not_accessible; then +tail -s0.1 -f not_accessible +pid=$! +sleep .5 +state=$(get_process_status_ $pid) + +if test -n $state; then +case $state in +S*) echo $0: process still active 12; fail=1 ;; +*) ;; +esac +kill $pid +fi fi (tail -s0.1 -f here 2tail.err) -- 1.6.3.3 ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: Savannah report new bug link not obvious
Is https://savannah.gnu.org/bugs/?func=additemgroup=coreutils the page you are looking for? Giuseppe jida...@jidanni.org writes: Logged in to https://savannah.gnu.org/ the user cannot find the link to report a new bug. ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: inotify back end for tail -f on linux
Jim Meyering j...@meyering.net writes: I found an undesired change in functionality. inotify-enabled tail -F sometimes follows the file descriptor of a renamed file, which is contrary to the semantics of --follow=name. The non-inotify version follows the name, not the file descriptor in that case. Oops, I was using a different events mask for re-added files. I added your test case to the tests suite now. Thanks, Giuseppe This is the new version: From 106b1d32a6956f902109bffdf87e7897210c0e04 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 2 Jun 2009 08:28:23 +0200 Subject: [PATCH] tail: Use inotify if it is available. * NEWS: Document the new feature. * m4/jm-macros.m4: Check if inotify is present. * src/tail.c (tail_forever_inotify): New function. (main): Use the inotify-based function, if possible. * tests/Makefile.am: Add new tests for tail. * tests/test-lib.sh (require_proc_pid_status_, get_process_status_): New functions. * tests/tail-2/pid: New file. * tests/tail-2/wait: New file. * tests/tail-2/tail-n0f: Refactor code into the test-lib.sh require_proc_pid_status_ function. --- NEWS |2 + m4/jm-macros.m4 |5 + src/tail.c| 275 +++-- tests/Makefile.am |2 + tests/tail-2/pid | 68 tests/tail-2/tail-n0f |9 +-- tests/tail-2/wait | 131 +++ tests/test-lib.sh | 16 +++ 8 files changed, 493 insertions(+), 15 deletions(-) create mode 100755 tests/tail-2/pid create mode 100755 tests/tail-2/wait diff --git a/NEWS b/NEWS index 29b09a0..cc61bdc 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ GNU coreutils NEWS-*- outline -*- sort accepts a new option, --human-numeric-sort (-h): sort numbers while honoring human readable suffixes like KiB and MB etc. + tail uses inotify when possible. + * Noteworthy changes in release 7.4 (2009-05-07) [stable] diff --git a/m4/jm-macros.m4 b/m4/jm-macros.m4 index 90c55bf..f14d6a3 100644 --- a/m4/jm-macros.m4 +++ b/m4/jm-macros.m4 @@ -52,6 +52,11 @@ AC_DEFUN([coreutils_MACROS], # Used by sort.c. AC_CHECK_FUNCS_ONCE([nl_langinfo]) + # Used by tail.c. + AC_CHECK_FUNCS([inotify_init], +[AC_DEFINE([HAVE_INOTIFY], [1], + [Define to 1 if you have usable inotify support.])]) + AC_CHECK_FUNCS_ONCE( \ endgrent \ endpwent \ diff --git a/src/tail.c b/src/tail.c index 9d416e1..059ee82 100644 --- a/src/tail.c +++ b/src/tail.c @@ -21,7 +21,8 @@ Original version by Paul Rubin p...@ocf.berkeley.edu. Extensions by David MacKenzie d...@gnu.ai.mit.edu. - tail -f for multiple files by Ian Lance Taylor i...@airs.com. */ + tail -f for multiple files by Ian Lance Taylor i...@airs.com. + inotify back-end by Giuseppe Scrivano gscriv...@gnu.org. */ #include config.h @@ -46,6 +47,11 @@ #include xstrtol.h #include xstrtod.h +#if HAVE_INOTIFY +# include hash.h +# include sys/inotify.h +#endif + /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME tail @@ -125,8 +131,26 @@ struct File_spec /* The value of errno seen last time we checked this file. */ int errnum; +#if HAVE_INOTIFY + /* The watch descriptor used by inotify. */ + int wd; + + /* The parent directory watch descriptor. It is used only + * when Follow_name is used. */ + int parent_wd; + + /* Offset in NAME of the basename part. */ + size_t basename_start; +#endif }; +#if HAVE_INOTIFY +/* The events mask used with inotify on files. This mask is not used on + directories. */ +const uint32_t inotify_wd_mask = (IN_MODIFY | IN_ATTRIB | IN_DELETE_SELF + | IN_MOVE_SELF); +#endif + /* Keep trying to open a file even if it is inaccessible when tail starts or if it becomes inaccessible later -- useful only with -f. */ static bool reopen_inaccessible_files; @@ -964,7 +988,7 @@ any_live_files (const struct File_spec *f, int n_files) return false; } -/* Tail NFILES files forever, or until killed. +/* Tail N_FILES files forever, or until killed. The pertinent information for each file is stored in an entry of F. Loop over each of them, doing an fstat to see if they have changed size, and an occasional open/fstat to see if any dev/ino pair has changed. @@ -972,22 +996,22 @@ any_live_files (const struct File_spec *f, int n_files) while and try again. Continue until the user interrupts us. */ static void -tail_forever (struct File_spec *f, int nfiles, double sleep_interval) +tail_forever (struct File_spec *f, int n_files, double sleep_interval) { /* Use blocking I/O as an optimization, when it's easy. */ bool blocking = (pid == 0 follow_mode == Follow_descriptor - nfiles == 1 ! S_ISREG (f[0].mode)); + n_files == 1 ! S_ISREG (f[0].mode)); int last; bool writer_is_dead = false; - last
Re: wc
Hello, if you need a specific wc output field, the words count in this case, you can use the command `wc -w FILE'. Is it what you want? Regards, Giuseppe Iram CHELLI iram.che...@loria.fr writes: Hello, i am using wc in shell scripts the exact command is: wc FILE | cut -d -f 2 it usually works but sometimes wc outputs the result in a different formatting, that is, I have to do a cut -d -f 3 to get the wc. This is not very convenient that the wc program shall not return results in a fixed formatting. Regards, Iram ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: inotify back end for tail -f on linux
Jim, thank for the review. Jim Meyering j...@meyering.net writes: Perhaps prev_wd would be more accurate? I fixed it. The same name is used in `tail_forever', that is why I used it, should it be changed in `tail_forever' too? Another regression: touch k; chmod 0 k; tail -F k fails to open k (as it must), but even when I run chmod u+r k in another window, the inotify-based version of tail fails to open it. I fixed it. This version includes, among other things, a refactoring of the new tests as previously reported. Cheers, Giuseppe From 28cc1f8e2582adb269babd5a6371fc5797f9ba52 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 2 Jun 2009 08:28:23 +0200 Subject: [PATCH] tail: Use inotify if it is available. * NEWS: Document the new feature. * m4/jm-macros.m4: Check if inotify is present. * src/tail.c (tail_forever_inotify): New function. (main): Use the inotify-based function, if possible. * tests/Makefile.am: Add new tests for tail. * tests/test-lib.sh (require_proc_pid_status_, get_process_status_): New functions. * tests/tail-2/pid: New file. * tests/tail-2/wait: New file. * tests/tail-2/tail-n0f: Refactor code into the test-lib.sh require_proc_pid_status_ function. --- NEWS |2 + m4/jm-macros.m4 |5 + src/tail.c| 268 +++-- tests/Makefile.am |2 + tests/tail-2/pid | 68 + tests/tail-2/tail-n0f |9 +-- tests/tail-2/wait | 118 ++ tests/test-lib.sh | 16 +++ 8 files changed, 473 insertions(+), 15 deletions(-) create mode 100755 tests/tail-2/pid create mode 100755 tests/tail-2/wait diff --git a/NEWS b/NEWS index 29b09a0..cc61bdc 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ GNU coreutils NEWS-*- outline -*- sort accepts a new option, --human-numeric-sort (-h): sort numbers while honoring human readable suffixes like KiB and MB etc. + tail uses inotify when possible. + * Noteworthy changes in release 7.4 (2009-05-07) [stable] diff --git a/m4/jm-macros.m4 b/m4/jm-macros.m4 index 90c55bf..f14d6a3 100644 --- a/m4/jm-macros.m4 +++ b/m4/jm-macros.m4 @@ -52,6 +52,11 @@ AC_DEFUN([coreutils_MACROS], # Used by sort.c. AC_CHECK_FUNCS_ONCE([nl_langinfo]) + # Used by tail.c. + AC_CHECK_FUNCS([inotify_init], +[AC_DEFINE([HAVE_INOTIFY], [1], + [Define to 1 if you have usable inotify support.])]) + AC_CHECK_FUNCS_ONCE( \ endgrent \ endpwent \ diff --git a/src/tail.c b/src/tail.c index 9d416e1..460e255 100644 --- a/src/tail.c +++ b/src/tail.c @@ -21,7 +21,8 @@ Original version by Paul Rubin p...@ocf.berkeley.edu. Extensions by David MacKenzie d...@gnu.ai.mit.edu. - tail -f for multiple files by Ian Lance Taylor i...@airs.com. */ + tail -f for multiple files by Ian Lance Taylor i...@airs.com. + inotify back-end by Giuseppe Scrivano gscriv...@gnu.org. */ #include config.h @@ -46,6 +47,11 @@ #include xstrtol.h #include xstrtod.h +#if HAVE_INOTIFY +# include hash.h +# include sys/inotify.h +#endif + /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME tail @@ -125,6 +131,17 @@ struct File_spec /* The value of errno seen last time we checked this file. */ int errnum; +#if HAVE_INOTIFY + /* The watch descriptor used by inotify. */ + int wd; + + /* The parent directory watch descriptor. It is used only + * when Follow_name is used. */ + int parent_wd; + + /* Offset in NAME of the basename part. */ + size_t basename_start; +#endif }; /* Keep trying to open a file even if it is inaccessible when tail starts @@ -964,7 +981,7 @@ any_live_files (const struct File_spec *f, int n_files) return false; } -/* Tail NFILES files forever, or until killed. +/* Tail N_FILES files forever, or until killed. The pertinent information for each file is stored in an entry of F. Loop over each of them, doing an fstat to see if they have changed size, and an occasional open/fstat to see if any dev/ino pair has changed. @@ -972,22 +989,22 @@ any_live_files (const struct File_spec *f, int n_files) while and try again. Continue until the user interrupts us. */ static void -tail_forever (struct File_spec *f, int nfiles, double sleep_interval) +tail_forever (struct File_spec *f, int n_files, double sleep_interval) { /* Use blocking I/O as an optimization, when it's easy. */ bool blocking = (pid == 0 follow_mode == Follow_descriptor - nfiles == 1 ! S_ISREG (f[0].mode)); + n_files == 1 ! S_ISREG (f[0].mode)); int last; bool writer_is_dead = false; - last = nfiles - 1; + last = n_files - 1; while (1) { int i; bool any_input = false; - for (i = 0; i nfiles; i++) + for (i = 0; i n_files; i++) { int fd; char const *name; @@ -1087,7 +1104,7
Re: inotify back end for tail -f on linux
Hello, Thanks for your fast review. Jim Meyering j...@meyering.net writes: Please rename to n_files, to be consistent with other variables in this file. Besides, it's slightly more readable. The same name is used in the tail_forever function, to keep consistency between tail_forever and tail_forever_inotify, I changed nfiles in tail_forever too. Typically we abbreviate with buf, not buff. How about evbuf and evbuf_off? Ok, done. Whoops. You forgot to detect insertion failure. I'm adding __attribute__ ((__warn_unused_result__)) to that function and a few others in gnulib's hash.h, so the compiler will detect this if it happens again. Added. I have a slight preference for the shorter #if ...: #if HAVE_INOTIFY I replaced all #ifdef HAVE_INOTIFY with the shorter form. For readability, I usually prefer not to use a single-line else block unless the then-block is also a one-liner. Please do this instead: if (wd 0) error (0, errno, _(inotify cannot be used, reverting to polling)); else { ... } Done. How about just this (like below)? touch here || framework_failure Each test is guaranteed to start in an empty directory. I removed all the unuseful additional checks to don't modify existing files, I didn't know that tests start in a clean temporary directory. Bonus points if you can test effectively (i.e., still avoid race conditions) without sleeping for long. I consider 4-5 seconds long. I couldn't remove `sleep's completely but I reduced their time. I also fixed some mistakes I previously did. No need to remove files. The entire temporary directory is removed automatically. Same below. As above, I didn't know it was a temporary directory. I removed these `rm's too. With these two new uses, the above test has been repeated enough times now that it deserves to be factored into a require_proc_pid_status_ function in test-lib.sh. [] The above 3 lines are repeated far too many times. Please define a function in test-lib.sh, and then do e.g., I refactored the code as you suggested, also I changed tail-2/tail-n0f to use this new function. - insufficient quoting; that will evoke a syntax error if $state is empty (btw, below, no quotes in case $state in... is fine) - use -n in place of ! -z; - I prefer to use test in place of [...] I changed it. +test $(wc -w tail.err) eq 0 || fail=1 This can be written more simply: test -s tail.err fail=1 Done. The new version includes all these changes. Cheers, Giuseppe From 53bf397a3d34ccd2186197926c63b089477e5a62 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 2 Jun 2009 08:28:23 +0200 Subject: [PATCH] tail: Use inotify if it is available. * NEWS: Document the new feature. * m4/jm-macros.m4: Check if inotify is present. * src/tail.c (tail_forever_inotify): New function. (main): Use the inotify-based function, if possible. * tests/Makefile.am: Added new tests for tail. * tests/test-lib.sh: The require_proc_pid_status_ and get_process_status_ functions were added. * tests/tail-2/pid: New file. * tests/tail-2/wait: New file. * tests/tail-2/tail-n0f: Refactored code into the test-lib.sh require_proc_pid_status_ function. --- NEWS |2 + m4/jm-macros.m4 |5 + src/tail.c| 265 +++-- tests/Makefile.am |2 + tests/tail-2/pid | 68 + tests/tail-2/tail-n0f |9 +-- tests/tail-2/wait | 118 ++ tests/test-lib.sh | 18 8 files changed, 472 insertions(+), 15 deletions(-) create mode 100755 tests/tail-2/pid create mode 100755 tests/tail-2/wait diff --git a/NEWS b/NEWS index 29b09a0..cc61bdc 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ GNU coreutils NEWS-*- outline -*- sort accepts a new option, --human-numeric-sort (-h): sort numbers while honoring human readable suffixes like KiB and MB etc. + tail uses inotify when possible. + * Noteworthy changes in release 7.4 (2009-05-07) [stable] diff --git a/m4/jm-macros.m4 b/m4/jm-macros.m4 index 90c55bf..f14d6a3 100644 --- a/m4/jm-macros.m4 +++ b/m4/jm-macros.m4 @@ -52,6 +52,11 @@ AC_DEFUN([coreutils_MACROS], # Used by sort.c. AC_CHECK_FUNCS_ONCE([nl_langinfo]) + # Used by tail.c. + AC_CHECK_FUNCS([inotify_init], +[AC_DEFINE([HAVE_INOTIFY], [1], + [Define to 1 if you have usable inotify support.])]) + AC_CHECK_FUNCS_ONCE( \ endgrent \ endpwent \ diff --git a/src/tail.c b/src/tail.c index 9d416e1..1de76b1 100644 --- a/src/tail.c +++ b/src/tail.c @@ -21,7 +21,8 @@ Original version by Paul Rubin p...@ocf.berkeley.edu. Extensions by David MacKenzie d...@gnu.ai.mit.edu. - tail -f for multiple files by Ian Lance Taylor i...@airs.com. */ + tail -f for multiple files by Ian Lance Taylor i
Re: inotify back end for tail -f on linux
Hi Jim, Jim Meyering j...@meyering.net writes: Oh, and it'd be better to put that test in m4/jm-macros.m4, not in configure.ac. Ok, I'll move it. AC_CONFIG_FILES( Makefile doc/Makefile ... + if (len = evbuff_off) +{ + len = read (wd, evbuff, evlen); Please use safe_read here. Then you can drop the EINTR test/block below. Ok. -- Notice how this waits, as it should: $ touch k; chmod 0 k; ./tail --pid $$ -F k ./tail: cannot open `k' for reading: Permission denied yet this exits immediately: [also, it'd be nice not to give two diagnostics about the same problem] $ touch k; chmod 0 k; ./tail -F k ./tail: cannot open `k' for reading: Permission denied ./tail: cannot watch `k': Permission denied [Exit 1] -- I haven't fixed that last problem. Thanks, I'll take a look at it. Please fold the following patch into yours for the next iteration. From 2a200db8a5f3b36d23b179bc4343c647957d6d13 Mon Sep 17 00:00:00 2001 From: Jim Meyering meyer...@redhat.com Date: Sat, 6 Jun 2009 10:12:02 +0200 Subject: [PATCH] tail: minor changes * src/tail.c (main): Don't call inotify_init when waiting for a PID. [HAVE_INOTIFY]: Exit nonzero whenever tail_forever_inotify returns. Change wd 0 to 0 = wd, since 0 is a valid file descriptor. --- src/tail.c | 12 +--- 1 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/tail.c b/src/tail.c index 6d5f870..4d9270f 100644 --- a/src/tail.c +++ b/src/tail.c @@ -1929,9 +1929,15 @@ main (int argc, char **argv) if (forever) { #ifdef HAVE_INOTIFY - int wd = inotify_init (); - if (wd 0 pid == 0) -tail_forever_inotify (wd, F, n_files); + if (pid == 0) +{ + int wd = inotify_init (); + if (0 = wd) +tail_forever_inotify (wd, F, n_files); + + /* The only way the above returns is upon failure. */ + exit (EXIT_FAILURE); +} #endif tail_forever (F, n_files, sleep_interval); } I think the exit (EXIT_FAILURE) should be included in the if: if (0 = wd) { tail_forever_inotify (wd, F, n_files); exit (EXIT_FAILURE); } The inotify_init () call can fail if the inotify support is not present at runtime, and in this case it is a good idea to use polling instead of exit immediately. Do you agree? Regards, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: inotify back end for tail -f on linux
Jim Meyering j...@meyering.net writes: Actually, there may be a nice way to allow --pid=PID to integrate seamlessly with inotify support. I think you can add an IN_DELETE_SELF inotify watch on the /proc/PID directory. I suspect that every system with inotify support also has usable /proc/PID directories. The only requirement seems to be that the desired directory be readable. I tried to watch /proc/$PID using inotify-tools and a few lines program that I wrote ad-hoc and it seems I can't monitor /proc correctly. I didn't receive IN_DELETE_SELF or any other message when the $PID process was killed. It seems I can't use inotify in this direct way to monitor processes status. Cheers, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: inotify back end for tail -f on linux
Hi Jim, Jim Meyering j...@meyering.net writes: I haven't fixed that last problem. Please fold the following patch into yours for the next iteration. in this version I included all the problems you reported me, the touch k; chmod 0 k; ./tail -F k test case is included in the tests suite now. Regards, Giuseppe From 529e1c2ba2a74168995de9ae7f8b9efa0d2d71c4 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 2 Jun 2009 08:28:23 +0200 Subject: [PATCH] tail: Use inotify if it is available. * NEWS: Document the new feature. * m4/jm-macros.m4: Check if inotify is present. * src/tail.c (tail_forever_inotify): New function. (main): Use the inotify-based function, if possible. * tests/Makefile.am: Added new tests for tail. * tests/tail-2/pid: New file. * tests/tail-2/wait: New file. --- NEWS |2 + m4/jm-macros.m4 |5 + src/tail.c| 248 - tests/Makefile.am |2 + tests/tail-2/pid | 81 + tests/tail-2/wait | 146 +++ 6 files changed, 483 insertions(+), 1 deletions(-) create mode 100755 tests/tail-2/pid create mode 100755 tests/tail-2/wait diff --git a/NEWS b/NEWS index 29b09a0..cc61bdc 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ GNU coreutils NEWS-*- outline -*- sort accepts a new option, --human-numeric-sort (-h): sort numbers while honoring human readable suffixes like KiB and MB etc. + tail uses inotify when possible. + * Noteworthy changes in release 7.4 (2009-05-07) [stable] diff --git a/m4/jm-macros.m4 b/m4/jm-macros.m4 index 90c55bf..f14d6a3 100644 --- a/m4/jm-macros.m4 +++ b/m4/jm-macros.m4 @@ -52,6 +52,11 @@ AC_DEFUN([coreutils_MACROS], # Used by sort.c. AC_CHECK_FUNCS_ONCE([nl_langinfo]) + # Used by tail.c. + AC_CHECK_FUNCS([inotify_init], +[AC_DEFINE([HAVE_INOTIFY], [1], + [Define to 1 if you have usable inotify support.])]) + AC_CHECK_FUNCS_ONCE( \ endgrent \ endpwent \ diff --git a/src/tail.c b/src/tail.c index 9d416e1..3a7b022 100644 --- a/src/tail.c +++ b/src/tail.c @@ -46,6 +46,11 @@ #include xstrtol.h #include xstrtod.h +#ifdef HAVE_INOTIFY +# include hash.h +# include sys/inotify.h +#endif + /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME tail @@ -125,6 +130,17 @@ struct File_spec /* The value of errno seen last time we checked this file. */ int errnum; +#ifdef HAVE_INOTIFY + /* The watch descriptor used by inotify. */ + int wd; + + /* The parent directory watch descriptor. It is used only + * when Follow_name is used. */ + int parent_wd; + + /* Offset in NAME of the basename part. */ + size_t basename_start; +#endif }; /* Keep trying to open a file even if it is inaccessible when tail starts @@ -1117,6 +1133,219 @@ tail_forever (struct File_spec *f, int nfiles, double sleep_interval) } } +#ifdef HAVE_INOTIFY + +static size_t +wd_hasher (const void *entry, size_t tabsize) +{ + const struct File_spec *spec = entry; + return spec-wd % tabsize; +} + +static bool +wd_comparator (const void *e1, const void *e2) +{ + const struct File_spec *spec1 = e1; + const struct File_spec *spec2 = e2; + return spec1-wd == spec2-wd; +} + +/* Tail NFILES files forever, or until killed. + Check modifications using the inotify events system. */ +static void +tail_forever_inotify (int wd, struct File_spec *f, int nfiles) +{ + unsigned int i; + unsigned int max_realloc = 3; + Hash_table *wd_table; + + bool found_watchable = false; + size_t last; + size_t evlen = 0; + char *evbuff; + size_t evbuff_off = 0; + ssize_t len = 0; + + wd_table = hash_initialize (nfiles, NULL, wd_hasher, wd_comparator, NULL); + if (! wd_table) +xalloc_die (); + + for (i = 0; i nfiles; i++) +{ + if (!f[i].ignore) +{ + int old_wd = f[i].wd; + size_t fnlen = strlen (f[i].name); + if (evlen fnlen) +evlen = fnlen; + + f[i].wd = 0; + + if (follow_mode == Follow_name) +{ + size_t dirlen = dir_len (f[i].name); + char prev = f[i].name[dirlen]; + f[i].basename_start = last_component (f[i].name) - f[i].name; + + f[i].name[dirlen] = '\0'; + + /* It's fine to add the same directory more than once. + In that case the same watch descriptor is returned. */ + f[i].parent_wd = inotify_add_watch (wd, dirlen ? f[i].name : ., + IN_CREATE | IN_MOVED_TO); + + f[i].name[dirlen] = prev; + + if (f[i].parent_wd 0) +{ + error (0, errno, _(cannot watch parent directory of %s), + quote (f[i].name)); + continue; +} +} + + old_wd = f[i].wd; + f[i].wd
Re: inotify back end for tail -f on linux
Hi Jim, Jim Meyering j...@meyering.net writes: No test is too small or too obvious to add ;-) Adding this simple test would be great. Thanks! I found another problem. The inotify version can't be used when --pid is specified. There are other inotify events that I want to investigate better if they can be used with --pid and in this case we can add them later when the inotify back end is stable enough for me to break it again :) What do you think? This is the updated version, I integrated your test-case in the tests/tail-2/wait file and I added a new file (tests/tail-2/pid) with additional tests for the --pid option. Regards, Giuseppe From f0824b8b79c1fb22e9a48cbf831a87433fc4d3e8 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 2 Jun 2009 08:28:23 +0200 Subject: [PATCH] tail: Use inotify if it is available. * NEWS: Document the new feature. * configure.ac: Check if inotify is present. * src/tail.c (tail_forever_inotify): New function. (main): Use the inotify-based function, if possible. * tests/Makefile.am: Added new tests for tail. * tests/tail-2/pid: New file. * tests/tail-2/wait: New file. --- NEWS |2 + configure.ac |2 + src/tail.c| 245 - tests/Makefile.am |2 + tests/tail-2/pid | 81 ++ tests/tail-2/wait | 114 + 6 files changed, 445 insertions(+), 1 deletions(-) create mode 100644 tests/tail-2/pid create mode 100644 tests/tail-2/wait diff --git a/NEWS b/NEWS index 29b09a0..c5a2ef5 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ GNU coreutils NEWS-*- outline -*- sort accepts a new option, --human-numeric-sort (-h): sort numbers while honoring human readable suffixes like KiB and MB etc. + tail uses inotify if it is present. + * Noteworthy changes in release 7.4 (2009-05-07) [stable] diff --git a/configure.ac b/configure.ac index 4eb640e..a63ac0c 100644 --- a/configure.ac +++ b/configure.ac @@ -410,6 +410,8 @@ AM_GNU_GETTEXT_VERSION([0.15]) # For a test of uniq: it uses the $LOCALE_FR envvar. gt_LOCALE_FR +AC_CHECK_FUNCS([inotify_init], [AC_DEFINE([HAVE_INOTIFY], [1], [Check for libinotify])]) + AC_CONFIG_FILES( Makefile doc/Makefile diff --git a/src/tail.c b/src/tail.c index 9d416e1..6d5f870 100644 --- a/src/tail.c +++ b/src/tail.c @@ -46,6 +46,11 @@ #include xstrtol.h #include xstrtod.h +#ifdef HAVE_INOTIFY +# include hash.h +# include sys/inotify.h +#endif + /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME tail @@ -125,6 +130,17 @@ struct File_spec /* The value of errno seen last time we checked this file. */ int errnum; +#ifdef HAVE_INOTIFY + /* The watch descriptor used by inotify. */ + int wd; + + /* The parent directory watch descriptor. It is used only + * when Follow_name is used. */ + int parent_wd; + + /* Offset in NAME of the basename part. */ + size_t basename_start; +#endif }; /* Keep trying to open a file even if it is inaccessible when tail starts @@ -1117,6 +1133,226 @@ tail_forever (struct File_spec *f, int nfiles, double sleep_interval) } } +#ifdef HAVE_INOTIFY + +static size_t +wd_hasher (const void *entry, size_t tabsize) +{ + const struct File_spec *spec = entry; + return spec-wd % tabsize; +} + +static bool +wd_comparator (const void *e1, const void *e2) +{ + const struct File_spec *spec1 = e1; + const struct File_spec *spec2 = e2; + return spec1-wd == spec2-wd; +} + +/* Tail NFILES files forever, or until killed. + Check modifications using the inotify events system. */ +static void +tail_forever_inotify (int wd, struct File_spec *f, int nfiles) +{ + unsigned int i; + unsigned int max_realloc = 3; + Hash_table *wd_table; + + bool found_watchable = false; + size_t last; + size_t evlen = 0; + char *evbuff; + size_t evbuff_off = 0; + ssize_t len = 0; + + wd_table = hash_initialize (nfiles, NULL, wd_hasher, wd_comparator, NULL); + if (! wd_table) +xalloc_die (); + + for (i = 0; i nfiles; i++) +{ + if (!f[i].ignore) +{ + size_t fnlen = strlen (f[i].name); + if (evlen fnlen) +evlen = fnlen; + + f[i].wd = 0; + + if (follow_mode == Follow_name) +{ + size_t dirlen = dir_len (f[i].name); + char prev = f[i].name[dirlen]; + f[i].basename_start = last_component (f[i].name) - f[i].name; + + f[i].name[dirlen] = '\0'; + + /* It's fine to add the same directory more than once. + In that case the same watch descriptor is returned. */ + f[i].parent_wd = inotify_add_watch (wd, dirlen ? f[i].name : ., + IN_CREATE | IN_MOVED_TO); + + f[i].name[dirlen] = prev; + + if (f[i].parent_wd 0
Re: inotify back end for tail -f on linux
Jim Meyering j...@meyering.net writes: A few suggestions: Thank you for all your suggestions, I'll keep them in mind. I used your modified patch as my starting point. I fixed the segfault problem and I added a new test case. Regards, Giuseppe From 159dad5939b3853789d9c3c5f30ed92d74812d10 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Tue, 2 Jun 2009 08:28:23 +0200 Subject: [PATCH] tail: Use inotify if it is available. * NEWS: Document the new feature. * configure.ac: Check if inotify is present. * src/tail.c (tail_forever_inotify): New function. (main): Use the inotify-based function, if possible. * tests/Makefile.am: Added new test for tail. * tests/tail-2/wait: New file. --- NEWS |2 + configure.ac |2 + src/tail.c| 245 - tests/Makefile.am |1 + tests/tail-2/wait | 72 5 files changed, 321 insertions(+), 1 deletions(-) create mode 100644 tests/tail-2/wait diff --git a/NEWS b/NEWS index 29b09a0..c5a2ef5 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ GNU coreutils NEWS-*- outline -*- sort accepts a new option, --human-numeric-sort (-h): sort numbers while honoring human readable suffixes like KiB and MB etc. + tail uses inotify if it is present. + * Noteworthy changes in release 7.4 (2009-05-07) [stable] diff --git a/configure.ac b/configure.ac index 4eb640e..a63ac0c 100644 --- a/configure.ac +++ b/configure.ac @@ -410,6 +410,8 @@ AM_GNU_GETTEXT_VERSION([0.15]) # For a test of uniq: it uses the $LOCALE_FR envvar. gt_LOCALE_FR +AC_CHECK_FUNCS([inotify_init], [AC_DEFINE([HAVE_INOTIFY], [1], [Check for libinotify])]) + AC_CONFIG_FILES( Makefile doc/Makefile diff --git a/src/tail.c b/src/tail.c index 9d416e1..4b2b59d 100644 --- a/src/tail.c +++ b/src/tail.c @@ -46,6 +46,11 @@ #include xstrtol.h #include xstrtod.h +#ifdef HAVE_INOTIFY +# include hash.h +# include sys/inotify.h +#endif + /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME tail @@ -125,6 +130,17 @@ struct File_spec /* The value of errno seen last time we checked this file. */ int errnum; +#ifdef HAVE_INOTIFY + /* The watch descriptor used by inotify. */ + int wd; + + /* The parent directory watch descriptor. It is used only + * when Follow_name is used. */ + int parent_wd; + + /* Offset in NAME of the basename part. */ + size_t basename_start; +#endif }; /* Keep trying to open a file even if it is inaccessible when tail starts @@ -1117,6 +1133,226 @@ tail_forever (struct File_spec *f, int nfiles, double sleep_interval) } } +#ifdef HAVE_INOTIFY + +static size_t +wd_hasher (const void *entry, size_t tabsize) +{ + const struct File_spec *spec = entry; + return spec-wd % tabsize; +} + +static bool +wd_comparator (const void *e1, const void *e2) +{ + const struct File_spec *spec1 = e1; + const struct File_spec *spec2 = e2; + return spec1-wd == spec2-wd; +} + +/* Tail NFILES files forever, or until killed. + Check modifications using the inotify events system. */ +static void +tail_forever_inotify (int wd, struct File_spec *f, int nfiles) +{ + unsigned int i; + unsigned int max_realloc = 3; + Hash_table *wd_table; + + bool found_watchable = false; + size_t last; + size_t evlen = 0; + char *evbuff; + size_t evbuff_off = 0; + ssize_t len = 0; + + wd_table = hash_initialize (nfiles, NULL, wd_hasher, wd_comparator, NULL); + if (! wd_table) +xalloc_die (); + + for (i = 0; i nfiles; i++) +{ + if (!f[i].ignore) +{ + size_t fnlen = strlen (f[i].name); + if (evlen fnlen) +evlen = fnlen; + + f[i].wd = 0; + + if (follow_mode == Follow_name) +{ + size_t dirlen = dir_len (f[i].name); + char prev = f[i].name[dirlen]; + f[i].basename_start = last_component (f[i].name) - f[i].name; + + f[i].name[dirlen] = '\0'; + + /* Do not care if the directory was already added, in this + case the same watch descriptor will be returned. */ + f[i].parent_wd = inotify_add_watch (wd, f[i].name, + IN_CREATE | IN_MOVED_TO); + + f[i].name[dirlen] = prev; + + if (f[i].parent_wd 0) +{ + error (0, errno, _(cannot watch parent directory of %s), + quote (f[i].name)); + continue; +} +} + + if (f[i].errnum != ENOENT) +{ + int old_wd = f[i].wd; + f[i].wd = inotify_add_watch (wd, f[i].name, + (IN_MODIFY | IN_ATTRIB +| IN_DELETE_SELF | IN_MOVE_SELF)); + + if (last == old_wd) +last = f[i
Re: inotify back end for tail -f on linux
Hi Jim, Jim Meyering j...@meyering.net writes: Thanks for the suggestion. Yes, this has been discussed, e.g., in http://thread.gmane.org/gmane.comp.gnu.coreutils.bugs/15031/focus=15052 I like the idea and outlined how I'd like to see this functionality make it's way into the coreutils. I read this discussion but looking at the tail source code I didn't see any difficulty to use inotify. These are some tests results: test -e /tmp/files || mkdir /tmp/files; for i in $(seq 2000); do touch /tmp/files/$i; done my version: ./timeout -sINT 30s env time ./tail -f /tmp/files/* 0.02user 0.09system 0:30.00elapsed 0%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+232minor)pagefaults 0swaps system version: ./timeout -sINT 30s env time tail -f /tmp/files/* 0.05user 0.16system 0:29.99elapsed 0%CPU (0avgtext+0avgdata 0maxresident)k 0inputs+0outputs (0major+236minor)pagefaults 0swaps I am sure running the test for a longer time would give better results. The initial time using inotify is higher as there is need to create watch descriptors and an index I use for access to the File_spec in costant time. What do you think of the attached patch? Cheers, Giuseppe From aa815abdd3cbf2ad15d769a1f4cb6100e3a975e5 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 30 May 2009 13:31:58 +0200 Subject: [PATCH] tail: Use inotify if it is available. * NEWS: Document the new feature * configure.ac: Check if inotify is present. * src/tail.c (main): Use the tail_forever inotify version if it is possible. (tail_forever_inotify): Added new function. --- NEWS |2 + configure.ac |2 + src/tail.c | 103 - 3 files changed, 105 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 29b09a0..c5a2ef5 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ GNU coreutils NEWS-*- outline -*- sort accepts a new option, --human-numeric-sort (-h): sort numbers while honoring human readable suffixes like KiB and MB etc. + tail uses inotify if it is present. + * Noteworthy changes in release 7.4 (2009-05-07) [stable] diff --git a/configure.ac b/configure.ac index 4eb640e..a63ac0c 100644 --- a/configure.ac +++ b/configure.ac @@ -410,6 +410,8 @@ AM_GNU_GETTEXT_VERSION([0.15]) # For a test of uniq: it uses the $LOCALE_FR envvar. gt_LOCALE_FR +AC_CHECK_FUNCS([inotify_init], [AC_DEFINE([HAVE_INOTIFY], [1], [Check for libinotify])]) + AC_CONFIG_FILES( Makefile doc/Makefile diff --git a/src/tail.c b/src/tail.c index fe34600..bb0950e 100644 --- a/src/tail.c +++ b/src/tail.c @@ -1,5 +1,5 @@ /* tail -- output the last part of file(s) - Copyright (C) 1989, 90, 91, 1995-2006, 2008 Free Software Foundation, Inc. + Copyright (C) 1989, 90, 91, 1995-2006, 2008, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -45,6 +45,10 @@ #include xstrtol.h #include xstrtod.h +#ifdef HAVE_INOTIFY +#include sys/inotify.h +#endif + /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME tail @@ -1116,6 +1120,93 @@ tail_forever (struct File_spec *f, int nfiles, double sleep_interval) } } +#ifdef HAVE_INOTIFY +/* Tail NFILES files forever, or until killed. + Check modifications using the inotify events system. */ +static void +tail_forever_inotify (int wd, struct File_spec *f, int nfiles) +{ + /* Create an index to access File_spec by its watch descriptor + * in costant time. */ + struct File_spec **wd_index; + int i; + int *watch_fd = xmalloc (sizeof (int) * nfiles); + int min_wd = -1; + int max_wd = -1; + int watched = 0; + size_t last; + + for (i = 0; i nfiles; i++) +{ + watch_fd[i] = -1; + + if (!f[i].ignore) +{ + watch_fd[i] = inotify_add_watch (wd, f[i].name, + IN_MODIFY); + + if (watch_fd[i] 0) +{ + error (0, errno, _(cannot watch %s), quote (f[i].name)); + continue; +} + + watched++; + + if (watch_fd[i] max_wd || max_wd 0) + max_wd = watch_fd[i]; + + if (watch_fd[i] min_wd || min_wd 0) + min_wd = watch_fd[i]; +} +} + + if (!watched) +return; + + wd_index = xmalloc (sizeof (struct File_spec**) * (max_wd - min_wd + 1)); + + for (i = 0; i nfiles; i++) +if (watch_fd[i] 0) + wd_index[watch_fd[i] - min_wd] = (f[i]); + + free (watch_fd); + + last = max_wd + 1; + + while (1) +{ + size_t len; + struct inotify_event ev; + char const *name; + struct File_spec *fspec; + uintmax_t bytes_read; + + len = read (wd, ev, sizeof (struct inotify_event)); + if (!len errno == EINTR) +continue
Re: inotify back end for tail -f on linux
Jim Meyering j...@meyering.net writes: Note what happens whey you truncate a tracked file. With your version, nothing. With the original, it reports this: tail: F: file truncated Likewise, when tailing with e.g., tail -F FILE if you unlink FILE, you get this: tail: `FILE' has become inaccessible: No such file or directory and if you then recreate it: tail: `FILE' has appeared; following end of new file You get none of that with your patch. Thank you for the quick review. I added some messages that are possible in `Follow_descriptor' mode but I had to rollback to the old polling mechanism when `Follow_name' is used. It is not straightforward to emulate it using inotify because it is not possible to watch a file that doesn't exist yet or a file that is removed and re-created. It is possible to watch the parent directory if new files are created but then the problem is moved on the parent directory that can be removed too and so on (or am I misunderstanding something with inotify?). I tried `inotify_add_watch' with a file that doesn't exist and a file that exists, removed and re-created, registering them with IN_ALL_EVENTS. In the former case I get an error, in the latter I don't get any event when it is re-created as it is silently dropped from the watch list when the file is removed. Is there a way to monitor a path that doesn't exist yet and be alerted when it is created without polling? Giuseppe From b5cad2d1e51781b18e53eac7b102922319909f5b Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 30 May 2009 13:31:58 +0200 Subject: [PATCH] tail: Use inotify if it is available. * NEWS: Document the new feature * configure.ac: Check if inotify is present. * src/tail.c (main): Use the tail_forever inotify version if it is possible. (tail_forever_inotify): Added new function. --- NEWS |2 + configure.ac |2 + src/tail.c | 126 +- 3 files changed, 128 insertions(+), 2 deletions(-) diff --git a/NEWS b/NEWS index 29b09a0..c5a2ef5 100644 --- a/NEWS +++ b/NEWS @@ -14,6 +14,8 @@ GNU coreutils NEWS-*- outline -*- sort accepts a new option, --human-numeric-sort (-h): sort numbers while honoring human readable suffixes like KiB and MB etc. + tail uses inotify if it is present. + * Noteworthy changes in release 7.4 (2009-05-07) [stable] diff --git a/configure.ac b/configure.ac index 4eb640e..a63ac0c 100644 --- a/configure.ac +++ b/configure.ac @@ -410,6 +410,8 @@ AM_GNU_GETTEXT_VERSION([0.15]) # For a test of uniq: it uses the $LOCALE_FR envvar. gt_LOCALE_FR +AC_CHECK_FUNCS([inotify_init], [AC_DEFINE([HAVE_INOTIFY], [1], [Check for libinotify])]) + AC_CONFIG_FILES( Makefile doc/Makefile diff --git a/src/tail.c b/src/tail.c index fe34600..3f77ba9 100644 --- a/src/tail.c +++ b/src/tail.c @@ -1,5 +1,5 @@ /* tail -- output the last part of file(s) - Copyright (C) 1989, 90, 91, 1995-2006, 2008 Free Software Foundation, Inc. + Copyright (C) 1989, 90, 91, 1995-2006, 2008, 2009 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -45,6 +45,10 @@ #include xstrtol.h #include xstrtod.h +#ifdef HAVE_INOTIFY +#include sys/inotify.h +#endif + /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME tail @@ -1116,6 +1120,114 @@ tail_forever (struct File_spec *f, int nfiles, double sleep_interval) } } +#ifdef HAVE_INOTIFY +/* Tail NFILES files forever, or until killed. + Check modifications using the inotify events system. */ +static void +tail_forever_inotify (int wd, struct File_spec *f, int nfiles) +{ + /* Create an index to access File_spec by its watch descriptor + * in costant time. */ + struct File_spec **wd_index; + int i; + int *watch_fd = xmalloc (sizeof (int) * nfiles); + int min_wd = -1; + int max_wd = -1; + int watched = 0; + size_t last; + + for (i = 0; i nfiles; i++) +{ + watch_fd[i] = -1; + + if (!f[i].ignore) +{ + watch_fd[i] = inotify_add_watch (wd, f[i].name, IN_MODIFY|IN_ATTRIB); + + if (watch_fd[i] 0) +{ + error (0, errno, _(cannot watch %s), quote (f[i].name)); + continue; +} + + watched++; + + if (watch_fd[i] max_wd || max_wd 0) + max_wd = watch_fd[i]; + + if (watch_fd[i] min_wd || min_wd 0) + min_wd = watch_fd[i]; +} +} + + if (!watched) +return; + + wd_index = xmalloc (sizeof (struct File_spec**) * (max_wd - min_wd + 1)); + + for (i = 0; i nfiles; i++) +if (watch_fd[i] 0) + wd_index[watch_fd[i] - min_wd] = (f[i]); + + free (watch_fd); + + last = max_wd + 1; + + while (1) +{ + size_t len; + struct inotify_event
[PATCH] tail -- close fd on a fstat failure
Hi, I noticed that the fd is not closed properly if the fstat call fails. This trivial patch solves it. Giuseppe From 0f93ba2a0673a689bbeaf747b876f6f8c4bc6cae Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano gscriv...@gnu.org Date: Sat, 30 May 2009 21:41:26 +0200 Subject: [PATCH] tail: don't leave open fd when fstat fails. * src/tail.c (tail_forever): close the fd before overwrite it. --- src/tail.c |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/src/tail.c b/src/tail.c index fe34600..3ec896a 100644 --- a/src/tail.c +++ b/src/tail.c @@ -1033,6 +1033,7 @@ tail_forever (struct File_spec *f, int nfiles, double sleep_interval) { if (fstat (fd, stats) != 0) { + close_fd (f[i].fd, pretty_name (f)); f[i].fd = -1; f[i].errnum = errno; error (0, errno, %s, name); -- 1.6.3.1 ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: [PATCH] chroot specify user/group feature
Hi Jim, Jim Meyering j...@meyering.net writes: setuidgid appears to be subsumed by chroot with the new options. If we can remove setuidgid.c, that code is no longer duplicated, so there's less (no?) motivation to move it into gnulib. If you want to remove setuidgid then I don't see any reason to move the shared code into gnulib. Though I have to admit it sounds a bit strange to use chroot to change uid/gid, maybe rewrite setuidgid as a wrapper around chroot? Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: [PATCH] chroot specify user/group feature
Eric Blake e...@byu.net writes: The point is that setuidgid is not installed. It exists only for the purposes of the testsuite. If it were an installed app, then yes it would make sense to keep it around, although perhaps rewritten as a wrapper around the new chroot functionality. But since setuidgid is not installed, while chroot is, there is no longer any incentive to even have setuidgid in the first place. By using chroot in the first place, the testsuite would be a) adding to the coverage of chroot features, and b) minimizing reliance on software that gets no testing outside of the testsuite, both of which are good moves. Well, in this case I agree with you. It is better to remove setuidgid and use the new chroot features. Less code means less problems. Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: [PATCH] chroot specify user/group feature
Hi Eric, Eric Blake e...@byu.net writes: Would it be worth starting to patch the testsuite to replace 'setuidgid -g list usr cmd arg' with 'chroot --user usr --groups=list / cmd arg' in order to give this feature more exposure and reduce our dependence on uninstalled apps? IMHO, since chroot now allows to specify users and groups by their names too, maybe it worths to move these functionalities in a gnulib module and share them with setuidgid. What all of you think? Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: Human readable sort
Pádraig Brady p...@draigbrady.com writes: + Assume that numbers are properly abbreviated. + i.e. input will never have both 5000K and 6M. */ I think this is a too strong assumption. I wouldn't be surprised to find, for example, both 1M and 1500K in a data set. Are there problems to normalize values using this pseudo-code? while (abs (a) 1000) //or 1024 { order_a += signum (a); a /= 1000; //or 1024 } do the same with b and only after compare them. Regards, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: [PATCH] chroot specify user/group feature
Pádraig Brady p...@draigbrady.com writes: +#ifndef MAXGID +# define MAXGID GID_T_MAX +#endif Why add the new MAXGID name? I took this code from the gnulib userspec.c file. I guess there are cases when MAXGID is defined and GID_T_MAX is not, and in such case it is better to use the real value of MAXGID than the maximum value allowed for the gid_t type. Can we assume that every time MAXGID is defined, GID_T_MAX has the right value too? If so, we can remove the same lines from gnulib too. Regards, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: [PATCH] chroot specify user/group feature
Jim Meyering j...@meyering.net writes: In the future, please send patches by mail to the bug-coreutils list. Most of the people who will review them here prefer that. Sorry, I thought it was redundant to post both on the ML and on savannah. I'll send the next version to the bug-coreutils ML. If you'd like to pursue, here are some suggestions: - MAXUID was defined but never used, so please remove it - there is a --help formatting nit exposed by failing make syntax-check: src/chroot.c:122: -u, --userspec specify the userspec in the form USER:GROUP\n\ maint.mk: help2man requires at least two spaces between maint.mk: an option and its description Thank you, I didn't know about it. make: *** [sc_two_space_separator_in_usage] Error 1 - do not add short-named options. People who want to abbreviate can use --u or --g. - exit right away upon encountering an invalid option. Make the option-processing loop look like most others, i.e., with a switch and anonymous enum values in place of your current 'u' and 'g'. What do you think if I change 'g' to 'G'? I looked under FreeBSD and they use -u, -g and -G, maybe it worths to keep the same name. Under OpenBSD it is not possible to specify additional groups, so only -u and -g are available. - add a NEWS entry - add documentation in coreutils.texi And for extra credit, - add a test or two to exercise the new functionality. It'll have to be a root-only test, of course. Sure, I thought about add all these things after you approved it. Regards, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
[bug #10384] chroot feature request: --user and --group parameters
Follow-up Comment #3, bug #10384 (project coreutils): what do you think about adding another argument --groups=g1,g2,...,gn? When it is specified supplementary groups are setted to the specified ones, if it is not specified then the original ones are kept. ___ Reply to this item at: http://savannah.gnu.org/bugs/?10384 ___ Messaggio inviato con/da Savannah http://savannah.gnu.org/ ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
[bug #10384] chroot feature request: --user and --group parameters
Follow-up Comment #5, bug #10384 (project coreutils): Here a new patch that adds the feature we discussed before. Now it is possible to specify a list of additional groups using -g/--groups. (file #18074) ___ Additional Item Attachment: File name: chroot_userspec.diff Size:4 KB ___ Reply to this item at: http://savannah.gnu.org/bugs/?10384 ___ Messaggio inviato con/da Savannah http://savannah.gnu.org/ ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
[bug #10384] chroot feature request: --user and --group parameters
Follow-up Comment #1, bug #10384 (project coreutils): In the attached patch I added the possibility to specify an user:group. `choot --userspec=user:group /foo/bar` The patch uses the gnulib userspec module. What do you think about it? (file #18071) ___ Additional Item Attachment: File name: chroot_userspec.diff Size:3 KB ___ Reply to this item at: http://savannah.gnu.org/bugs/?10384 ___ Messaggio inviato con/da Savannah http://savannah.gnu.org/ ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: [PATCH] Replacement for the sigs_to_ignore hack in timeout.c
Hello, Jim Meyering [EMAIL PROTECTED] writes: Giuseppe, Perhaps you misunderstood? Eric proposed that idea and I addressed the reply above to him. He then replied that he would indeed like to implement it. Sorry, I thought it was directed to both. Since you have spent time working on it, in spite of that, we are now in a bind. If it could be boring task for somebody already well introduced in coreutils, it was a good occasion for me to familiarize with this project. In case the patch will not be accepted I don't have any regrets for the time I spent on it. - you don't have a copyright assignment on file for coreutils, and those take at least 2-3 weeks to process. So even if we were to use your work, we'd have to wait. I know, last time it took more than one month. - it is considered poor form to jump in and write/submit a patch like that, when someone else is obviously planning to do the same thing. I got his answer after I submitted the patch. In the other case I think I was showing my patch just to him not on the ML. While the patch length is not trivial, the changes introduced by this patch are very trivial. It could be done easily with s/signal/my_signal_wrapper/ but IMHO `signal' calls in coreutils are not so many to require a wrapper around `sigaction'. If you'd like to contribute, please coordinate, especially if it's a task that's being actively discussed. I will look more in detail at this task as soon as possible and inform you if I will work on it. Regards, Giuseppe Scrivano ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
Re: [PATCH] Replacement for the sigs_to_ignore hack in timeout.c
Andreas Schwab [EMAIL PROTECTED] writes: + memset (sa, 0, sizeof sa); + sigemptyset (sa.sa_mask); I don't think you need the memset. and how reset the struct without a memset or using sa = {0,} as Pádraig suggested? Do you advise me to reset manually only members that really must be 0? Thanks, Giuseppe ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils
[PATCH] Replacement for the sigs_to_ignore hack in timeout.c
Hello, what do you think about the following way to remove the sigs_to_ignore hack in the timeout.c file? It ignores temporarily the signal inside the `send_sig' function instead of using the `sigs_to_ignore' array. Regards, Giuseppe Scrivano diff --git a/src/timeout.c b/src/timeout.c index 8b506f0..93b23f6 100644 --- a/src/timeout.c +++ b/src/timeout.c @@ -82,7 +82,8 @@ static int timed_out; static int term_signal = SIGTERM; /* same default as kill command. */ static int monitored_pid; -static int sigs_to_ignore[NSIG]; /* so monitor can ignore sigs it resends. */ + +static void cleanup (int sig); static struct option const long_options[] = { @@ -90,13 +91,25 @@ static struct option const long_options[] = {NULL, 0, NULL, 0} }; -/* send sig to group but not ourselves. - * FIXME: Is there a better way to achieve this? */ +/* send sig to group but not ourselves. */ static int send_sig (int where, int sig) { - sigs_to_ignore[sig] = 1; - return kill (where, sig); + int ret; + struct sigaction sa; + void (*handler)(int); + + sigaction (sig, NULL, sa); + handler = sa.sa_handler; + sa.sa_handler = SIG_IGN; + sigaction (sig, sa, NULL); + + ret = kill (where, sig); + + sa.sa_handler = handler; + sigaction (sig, sa, NULL); + + return ret; } static void @@ -109,11 +122,6 @@ cleanup (int sig) } if (monitored_pid) { - if (sigs_to_ignore[sig]) -{ - sigs_to_ignore[sig] = 0; - return; -} send_sig (0, sig); if (sig != SIGKILL sig != SIGCONT) send_sig (0, SIGCONT); ___ Bug-coreutils mailing list Bug-coreutils@gnu.org http://lists.gnu.org/mailman/listinfo/bug-coreutils