Jim Meyering wrote: > Pádraig Brady wrote: >> I was thinking a bit more about the patch just pushed. >> It sets the buffer size to 8*st_blksize which seems a >> little arbitrary, and also max buffer size is set to >> 32KiB even if the file system has a larger st_blksize. >> I'm not sure this is desired? >> >> How about making 32KiB the minimum as in the attached? >> The patch also changes `split` and `copy` to use the >> same IO size as `cat`. > ... >> +#define IO_BLKSIZE(statbuf) (MAX(32*1024, ST_BLKSIZE(statbuf))) > > That looks better. Thanks. > > But please make it a static inline function, not a macro. > That will be more debugger-friendly.
Updated one attached. Would you like this in 7.2? Note I did a quick audit of the IO in coreutils filters to see if this would be appropriate for other utils: ---------------------------------------------------------- filter read write stdio_r stdio_w ---------------------------------------------------------- ---- copying ---- cp 8192 8192 n n mv " " n n install " " n n ---- digests ---- wc 16384 n sum -r 4096 1 sum -s 8192 n md5sum 4096 4096 sha1sum " " sha224sum " " sha256sum " " sha384sum " " sha512sum " " ---- line oriented ---- sort fsize-fsize%4k 4096 fsize line shuf " " " " tsort 4096 line n n ptx fsize 4096 n line fold 4096 4096 1 line nl 4096 4096 1 line uniq " " " " comm " " " " join " " " " paste 4096 4096 1 1 cut " " " " fmt " " " " pr " " " " unexpand " " " " expand 8192 " " " ---- other ---- base64 4096 4096 3072 76 od 4096 4096 16 7 tr 8192 4096 n 8192 csplit 8191,8190,.. 8192 n line split 4096 4096 n n cat 4096 4096 n n tee 8192 8192 n 8192 tail -c 10000 8192 8192 n 8192 head -c 10000 " " " " tail 8192 4096 n 8192 head " " " " tac " " " " shred 12288 n n dd variable variable n n Notes... tee turns off buffering, so I can't really see why it uses stdio at all. I think I'll update it to use read/write directly and also change the buffer size to 32KiB to match cat et. al. There are multiple methods for reading lines that could be consolidated. getline: date, md5sum, dircolors getndelim2: cut (uses getc underneath) linebuffer: nl, join, comm, uniq internal: fold, head, tail, ... cheers, Pádraig.
>From 9c8de5e7240e96dffbd9701a41d9e2e96a7e69c9 Mon Sep 17 00:00:00 2001 From: =?utf-8?q?P=C3=A1draig=20Brady?= <[email protected]> Date: Fri, 6 Mar 2009 22:30:55 +0000 Subject: [PATCH] cat,cp,mv,install,split: Set the minimum IO block size used to 32KiB This is following on from 02c3dc9de8d3cf26b319b7a7b144878a2afb91bb which increased the IO block size used by cat by 8 times, but also capped it at 32KiB. Note the above mentioned utils all copy data in blocks and do not use stdio streams. * NEWS: Mention the change in behavior. * src/system.h: Add a new io_blksize() function that returns the max of ST_BLKSIZE or 32KiB, as this value was seen as a good value for a minimum block size to use to get good performance while minimizing system call overhead. * src/cat.c: Use it. * src/copy.c: ditto * src/split.c: ditto --- NEWS | 5 +++++ src/cat.c | 10 ++-------- src/copy.c | 13 ++----------- src/split.c | 2 +- src/system.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++-- 5 files changed, 58 insertions(+), 22 deletions(-) diff --git a/NEWS b/NEWS index fd101a4..2500280 100644 --- a/NEWS +++ b/NEWS @@ -24,6 +24,11 @@ GNU coreutils NEWS -*- outline -*- Previously -k1,1b would have caused leading space from field 2 to be included in the sort while -k2,3.0 would have not included field 3. +** Changes in behavior + + cp,mv,install,cat,split: now read and write a minimum 32KiB of data + at a time. This was seen to increase throughput. Up to 2 times + when reading cached files on linux for example. * Noteworthy changes in release 7.1 (2009-02-21) [stable] diff --git a/src/cat.c b/src/cat.c index 04eb204..18fa1f1 100644 --- a/src/cat.c +++ b/src/cat.c @@ -78,12 +78,6 @@ static char *line_num_end = line_buf + LINE_COUNTER_BUF_LEN - 3; /* Preserves the `cat' function's local `newlines' between invocations. */ static int newlines2 = 0; -static inline size_t -compute_buffer_size (struct stat st) -{ - return MIN (8 * ST_BLKSIZE (st), 32 * 1024); -} - void usage (int status) { @@ -642,7 +636,7 @@ main (int argc, char **argv) if (fstat (STDOUT_FILENO, &stat_buf) < 0) error (EXIT_FAILURE, errno, _("standard output")); - outsize = compute_buffer_size (stat_buf); + outsize = io_blksize (stat_buf); /* Input file can be output file for non-regular files. fstat on pipes returns S_IFSOCK on some systems, S_IFIFO on others, so the checking should not be done for those types, @@ -706,7 +700,7 @@ main (int argc, char **argv) ok = false; goto contin; } - insize = compute_buffer_size (stat_buf); + insize = io_blksize (stat_buf); /* Compare the device and i-node numbers of this input file with the corresponding values of the (output file associated with) diff --git a/src/copy.c b/src/copy.c index 1918671..e37fead 100644 --- a/src/copy.c +++ b/src/copy.c @@ -568,7 +568,7 @@ copy_reg (char const *src_name, char const *dst_name, /* Choose a suitable buffer size; it may be adjusted later. */ size_t buf_alignment = lcm (getpagesize (), sizeof (word)); size_t buf_alignment_slop = sizeof (word) + buf_alignment - 1; - size_t buf_size = ST_BLKSIZE (sb); + size_t buf_size = io_blksize (sb); /* Deal with sparse files. */ bool last_write_made_hole = false; @@ -596,21 +596,12 @@ copy_reg (char const *src_name, char const *dst_name, buffer size. */ if (! make_holes) { - /* These days there's no point ever messing with buffers smaller - than 8 KiB. It would be nice to configure SMALL_BUF_SIZE - dynamically for this host and pair of files, but there doesn't - seem to be a good way to get readahead info portably. */ - enum { SMALL_BUF_SIZE = 8 * 1024 }; - /* Compute the least common multiple of the input and output buffer sizes, adjusting for outlandish values. */ size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment_slop; - size_t blcm = buffer_lcm (ST_BLKSIZE (src_open_sb), buf_size, + size_t blcm = buffer_lcm (io_blksize (src_open_sb), buf_size, blcm_max); - /* Do not use a block size that is too small. */ - buf_size = MAX (SMALL_BUF_SIZE, blcm); - /* Do not bother with a buffer larger than the input file, plus one byte to make sure the file has not grown while reading it. */ if (S_ISREG (src_open_sb.st_mode) && src_open_sb.st_size < buf_size) diff --git a/src/split.c b/src/split.c index 1d8a94c..f8e2683 100644 --- a/src/split.c +++ b/src/split.c @@ -554,7 +554,7 @@ main (int argc, char **argv) if (fstat (STDIN_FILENO, &stat_buf) != 0) error (EXIT_FAILURE, errno, "%s", infile); - in_blk_size = ST_BLKSIZE (stat_buf); + in_blk_size = io_blksize (stat_buf); buf = ptr_align (xmalloc (in_blk_size + 1 + page_size - 1), page_size); diff --git a/src/system.h b/src/system.h index ba74da4..9dbf2ba 100644 --- a/src/system.h +++ b/src/system.h @@ -147,6 +147,9 @@ enum # define D_INO(dp) NOT_AN_INODE_NUMBER #endif +/* include here for SIZE_MAX. */ +#include <inttypes.h> + /* Get or fake the disk device blocksize. Usually defined by sys/param.h (if at all). */ #if !defined DEV_BSIZE && defined BSIZE @@ -218,6 +221,51 @@ enum # endif #endif +/* As of Mar 2009, 32KiB is determined to be the minimium + * blksize to best minimize system call overhead. + * This can be tested with this script with the results + * shown for a 1.7GHz pentium-m with 2GB of 400MHz DDR2 RAM: + + for i in $(seq 0 10); do + size=$((8*1024**3)) #ensure this is big enough + bs=$((1024*2**$i)) + printf "%7s=" $bs + dd bs=$bs if=/dev/zero of=/dev/null count=$(($size/$bs)) 2>&1 | + sed -n 's/.* \([0-9.]* [GM]B\/s\)/\1/p' + done + + 1024=734 MB/s + 2048=1.3 GB/s + 4096=2.4 GB/s + 8192=3.5 GB/s + 16384=3.9 GB/s + 32768=5.2 GB/s + 65536=5.3 GB/s + 131072=5.5 GB/s + 262144=5.7 GB/s + 524288=5.7 GB/s + 1048576=5.8 GB/s + + * Note that this is to minimize system call overhead. + * Other values may be appropriate to minimize file system + * or disk overhead. For example on my current linux system + * the readahead setting is 128KiB which was read using: + + file="." + device=$(df -P --local "$file" | tail -n1 | cut -d' ' -f1) + echo $(( $(blockdev --getra $device) * 512 )) + + * However there isn't a portable way to get the above. + * In the future we could use the above method if available + * and default to io_blksize() if not. + */ +#define IO_BUFSIZE (32*1024) +static inline size_t +io_blksize (struct stat sb) +{ + return MAX (IO_BUFSIZE, ST_BLKSIZE(sb)); +} + /* Redirection and wildcarding when done by the utility itself. Generally a noop, but used in particular for native VMS. */ #ifndef initialize_main @@ -228,8 +276,6 @@ enum #include "timespec.h" -#include <inttypes.h> - #include <ctype.h> #if ! (defined isblank || HAVE_DECL_ISBLANK) -- 1.5.3.6
_______________________________________________ Bug-coreutils mailing list [email protected] http://lists.gnu.org/mailman/listinfo/bug-coreutils
