Hi,
Here's a patch for cp which adds a new --preallocate option. When
specified, cp allocates disk space for the destination file before writing
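data. It uses fallocate() with FALLOC_FL_KEEP_SIZE on Linux. The intent is
to fall back to posix_fallocate() if that fails, but in this preliminary
patch posix_fallocate() is only compiled in when USE_LINUX_FALLOCATE is
not defined, so there is no run-time fallback yet.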
Benefits of preallocation:
- Disk fragmentation can be greatly reduced. That means faster file
access and less filesystem overhead (fewer extents).
- Recovering data after filesystem corruption should be more successful,
since files are more likely to be contiguous.
- If you're e.g. copying a virtual machine disk image file, the
destination should be (almost) contiguous, meaning that running a disk
optimiser/defragmenter in the guest OS would work as it should (i.e.
improve performance).
This is a very preliminary patch for testing. Hopefully someone will find
it useful. And hopefully someone who (a) has a clue when it comes to C
programming, and (b) is familiar with the coreutils source (neither
describes me) can work from this to produce something which could be
included in a future release.
Note that posix_fallocate() sets the destination file size. If your system
doesn't support fallocate() with FALLOC_FL_KEEP_SIZE, you can't e.g. do
"ls -l destfilename" to monitor the progress of a large file copy; the
length shown will always be the final length.
Pre-allocating space can defeat the object of --sparse=always (or the
default sparse-checking heuristic). If copying files with large holes you
probably won't want to use --preallocate. If you do, regions in the
destination corresponding to holes in the source will be allocated but
unwritten. You'll lose the disk-space-saving benefit, but keep the
fast-reading-of-holes benefit. On the other hand, that feature could be
useful sometimes.
In the general case of copying non-sparse files, it should be beneficial
to use --preallocate. However on some systems, when the destination
filesystem does not support pre-allocation (e.g. FAT32), the
implementation of posix_fallocate() might try to fill the region to be
pre-allocated with zeros. That would double copy time for no benefit.
To-do list:
- Add --preallocate option to mv as well
- Should the option name be changed to --pre-allocate?
- Maybe have an option to tell cp to pre-allocate space for all
destination files in one go, rather than pre-allocating space for each
individual file before copying?
- Check the error code that fallocate() returns. If it says the
filesystem does not support fallocate(), don't call it again for each of
the remaining files being copied to that filesystem.
- Better handling of sparse files, e.g. don't call fallocate() if source
file is sparse and --sparse=always is given.
- If pre-allocation fails due to insufficient disk space, cp prints a
message and continues. So typically it will fill up the disk then abort
with an out-of-disk-space error. It would be nice to be able to tell cp
to abort when a pre-allocation fails, so it can exit without wasting
time.
The attached patch is based on coreutils 8.17.
-- Mark
diff -Naur coreutils-8.17/src/copy.c my_coreutils-8.17/src/copy.c
--- coreutils-8.17/src/copy.c 2012-05-08 09:34:30.000000000 +0100
+++ my_coreutils-8.17/src/copy.c 2012-05-11 13:52:57.925208868 +0100
@@ -23,6 +23,17 @@
#include <sys/types.h>
#include <selinux/selinux.h>
+/* TODO: Make this a configure/makefile option. */
+#define USE_LINUX_FALLOCATE 1
+
+/* For FALLOC_FL_KEEP_SIZE definition. Don't include linux/falloc.h because
+ that might not exist on some systems. */
+#ifdef USE_LINUX_FALLOCATE
+#ifndef FALLOC_FL_KEEP_SIZE
+#define FALLOC_FL_KEEP_SIZE 1
+#endif
+#endif
+
#if HAVE_HURD_H
# include <hurd.h>
#endif
@@ -1050,6 +1061,37 @@
buf_alloc = xmalloc (buf_size + buf_alignment_slop);
buf = ptr_align (buf_alloc, buf_alignment);
+ /* Allocate space for the destination file if user specified the
+ --preallocate option.
+ TODO: Add an option to have cp abort if unable to allocate space,
+ instead of just printing a message. */
+
+ /* On Linux, we can use fallocate() with FALLOC_FL_KEEP_SIZE to pre-
+ allocate space without changing the apparent file size. */
+ if (x->preallocate)
+ {
+#ifdef USE_LINUX_FALLOCATE
+ if (fallocate(dest_desc, FALLOC_FL_KEEP_SIZE, 0,
+ src_open_sb.st_size) == 0)
+ goto allocated;
+ else
+ /* TODO: Handle different errors better. E.g. if the kernel does not
+ support FALLOC_FL_KEEP_SIZE we could retry without that flag, or
+ if the filesystem does not support fallocate() we could remember
+ that and not call fallocate() for every file copied. */
+ error (0, errno, _("pre-allocating space for %s"),
+ quote (dst_name));
+ /* If fallocate() with FALLOC_FL_KEEP_SIZE failed, fall back to
+ posix_fallocate(). */
+#else
+ if (posix_fallocate(dest_desc, 0, src_open_sb.st_size) != 0)
+ /* posix_fallocate() does not set errno */
+ fprintf (stderr, _("%s: Could not pre-allocate space for %s"),
+ program_name, quote (dst_name));
+#endif
+ }
+
+allocated:
if (sparse_src)
{
bool normal_copy_required;
diff -Naur coreutils-8.17/src/copy.h my_coreutils-8.17/src/copy.h
--- coreutils-8.17/src/copy.h 2012-03-24 18:22:13.000000000 +0000
+++ my_coreutils-8.17/src/copy.h 2012-05-11 13:08:42.788672598 +0100
@@ -242,6 +242,9 @@
such a symlink) and returns false. */
bool open_dangling_dest_symlink;
+ /* Use fallocate()/posix_fallocate() to pre-allocate space for files */
+ bool preallocate;
+
/* Control creation of COW files. */
enum Reflink_type reflink_mode;
diff -Naur coreutils-8.17/src/cp.c my_coreutils-8.17/src/cp.c
--- coreutils-8.17/src/cp.c 2012-05-01 21:55:08.000000000 +0100
+++ my_coreutils-8.17/src/cp.c 2012-05-11 13:53:21.089396098 +0100
@@ -80,7 +80,8 @@
REFLINK_OPTION,
SPARSE_OPTION,
STRIP_TRAILING_SLASHES_OPTION,
- UNLINK_DEST_BEFORE_OPENING
+ UNLINK_DEST_BEFORE_OPENING,
+ PREALLOCATE_OPTION
};
/* True if the kernel is SELinux enabled. */
@@ -90,6 +91,9 @@
as its destination instead of the usual "e_dir/e_file." */
static bool parents_option = false;
+/* For --preallocate option */
+static bool preallocate_option = false;
+
/* Remove any trailing slashes from each SOURCE argument. */
static bool remove_trailing_slashes;
@@ -130,6 +134,7 @@
{"one-file-system", no_argument, NULL, 'x'},
{"parents", no_argument, NULL, PARENTS_OPTION},
{"path", no_argument, NULL, PARENTS_OPTION}, /* Deprecated. */
+ {"preallocate", no_argument, NULL, PREALLOCATE_OPTION},
{"preserve", optional_argument, NULL, PRESERVE_ATTRIBUTES_OPTION},
{"recursive", no_argument, NULL, 'R'},
{"remove-destination", no_argument, NULL, UNLINK_DEST_BEFORE_OPENING},
@@ -195,6 +200,15 @@
-P, --no-dereference never follow symbolic links in SOURCE\n\
"), stdout);
fputs (_("\
+ --preallocate pre-allocate space for destination files before\n\
+ copying data. This can significantly reduce\n\
+ fragmentation and allows an early exit if\n\
+ there would not be enough free space. It can\n\
+ also increase the likelihood of successful\n\
+ data recovery after filesystem corruption\n\
+ since data is more likely to be contiguous.\n\
+"), stdout);
+ fputs (_("\
-p same as --preserve=mode,ownership,timestamps\n\
--preserve[=ATTR_LIST] preserve the specified attributes (default:\n\
mode,ownership,timestamps), if possible\n\
@@ -779,6 +793,7 @@
x->one_file_system = false;
x->reflink_mode = REFLINK_NEVER;
+ x->preallocate = false;
x->preserve_ownership = false;
x->preserve_links = false;
x->preserve_mode = false;
@@ -1040,6 +1055,10 @@
parents_option = true;
break;
+ case PREALLOCATE_OPTION:
+ x.preallocate = true;
+ break;
+
case 'r':
case 'R':
x.recursive = true;
diff -Naur coreutils-8.17/src/mv.c my_coreutils-8.17/src/mv.c
--- coreutils-8.17/src/mv.c 2012-05-01 21:55:08.000000000 +0100
+++ my_coreutils-8.17/src/mv.c 2012-05-11 13:09:54.069217905 +0100
@@ -132,6 +132,7 @@
x->stdin_tty = isatty (STDIN_FILENO);
x->open_dangling_dest_symlink = false;
+ x->preallocate = false; /* FIXME: Add support for --preallocate */
x->update = false;
x->verbose = false;
x->dest_info = NULL;