Hi All, This is the patchset for fiemap copy.
The first patch adds 'fiemap.h' to 'coreutils/src'. The second is the actual fiemap copy code. I have combined both patches in one mail, which may make your review a little more complex; sorry for that. :) >From 9021a61989c87e03bd5e0d5735bb710ae972fcbd Mon Sep 17 00:00:00 2001 From: Jie Liu <[email protected]> Date: Sun, 4 Apr 2010 21:32:50 +0800 Subject: [PATCH 1/2] Add fiemap.h for fiemap ioctl(2) support. It is not shipped by default, so I have copied it from the kernel for the moment. I have updated its code style to follow the GNU coding requirements. Signed-off-by: Jie Liu <[email protected]> --- src/fiemap.h | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 102 insertions(+), 0 deletions(-) create mode 100644 src/fiemap.h diff --git a/src/fiemap.h b/src/fiemap.h new file mode 100644 index 0000000..d33293b --- /dev/null +++ b/src/fiemap.h @@ -0,0 +1,102 @@ +/* FS_IOC_FIEMAP ioctl infrastructure. + Some portions copyright (C) 2007 Cluster File Systems, Inc + Authors: Mark Fasheh <[email protected]> + Kalpak Shah <[email protected]> + Andreas Dilger <[email protected]>. */ + +/* Copy from kernel, modified to respect GNU code style by Jie Liu. */ + +#ifndef _LINUX_FIEMAP_H +# define _LINUX_FIEMAP_H + +# include <linux/types.h> + +struct fiemap_extent +{ + /* Logical offset in bytes for the start of the extent + from the beginning of the file. */ + uint64_t fe_logical; + + /* Physical offset in bytes for the start of the extent + from the beginning of the disk. */ + uint64_t fe_physical; + + /* Length in bytes for this extent. */ + uint64_t fe_length; + + uint64_t fe_reserved64[2]; + + /* FIEMAP_EXTENT_* flags for this extent. */ + uint32_t fe_flags; + + uint32_t fe_reserved[3]; +}; + +struct fiemap +{ + /* Logical offset(inclusive) at which to start mapping(in). */ + uint64_t fm_start; + + /* Logical length of mapping which userspace wants(in). */ + uint64_t fm_length; + + /* FIEMAP_FLAG_* flags for request(in/out). 
*/ + uint32_t fm_flags; + + /* Number of extents that were mapped(out). */ + uint32_t fm_mapped_extents; + + /* Size of fm_extents array(in). */ + uint32_t fm_extent_count; + + uint32_t fm_reserved; + + /* Array of mapped extents(out). */ + struct fiemap_extent fm_extents[0]; +}; + +/* The maximum offset can be mapped for a file. */ +# define FIEMAP_MAX_OFFSET (~0ULL) + +/* Sync file data before map. */ +# define FIEMAP_FLAG_SYNC 0x00000001 + +/* Map extented attribute tree. */ +# define FIEMAP_FLAG_XATTR 0x00000002 + +# define FIEMAP_FLAGS_COMPAT (FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR) + +/* Last extent in file. */ +# define FIEMAP_EXTENT_LAST 0x00000001 + +/* Data location unknown. */ +# define FIEMAP_EXTENT_UNKNOWN 0x00000002 + +/* Location still pending, Sets EXTENT_UNKNOWN. */ +# define FIEMAP_EXTENT_DELALLOC 0x00000004 + +/* Data can not be read while fs is unmounted. */ +# define FIEMAP_EXTENT_ENCODED 0x00000008 + +/* Data is encrypted by fs. Sets EXTENT_NO_BYPASS. */ +# define FIEMAP_EXTENT_DATA_ENCRYPTED 0x00000080 + +/* Extent offsets may not be block aligned. */ +# define FIEMAP_EXTENT_NOT_ALIGNED 0x00000100 + +/* Data mixed with metadata. Sets EXTENT_NOT_ALIGNED. */ +# define FIEMAP_EXTENT_DATA_INLINE 0x00000200 + +/* Multiple files in block. Set EXTENT_NOT_ALIGNED. */ +# define FIEMAP_EXTENT_DATA_TAIL 0x00000400 + +/* Space allocated, but not data (i.e. zero). */ +# define FIEMAP_EXTENT_UNWRITTEN 0x00000800 + +/* File does not natively support extents. Result merged for efficiency. */ +# define FIEMAP_EXTENT_MERGED 0x00001000 + +/* Space shared with other files. */ +# define FIEMAP_EXTENT_SHARED 0x00002000 + +#endif -- 1.5.4.3 >From c13ede4188be648412742a9fb81565f3d8e9899e Mon Sep 17 00:00:00 2001 From: Jie Liu <[email protected]> Date: Sun, 4 Apr 2010 21:40:33 +0800 Subject: [PATCH 2/2] Add fiemap copy for cp(1). This feature is intended to optimize the backup of sparse files. 
Fiemap copy can be triggered via 'cp --fiemap[=WHEN]'. If '--fiemap=auto' is specified and the underlying FS does not support FIEMAP, or the fiemap copy fails, cp falls back to a normal copy. Signed-off-by: Jie Liu <[email protected]> --- src/copy.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- src/copy.h | 27 ++++++++++ src/cp.c | 51 ++++++++++++++++++- 3 files changed, 242 insertions(+), 3 deletions(-) diff --git a/src/copy.c b/src/copy.c index 29f37c9..3d03b2e 100644 --- a/src/copy.c +++ b/src/copy.c @@ -65,6 +65,10 @@ # include <sys/ioctl.h> #endif +#ifndef HAVE_FIEMAP +# include "fiemap.h" +#endif + #ifndef HAVE_FCHOWN # define HAVE_FCHOWN false # define fchown(fd, uid, gid) (-1) @@ -151,6 +155,138 @@ clone_file (int dest_fd, int src_fd) #endif } +#ifdef __linux__ +# ifndef FS_IOC_FIEMAP +# define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap) +# endif +/* Perform FIEMAP(available in mainline 2.6.27) copy if possible. + Call ioctl(2) with FS_IOC_FIEMAP to efficiently map file allocation + excepts holes. So the overhead to deal with holes with lseek(2) in + normal copy could be saved. This would result in much faster backups + for any kind of sparse file. 
*/ +static bool +fiemap_copy (int src_fd, int dest_fd, size_t optimal_buf_size, + off_t src_total_size, uint32_t fiemap_flags, + char const *src_name, char const *dst_name) +{ + int last = 0; + unsigned int i; + bool return_val = true; + char fiemap_buf[4096] = ""; + struct fiemap *fiemap = (struct fiemap *)fiemap_buf; + struct fiemap_extent *fm_ext = &fiemap->fm_extents[0]; + uint32_t count = (sizeof (fiemap_buf) - sizeof (*fiemap)) / + sizeof (struct fiemap_extent); + uint64_t last_ext_logical = 0; + uint64_t last_ext_len = 0; + uint64_t last_read_size = 0; + + memset (fiemap, 0, sizeof (*fiemap)); + + do + { + fiemap->fm_start = 0ULL; + fiemap->fm_length = FIEMAP_MAX_OFFSET; + fiemap->fm_flags |= fiemap_flags; + fiemap->fm_extent_count = count; + + /* If the underlaying filesystem does not support FIEMAP or + the fiemap flags specified, fall back to do normal copy + if fiemap_mode == FIEMAP_AUTO. */ + if (ioctl (src_fd, FS_IOC_FIEMAP, (unsigned long) fiemap) < 0) + return false; + + /* If 0 extents are returned, then more ioctls are not needed. */ + if (fiemap->fm_mapped_extents == 0) + return true; + + for (i = 0; i < fiemap->fm_mapped_extents; i++) + { + uint64_t ext_logical = fm_ext[i].fe_logical; + uint64_t ext_len = fm_ext[i].fe_length; + + if (lseek (src_fd, (off_t) ext_logical, SEEK_SET) < 0LL) + { + error (0, errno, _("cannot lseek %s"), quote (src_name)); + return_val = false; + } + + if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST) + { + last_ext_logical = ext_logical; + last_ext_len = ext_len; + last = 1; + } + + char buf[optimal_buf_size]; + while (0 < ext_len) + { + memset (buf, 0, sizeof (buf)); + + /* Avoid reading into the holes if the left extent + length is shorter than the optimal buffer size. 
*/ + if (ext_len < optimal_buf_size) + optimal_buf_size = ext_len; + + ssize_t n_read = read (src_fd, buf, optimal_buf_size); + if (n_read < 0) + { +#ifdef EINTR + if (errno == EINTR) + continue; +#endif + error (0, errno, _("reading %s"), quote (src_name)); + return_val = false; + } + + if (n_read == 0) + { + /* Figure out how many bytes read from the last extent. */ + last_read_size = last_ext_len - ext_len; + break; + } + + if (full_write (dest_fd, buf, n_read) != n_read) + { + error (0, errno, _("writing %s"), quote (dst_name)); + return_val = false; + } + + ext_len -= n_read; + } + + fiemap->fm_start = (fm_ext[i-1].fe_logical + fm_ext[i-1].fe_length); + } + } while (last == 0); + + /* FIEMAP only return the allocated extents info except holes for sparse files. + the extent length always aligned to the filesystem block size. In many cases, + the sum of the last extent logical offset and its length does not precise equal to + the file size in bytes. So we need to do something to record the length of the file. + On modern systems, calling ftruncate does the job. On systems without native + ftruncate support, we have to write a byte at the ending position. + Otherwise the kernel would truncate the file at the end of the last write operation. */ + if (last_ext_logical + last_read_size < src_total_size) + { + if (HAVE_FTRUNCATE + ? /* ftruncate sets the file size, + so there is no need for a write. */ + ftruncate (dest_fd, src_total_size) < 0 + : /* Seek backwards one character and write a null. 
*/ + (lseek (dest_fd, (off_t) -1, SEEK_SET) < 0L + || full_write (dest_fd, "", 1) != 1)) + { + error (0, errno, _("writing %s"), quote (dst_name)); + return_val = false; + } + } + + return return_val; +} +#else +static bool fiemap_copy (ignored) { errno == ENOTSUP; return false; } +#endif + /* FIXME: describe */ /* FIXME: rewrite this to use a hash table so we avoid the quadratic performance hit that's probably noticeable only on trees deeper @@ -703,6 +839,29 @@ copy_reg (char const *src_name, char const *dst_name, buf_size = blcm; } + if (x->fiemap_mode) + { + uint32_t fiemap_flags = 0; + + if (x->fiemap_sync) + fiemap_flags |= FIEMAP_FLAG_SYNC; + + off_t src_total_size = src_open_sb.st_size; + bool fiemap_copy_ok = fiemap_copy (source_desc, dest_desc, buf_size, + src_total_size, fiemap_flags, src_name, + dst_name); + if (fiemap_copy_ok) + goto preserve_extra_info; + else + { + if (x->fiemap_mode == FIEMAP_ALWAYS) + { + error (0, errno, _("FIEMAP copy failed %s"), quote (src_name)); + goto close_src_and_dst_desc; + } + } + } + /* Make a buffer with space for a sentinel at the end. */ buf_alloc = xmalloc (buf_size + buf_alignment_slop); buf = ptr_align (buf_alloc, buf_alignment); @@ -813,6 +972,7 @@ copy_reg (char const *src_name, char const *dst_name, } } +preserve_extra_info: if (x->preserve_timestamps) { struct timespec timespec[2]; @@ -901,8 +1061,11 @@ close_src_desc: return_val = false; } - free (buf_alloc); - free (name_alloc); + if (buf_alloc) + free (buf_alloc); + if (name_alloc) + free (name_alloc); + return return_val; } diff --git a/src/copy.h b/src/copy.h index bd7359f..0bd407d 100644 --- a/src/copy.h +++ b/src/copy.h @@ -56,6 +56,19 @@ enum Reflink_type REFLINK_ALWAYS }; +/* Control of FIEMAP copy. */ +enum Fiemap_type +{ + /* Default to a standard copy. */ + FIEMAP_NEVER, + + /* Try a FIEMAP copy and fall back to a standard copy. */ + FIEMAP_AUTO, + + /* Require a FIEMAP copy and fail if not available. 
*/ + FIEMAP_ALWAYS +}; + /* This type is used to help mv (via copy.c) distinguish these cases. */ enum Interactive { @@ -91,6 +104,11 @@ enum Dereference_symlink || (Mode) == REFLINK_AUTO \ || (Mode) == REFLINK_ALWAYS) +# define VALID_FIEMAP_MODE(Mode) \ + ((Mode) == FIEMAP_NEVER \ + || (Mode) == FIEMAP_AUTO \ + || (Mode) == FIEMAP_ALWAYS) + /* These options control how files are copied by at least the following programs: mv (when rename doesn't work), cp, install. So, if you add a new member, be sure to initialize it in @@ -237,9 +255,18 @@ struct cp_options such a symlink) and returns false. */ bool open_dangling_dest_symlink; + /* If true, set fiemap ioctl flags with FIEMAP_FLAG_SYNC. */ + bool fiemap_sync; + + /* If true, set fiemap ioctl flags with FIEMAP_FLAG_XATTR. */ + bool fiemap_xattr; + /* Control creation of COW files. */ enum Reflink_type reflink_mode; + /* Control of FIEMAP type file copy. */ + enum Fiemap_type fiemap_mode; + /* This is a set of destination name/inode/dev triples. Each such triple represents a file we have created corresponding to a source file name that was specified on the command line. Use it to avoid clobbering diff --git a/src/cp.c b/src/cp.c index cc958d1..d21198a 100644 --- a/src/cp.c +++ b/src/cp.c @@ -78,8 +78,11 @@ enum PRESERVE_ATTRIBUTES_OPTION, REFLINK_OPTION, SPARSE_OPTION, + FIEMAP_OPTION, STRIP_TRAILING_SLASHES_OPTION, - UNLINK_DEST_BEFORE_OPENING + UNLINK_DEST_BEFORE_OPENING, + FIEMAP_FLAG_SYNC_OPTION, + FIEMAP_FLAG_XATTR_OPTION }; /* True if the kernel is SELinux enabled. 
*/ @@ -112,6 +115,16 @@ static enum Reflink_type const reflink_type[] = }; ARGMATCH_VERIFY (reflink_type_string, reflink_type); +static char const *const fiemap_type_string[] = +{ + "auto", "always", NULL +}; +static enum Fiemap_type const fiemap_type[] = +{ + FIEMAP_AUTO, FIEMAP_ALWAYS +}; +ARGMATCH_VERIFY (fiemap_type_string, fiemap_type); + static struct option const long_opts[] = { {"archive", no_argument, NULL, 'a'}, @@ -133,6 +146,8 @@ static struct option const long_opts[] = {"remove-destination", no_argument, NULL, UNLINK_DEST_BEFORE_OPENING}, {"sparse", required_argument, NULL, SPARSE_OPTION}, {"reflink", optional_argument, NULL, REFLINK_OPTION}, + {"fiemap", optional_argument, NULL, FIEMAP_OPTION}, + {"fiemap-sync", optional_argument, NULL, FIEMAP_FLAG_SYNC_OPTION}, {"strip-trailing-slashes", no_argument, NULL, STRIP_TRAILING_SLASHES_OPTION}, {"suffix", required_argument, NULL, 'S'}, {"symbolic-link", no_argument, NULL, 's'}, @@ -212,6 +227,10 @@ Mandatory arguments to long options are mandatory for short options too.\n\ argument\n\ "), stdout); fputs (_("\ + --fiemap[=WHEN] control fiemap copies. See below\n\ + --fiemap-sync sync file data before fiemap\n\ +"), stdout); + fputs (_("\ -s, --symbolic-link make symbolic links instead of copying\n\ -S, --suffix=SUFFIX override the usual backup suffix\n\ -t, --target-directory=DIRECTORY copy all SOURCE arguments into DIRECTORY\n\ @@ -237,6 +256,10 @@ Use --sparse=never to inhibit creation of sparse files.\n\ When --reflink[=always] is specified, perform a lightweight copy, where the\n\ data blocks are copied only when modified. If this is not possible the copy\n\ fails, or if --reflink=auto is specified, fall back to a standard copy.\n\ +\n\ +When --fiemap[=always] is specified, perform a fiemap copy, where the\n\ +allocated data blocks are copied except holes. 
If this is not possible the\n\ +copy fails, or if --fiemap=auto is specified, fall back to a standard copy.\n\ "), stdout); fputs (_("\ \n\ @@ -770,6 +793,8 @@ cp_option_init (struct cp_options *x) x->move_mode = false; x->one_file_system = false; x->reflink_mode = REFLINK_NEVER; + x->fiemap_mode = FIEMAP_NEVER; + x->fiemap_sync = false; x->preserve_ownership = false; x->preserve_links = false; @@ -942,6 +967,18 @@ main (int argc, char **argv) reflink_type_string, reflink_type); break; + case FIEMAP_OPTION: + if (optarg == NULL) + x.fiemap_mode = FIEMAP_ALWAYS; + else + x.fiemap_mode = XARGMATCH ("--fiemap", optarg, + fiemap_type_string, fiemap_type); + break; + + case FIEMAP_FLAG_SYNC_OPTION: + x.fiemap_sync = true; + break; + case 'a': /* Like -dR --preserve=all with reduced failure diagnostics. */ x.dereference = DEREF_NEVER; x.preserve_links = true; @@ -1108,6 +1145,18 @@ main (int argc, char **argv) usage (EXIT_FAILURE); } + if (x.fiemap_mode == FIEMAP_ALWAYS && x.sparse_mode == SPARSE_NEVER) + { + error (0, 0, _("--fiemap can not be used with --sparse=never")); + usage (EXIT_FAILURE); + } + + if (x.fiemap_sync && !x.fiemap_mode) + { + error (0, 0, _("--fiemap-sync can be used only with --fiemap=[WHEN]")); + usage (EXIT_FAILURE); + } + if (backup_suffix_string) simple_backup_suffix = xstrdup (backup_suffix_string); -- 1.5.4.3 Best Regards, -Jeff
