Hello All, Below are my patches to isolate the extent scan and fetch functions in a new module to improve its extensibility.
They introduce a new file, 'src/extent-scan.c', to hold those functions, which are then called from extents_copy() in copy_reg() to copy regular files. In addition, another major change is that all data is copied when the '--sparse=never' option is specified: all data is written to the destination file, but extents_copy() is used, if available, to read the source file efficiently. It detects holes between the previous and current extents and, if there is one, calls fill_with_holes_ok() to write zeros in place of the hole in the destination file. It also calls fill_with_holes_ok() to write zeros up to the source file size if the last extent of the source file has been reached and there is a hole after it. I have not implemented the Solaris lseek(2) support yet because I lack a Solaris environment; that will have to wait for a while. In my testing, it works on the 4 commonly used filesystems that you all know. As usual, any comments are welcome! >From 70773fdf1d85ba070e054b0467a7a0e1e2b00ea8 Mon Sep 17 00:00:00 2001 From: Jie Liu <[email protected]> Date: Tue, 20 Jul 2010 20:35:25 +0800 Subject: [PATCH 1/3] cp: delete fiemap_copy() related stuff from copy.c * delete fiemap_copy(), now it is implemented as a module. Signed-off-by: Jie Liu <[email protected]> --- src/copy.c | 171 ------------------------------------------------------------ 1 files changed, 0 insertions(+), 171 deletions(-) diff --git a/src/copy.c b/src/copy.c index f48c74d..171499c 100644 --- a/src/copy.c +++ b/src/copy.c @@ -63,10 +63,6 @@ #include <sys/ioctl.h> -#ifndef HAVE_FIEMAP -# include "fiemap.h" -#endif - #ifndef HAVE_FCHOWN # define HAVE_FCHOWN false # define fchown(fd, uid, gid) (-1) @@ -153,153 +149,6 @@ clone_file (int dest_fd, int src_fd) #endif } -#ifdef __linux__ -# ifndef FS_IOC_FIEMAP -# define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap) -# endif -/* Perform a FIEMAP copy, if possible. - Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to - obtain a map of file extents excluding holes. 
This avoids the - overhead of detecting holes in a hole-introducing/preserving copy, - and thus makes copying sparse files much more efficient. Upon a - successful copy, return true. If the initial ioctl fails, set - *NORMAL_COPY_REQUIRED to true and return false. Upon any other - failure, set *NORMAL_COPY_REQUIRED to false and return false. */ -static bool -fiemap_copy (int src_fd, int dest_fd, size_t buf_size, - off_t src_total_size, char const *src_name, - char const *dst_name, bool *normal_copy_required) -{ - bool last = false; - union { struct fiemap f; char c[4096]; } fiemap_buf; - struct fiemap *fiemap = &fiemap_buf.f; - struct fiemap_extent *fm_ext = &fiemap->fm_extents[0]; - enum { count = (sizeof fiemap_buf - sizeof *fiemap) / sizeof *fm_ext }; - verify (count != 0); - - off_t last_ext_logical = 0; - uint64_t last_ext_len = 0; - uint64_t last_read_size = 0; - unsigned int i = 0; - *normal_copy_required = false; - - /* This is required at least to initialize fiemap->fm_start, - but also serves (in mid 2010) to appease valgrind, which - appears not to know the semantics of the FIEMAP ioctl. */ - memset (&fiemap_buf, 0, sizeof fiemap_buf); - - do - { - fiemap->fm_length = FIEMAP_MAX_OFFSET; - fiemap->fm_flags = FIEMAP_FLAG_SYNC; - fiemap->fm_extent_count = count; - - /* When ioctl(2) fails, fall back to the normal copy only if it - is the first time we met. */ - if (ioctl (src_fd, FS_IOC_FIEMAP, fiemap) < 0) - { - /* If the first ioctl fails, tell the caller that it is - ok to proceed with a normal copy. */ - if (i == 0) - *normal_copy_required = true; - else - { - /* If the second or subsequent ioctl fails, diagnose it, - since it ends up causing the entire copy/cp to fail. */ - error (0, errno, _("%s: FIEMAP ioctl failed"), quote (src_name)); - } - return false; - } - - /* If 0 extents are returned, then more ioctls are not needed. 
*/ - if (fiemap->fm_mapped_extents == 0) - break; - - for (i = 0; i < fiemap->fm_mapped_extents; i++) - { - assert (fm_ext[i].fe_logical <= OFF_T_MAX); - - off_t ext_logical = fm_ext[i].fe_logical; - uint64_t ext_len = fm_ext[i].fe_length; - - if (lseek (src_fd, ext_logical, SEEK_SET) < 0) - { - error (0, errno, _("cannot lseek %s"), quote (src_name)); - return false; - } - - if (lseek (dest_fd, ext_logical, SEEK_SET) < 0) - { - error (0, errno, _("cannot lseek %s"), quote (dst_name)); - return false; - } - - if (fm_ext[i].fe_flags & FIEMAP_EXTENT_LAST) - { - last_ext_logical = ext_logical; - last_ext_len = ext_len; - last = true; - } - - while (ext_len) - { - char buf[buf_size]; - - /* Avoid reading into the holes if the left extent - length is shorter than the buffer size. */ - if (ext_len < buf_size) - buf_size = ext_len; - - ssize_t n_read = read (src_fd, buf, buf_size); - if (n_read < 0) - { -#ifdef EINTR - if (errno == EINTR) - continue; -#endif - error (0, errno, _("reading %s"), quote (src_name)); - return false; - } - - if (n_read == 0) - { - /* Figure out how many bytes read from the last extent. */ - last_read_size = last_ext_len - ext_len; - break; - } - - if (full_write (dest_fd, buf, n_read) != n_read) - { - error (0, errno, _("writing %s"), quote (dst_name)); - return false; - } - - ext_len -= n_read; - } - } - - fiemap->fm_start = fm_ext[i - 1].fe_logical + fm_ext[i - 1].fe_length; - - } while (! last); - - /* If a file ends up with holes, the sum of the last extent logical offset - and the read-returned size will be shorter than the actual size of the - file. Use ftruncate to extend the length of the destination file. 
*/ - if (last_ext_logical + last_read_size < src_total_size) - { - if (ftruncate (dest_fd, src_total_size) < 0) - { - error (0, errno, _("failed to extend %s"), quote (dst_name)); - return false; - } - } - - return true; -} -#else -static bool fiemap_copy (ignored) { errno == ENOTSUP; return false; } -#endif - /* FIXME: describe */ /* FIXME: rewrite this to use a hash table so we avoid the quadratic performance hit that's probably noticeable only on trees deeper @@ -830,25 +679,6 @@ copy_reg (char const *src_name, char const *dst_name, #endif } - if (make_holes) - { - bool require_normal_copy; - /* Perform efficient FIEMAP copy for sparse files, fall back to the - standard copy only if the ioctl(2) fails. */ - if (fiemap_copy (source_desc, dest_desc, buf_size, - src_open_sb.st_size, src_name, - dst_name, &require_normal_copy)) - goto preserve_metadata; - else - { - if (! require_normal_copy) - { - return_val = false; - goto close_src_and_dst_desc; - } - } - } - /* If not making a sparse file, try to use a more-efficient buffer size. */ if (! make_holes) @@ -977,7 +807,6 @@ copy_reg (char const *src_name, char const *dst_name, } } -preserve_metadata: if (x->preserve_timestamps) { struct timespec timespec[2]; -- 1.5.4.3 >From f083f1a52ec5baba90aa228c1053f4a32127b3b2 Mon Sep 17 00:00:00 2001 From: Jie Liu <[email protected]> Date: Tue, 20 Jul 2010 23:10:29 +0800 Subject: [PATCH 2/3] cp: add a new module for scanning extents * src/extent-scan.c: Source code for scanning extents. Call init_extent_scan() to return an extents map. Call get_next_extent() to get the next extent for each iteration. Call close_extent_scan() to tear down the scan, it now do nothing. * src/extent-scan.h: Header file of extent-scan.c. * src/Makefile.am: Reference it. 
Signed-off-by: Jie Liu <[email protected]> --- src/Makefile.am | 3 +- src/extent-scan.c | 137 +++++++++++++++++++++++++++++++++++++++++++++++++++++ src/extent-scan.h | 25 ++++++++++ 3 files changed, 164 insertions(+), 1 deletions(-) create mode 100644 src/extent-scan.c create mode 100644 src/extent-scan.h diff --git a/src/Makefile.am b/src/Makefile.am index 7d56312..fb8186c 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -145,6 +145,7 @@ noinst_HEADERS = \ copy.h \ cp-hash.h \ dircolors.h \ + extent-scan.h \ fiemap.h \ fs.h \ group-list.h \ @@ -459,7 +460,7 @@ uninstall-local: fi; \ fi -copy_sources = copy.c cp-hash.c +copy_sources = copy.c cp-hash.c extent-scan.c # Use `ginstall' in the definition of PROGRAMS and in dependencies to avoid # confusion with the `install' target. The install rule transforms `ginstall' diff --git a/src/extent-scan.c b/src/extent-scan.c new file mode 100644 index 0000000..c4085e0 --- /dev/null +++ b/src/extent-scan.c @@ -0,0 +1,137 @@ +/* extent-scan.c -- core functions for scanning extents + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + + Written by Jie Liu ([email protected]). 
*/ + +#include <config.h> +#include <stdio.h> +#include <sys/types.h> +#include <sys/ioctl.h> + +#include "system.h" +#include "extent-scan.h" +#include "error.h" +#include "quote.h" + +#ifndef HAVE_FIEMAP +# include "fiemap.h" +#endif + +/* The number of extents currently scan returned. */ +static size_t current_scanned_extents_count = 0; + +#ifdef __linux__ +# ifndef FS_IOC_FIEMAP +# define FS_IOC_FIEMAP _IOWR ('f', 11, struct fiemap) +# endif +/* Call ioctl(2) with FS_IOC_FIEMAP (available in linux 2.6.27) to + obtain a map of file extents excluding holes. */ +extern void * +init_extent_scan (int src_fd, const char *src_name, + bool *normal_copy_required, + bool *hit_last_extent) +{ + union { struct fiemap f; char c[4096]; } fiemap_buf; + struct fiemap *fiemap = &fiemap_buf.f; + struct fiemap_extent *fm_extents = &fiemap->fm_extents[0]; + enum { count = (sizeof fiemap_buf - sizeof *fiemap) / sizeof *fm_extents }; + verify (count != 0); + static uint64_t next_map_start = 0; + + /* This is required at least to initialize fiemap->fm_start, + * but also serves (in mid 2010) to appease valgrind, which + * appears not to know the semantics of the FIEMAP ioctl. */ + memset (&fiemap_buf, 0, sizeof fiemap_buf); + + fiemap->fm_start = next_map_start; + fiemap->fm_flags = FIEMAP_FLAG_SYNC; + fiemap->fm_extent_count = count; + fiemap->fm_length = FIEMAP_MAX_OFFSET - next_map_start; + + /* When ioctl(2) fails, fall back to the normal copy only if it + is the first time we met. */ + if (ioctl (src_fd, FS_IOC_FIEMAP, fiemap) < 0) + { + error (0, errno, _("%s: FIEMAP ioctl failed"), quote (src_name)); + + if (next_map_start == 0) + *normal_copy_required = true; + + return NULL; + } + + /* If 0 extents are returned, then more init_extent_scan() are not needed. 
*/ + if (fiemap->fm_mapped_extents == 0) + { + *hit_last_extent = true; + return NULL; + } + + current_scanned_extents_count = fiemap->fm_mapped_extents; + unsigned int last_extent_index = current_scanned_extents_count - 1; + + if (fm_extents[last_extent_index].fe_flags & FIEMAP_EXTENT_LAST) + { + *hit_last_extent = true; + return fm_extents; + } + + next_map_start = fm_extents[last_extent_index].fe_logical + + fm_extents[last_extent_index].fe_length; + + return fm_extents; +} + +/* Return an extent's logical offset and length for each iteration. */ +extern bool +get_next_extent (void *scanned_extents, off_t *extent_logical, + uint64_t *extent_length) +{ + static size_t i = 0; + struct fiemap_extent *fm_extents = (struct fiemap_extent *) scanned_extents; + + if (i < current_scanned_extents_count) + { + *extent_logical = fm_extents[i].fe_logical; + *extent_length = fm_extents[i].fe_length; + i++; + return true; + } + + return false; +} + +extern void +close_extent_scan (void) +{ + return ; +} +#else +extent void * +init_extent_scan (int src_fd, const char *src_name, + bool *hit_last_extent, bool *normal_copy_required) +{ + *normal_copy_required = true; + errno = ENOTSUP; + (void) src_fd; + (void) src_name; + (void) hit_last_extent; + (void) normal_copy_required; + return false; +} +extern bool get_next_extent (ignored) { errno = ENOTSUP; return false; } +extern void close_extent_scan (ignored) { error = ENOTSUP; return ; } +#endif diff --git a/src/extent-scan.h b/src/extent-scan.h new file mode 100644 index 0000000..e7e373f --- /dev/null +++ b/src/extent-scan.h @@ -0,0 +1,25 @@ +/* core functions for efficient reading sparse files + Copyright (C) 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#ifndef SPARSE_CORE_H +# define SPARSE_CORE_H + +void *init_extent_scan (int src_fd, const char *src_name, + bool *normal_copy_required, + bool *hit_last_extent); +bool get_next_extent (void *scanned_extents, off_t *extent_logical, uint64_t *extent_length); +void close_extent_scan (void); +#endif /* SPARSE_CORE_H */ -- 1.5.4.3 >From f23169c2c1721b8888dccb77000f79ddf9804df0 Mon Sep 17 00:00:00 2001 From: Jie Liu <[email protected]> Date: Tue, 20 Jul 2010 23:11:08 +0800 Subject: [PATCH 3/3] cp: add extents_copy() for efficient sparse file copy * src/copy.c (copy_reg): Now, `cp' attempt to make use of the new extent scan module for efficient sparse file copy through extents_copy(), fall back to a normal copy if the underlaying file system does not support it. We honor --sparse=never by writing all data but use extents scan if available to read source file for the efficiently read. * src/copy.c: Add fill_with_holes_ok() for write zeros as holes to destination file. * po/POTFILES.in: add extent-scan.c to it. 
Signed-off-by: Jie Liu <[email protected]> --- po/POTFILES.in | 1 + src/copy.c | 190 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 191 insertions(+), 0 deletions(-) diff --git a/po/POTFILES.in b/po/POTFILES.in index c862877..2ac1993 100644 --- a/po/POTFILES.in +++ b/po/POTFILES.in @@ -60,6 +60,7 @@ src/echo.c src/env.c src/expand.c src/expr.c +src/extent-scan.c src/factor.c src/false.c src/fmt.c diff --git a/src/copy.c b/src/copy.c index 171499c..6d89bbe 100644 --- a/src/copy.c +++ b/src/copy.c @@ -36,6 +36,7 @@ #include "copy.h" #include "cp-hash.h" #include "error.h" +#include "extent-scan.h" #include "fcntl--.h" #include "file-set.h" #include "filemode.h" @@ -149,6 +150,176 @@ clone_file (int dest_fd, int src_fd) #endif } +/* Write zeros as holes to the destination file. */ +static bool +fill_with_holes_ok (int dest_fd, const char *dst_name, + char *buf, size_t buf_size, + uint64_t holes_len) +{ + while (buf_size < holes_len) + { + if (full_write (dest_fd, buf, buf_size) != buf_size) + { + error (0, errno, _("writing %s"), quote (dst_name)); + return false; + } + holes_len -= buf_size; + } + + if (0 < holes_len) + { + if (full_write (dest_fd, buf, holes_len) != holes_len) + { + error (0, errno, _("writing %s"), quote (dst_name)); + return false; + } + } + + return true; +} + +/* Perform an efficient extents copy, if possible. This avoids + the overhead of detecting holes in hole-introducing/preserving + copy, and thus make copying sparse files much more efficient. + Upon a successful copy, return true. If the first initial extent + scan fails, set *NORMAL_COPY_REQUIRED to true and return false. + Upon any other failure, set *NORMAL_COPY_REQUIRED to false and + return false. 
*/ +static bool +extents_copy (int source_desc, int dest_desc, + char *buf, size_t buf_size, + const char *src_name, const char *dst_name, + bool make_holes, size_t src_total_size, + bool *require_normal_copy) +{ + bool init_extent_scan_failed = false; + bool hit_last_extent = false; + void *scanned_extents; + off_t last_ext_logical = 0; + off_t ext_logical = 0; + uint64_t last_ext_len = 0; + uint64_t ext_len = 0; + uint64_t holes_len = 0; + uint64_t last_read_size = 0; + + if (! make_holes) + memset (buf, 0, buf_size); + + do + { + scanned_extents = init_extent_scan (source_desc, src_name, + &init_extent_scan_failed, + &hit_last_extent); + if (init_extent_scan_failed) + { + *require_normal_copy = true; + return false; + } + + while (get_next_extent (scanned_extents, &ext_logical, &ext_len)) + { + assert (ext_logical <= OFF_T_MAX); + + if (lseek (source_desc, ext_logical, SEEK_SET) < 0) + { + error (0, errno, _("cannot lseek %s"), quote (src_name)); + return false; + } + + if (make_holes) + { + if (lseek (dest_desc, ext_logical, SEEK_SET) < 0) + { + error (0, errno, _("cannot lseek %s"), quote (dst_name)); + return false; + } + } + else + { + /* If not making a sparse file, write zeros to the destination + file if there is a hole between the last and current extent. */ + if (last_ext_logical + last_ext_len < ext_logical) + { + holes_len = ext_logical - last_ext_logical - last_ext_len; + if (! fill_with_holes_ok (dest_desc, dst_name, buf, buf_size, holes_len)) + return false; + } + } + + last_ext_logical = ext_logical; + last_ext_len = ext_len; + + last_read_size = 0; + while (ext_len) + { + /* Avoid reading into the holes if the left extent + length is shorter than the buffer size. 
*/ + if (ext_len < buf_size) + buf_size = ext_len; + + ssize_t n_read = read (source_desc, buf, buf_size); + if (n_read < 0) + { +#ifdef EINTR + if (errno == EINTR) + continue; +#endif + error (0, errno, _("reading %s"), quote (src_name)); + return false; + } + + if (n_read == 0) + { + last_read_size = last_ext_len - ext_len; + break; + } + + if (full_write (dest_desc, buf, n_read) != n_read) + { + error (0, errno, _("writing %s"), quote (dst_name)); + return false; + } + + ext_len -= n_read; + last_read_size += n_read; + } + } + } while (! hit_last_extent); + + close_extent_scan (); + + /* If a file ends up with holes, the sum of the last extent logical offset + and the read-returned size or the last extent length will be shorter than + the actual size of the file. Use ftruncate to extend the length of the + destination file if make_holes, or write zeros up to the actual size of the + file. */ + if (make_holes) + { + if (last_ext_logical + last_read_size < src_total_size) + { + if (ftruncate (dest_desc, src_total_size) < 0) + { + error (0, errno, _("failed to extend %s"), quote (dst_name)); + return false; + } + } + } + else + { + if (last_ext_logical + last_ext_len < src_total_size) + { + holes_len = src_total_size - last_ext_logical - last_ext_len; + if (0 < holes_len) + { + if (! fill_with_holes_ok (dest_desc, dst_name, buf, buf_size, holes_len)) + return false; + } + } + } + + return true; +} + /* FIXME: describe */ /* FIXME: rewrite this to use a hash table so we avoid the quadratic performance hit that's probably noticeable only on trees deeper @@ -707,6 +878,24 @@ copy_reg (char const *src_name, char const *dst_name, buf_alloc = xmalloc (buf_size + buf_alignment_slop); buf = ptr_align (buf_alloc, buf_alignment); + bool require_normal_copy; + /* Perform efficient extents copy for sparse files, fall back to the + standard copy only if the first initial extent scan fails. 
If the + `--sparse=never' option was specified, we writing all data but use + extents copy if available to efficiently read. */ + if (extents_copy (source_desc, dest_desc, buf, buf_size, + src_name, dst_name, make_holes, + src_open_sb.st_size, &require_normal_copy)) + goto preserve_metadata; + else + { + if (! require_normal_copy) + { + return_val = false; + goto close_src_and_dst_desc; + } + } + while (true) { word *wp = NULL; @@ -807,6 +996,7 @@ copy_reg (char const *src_name, char const *dst_name, } } +preserve_metadata: if (x->preserve_timestamps) { struct timespec timespec[2]; -- 1.5.4.3 Thanks, -Jeff -- The knowledge you get, no matter how much it is, must be possessed yourself and nourished with your own painstaking efforts and be your achievement through hard work.
