How hard is it to port sparse file support to UWIN/Windows?
Irek
---------- Forwarded message ----------
From: Roland Mainz <[email protected]>
Date: Tue, Oct 1, 2013 at 1:31 PM
Subject: Re: [ast-developers] [ast-users] Sparse file support for AST
cp/mv/ln ? / was: Re: Fwd: Re: Implementing SEEK_HOLE, SEEK_DATA in
AST cp, mv, pax
To: "[email protected]" <[email protected]>
On Tue, Oct 1, 2013 at 5:50 AM, Roland Mainz <[email protected]> wrote:
> On Mon, Sep 30, 2013 at 7:06 PM, Roland Mainz <[email protected]>
> wrote:
>> On Mon, Sep 30, 2013 at 5:12 PM, Roland Mainz <[email protected]>
>> wrote:
>>> On Mon, Sep 30, 2013 at 4:17 PM, Dan Shelton <[email protected]>
>>> wrote:
>>>> I'm just forwarding the old conversation as a reminder - AST pax still
>>>> does not support SEEK_HOLE or SEEK_DATA (nor does it SUN.holesdata pax
>>>> header), nor do AST cp and mv support files with holes.
>>>>
>>>> As consequence neither AST pax, cp or mv are competitive to any of
>>>> such implementations which support SEEK_HOLE and SEEK_DATA.
>>>>
>>>> For example moving a 200GB file with 99% holes with GNU mv (GNU
>>>> supports SEEK_HOLE/SEEK_DATA since 2010) across filesystem takes less
>>>> than a 4 seconds with GNU mv but takes a WHOPPING 18 minutes with AST
>>>> mv.
>>> [snip]
>>>
>>> Erm...
>>> ... AFAIK a copy-file-data algorithm would be just this:
>>> 1. Test whether $ getconf MIN_HOLE_SIZE <srcpath> # returns a value > 0
>>> 2. If [1] is true then check whether the file has at least one hole
>>> (via |SEEK_HOLE|)
>>> 3. If [2] is true switch to a special version of the data copying code
>>> which "simply" copies data via |write()| until it hits a hole and then
>>> uses |lseek()| to seek forward to the next position and then uses
>>> |write()| again.
>>>
>>> Glenn: Does that sound correct ?
>>
>> Some notes for myself:
> [snip]
>
> Attached (as "astksh20130926_sparsefile_cp001.diff.txt") is a
> prototype patch which adds sparse file support to AST
> cp(1)/mv(1)/ln(1) via using the |SEEK_HOLE|/|SEEK_DATA| API from
> POSIX.
> Additionally I've attached "lsholes.c.txt" which is a small test
> application to show the hole/data layout of a sparse file.
>
> * Notes:
> - |sfmove()| seems to turn longer sequences of '\0\ data into holes.
> While this is usefull _sometimes_ its devastating for databases&&other
> software which depend on an exact replication of the layout of the
> holes (and real data which are mostly of the value '\0'.
> - Erm... it's 5:35h AM here... any idea how |sfmove()| figures out if
> data are all zero bytes and should be skipped ?
[snip]
It turns out there is an sfio flag called |SF_WHOLE| to disable the
"turn zero bytes into holes"-functionality...
... attached (as "astksh20130926_sparsefile_cp002.diff.txt") is a
patch which exactly does that...
... AFAIK the only open question before committing it to ast-ksh is the
--sparse option and how to name the keys for it...
----
Bye,
Roland
--
__ . . __
(o.\ \/ /.o) [email protected]
\__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer
/O /==\ O\ TEL +49 641 3992797
(;O/ \/ \O;)
_______________________________________________
ast-developers mailing list
[email protected]
http://lists.research.att.com/mailman/listinfo/ast-developers
--
Irek
diff -r -u original/src/lib/libcmd/cp.c build_cpsparse/src/lib/libcmd/cp.c
--- src/lib/libcmd/cp.c 2013-07-16 23:45:26.000000000 +0200
+++ src/lib/libcmd/cp.c 2013-10-01 13:08:30.382728681 +0200
@@ -228,6 +228,169 @@
}
}
+#if defined(SEEK_HOLE) && defined(SEEK_DATA) /* both lseek() whence values must be defined at compile time */
+#define SPARSEFILE_SUPPORT 1 /* tested by the #ifdef SPARSEFILE_SUPPORT sections of this patch */
+#endif
+
+#ifdef SPARSEFILE_SUPPORT
+static /* file-local helper; no call to it appears in the hunks of this patch */
+bool supports_seek_hole(int fd) /* true unless one of the probes below reports "not supported" for |fd| */
+{
+ off_t pos;
+
+/* Linux does not support |_PC_MIN_HOLE_SIZE|, so the fpathconf() probe is compiled only where the name exists */
+#ifdef _PC_MIN_HOLE_SIZE
+ if (fpathconf(fd, _PC_MIN_HOLE_SIZE) < 0)
+ return (false);
+#endif
+
+ /*
+ * Test two error conditions:
+ * 1. we have been compiled on an OS revision that
+ * supports |SEEK_HOLE| but run on an OS revision
+ * that does not support |SEEK_HOLE|, we get |EINVAL|.
+ * 2. the underlying filesystem does not support
+ * |SEEK_HOLE|, we get |ENOTSUP|.
+ */
+ pos = lseek(fd, 0LL, SEEK_HOLE); /* probe from offset 0; the returned position itself is not used */
+ if (pos < 0LL)
+ {
+ if ((errno == EINVAL) || (errno == ENOTSUP))
+ return (false);
+ } /* a failure with any other errno is not treated as "unsupported" */
+
+ /* Do the same for |SEEK_DATA| (same two errno values mean "unsupported") */
+ pos = lseek(fd, 0LL, SEEK_DATA);
+ if (pos < 0LL)
+ {
+ if ((errno == EINVAL) || (errno == ENOTSUP))
+ return (false);
+ } /* NOTE(review): the lseek() probes leave the offset of |fd| wherever they moved it - callers presumably need to reposition; confirm */
+
+ return (true);
+}
+
+#if 1 /* debug tracing switched off; change to 0 to compile the D(...) statements in */
+#define D(x) /* expands to nothing: the wrapped statement is dropped */
+#else
+#define D(x) x /* expands to the wrapped statement itself */
+#endif
+
+typedef struct _sparsefiledatarec /* one contiguous extent of a file: either data or a hole */
+{
+ enum
+ {
+ SPFDREC_UNDEFINED = 0, /* zero value; not assigned anywhere in the code visible in this patch */
+ SPFDREC_DATA = 1, /* extent located via lseek(SEEK_DATA) */
+ SPFDREC_HOLE = 2 /* extent located via lseek(SEEK_HOLE) */
+ } type;
+ off_t begin; /* file offset where the extent starts */
+ off_t end; /* file offset where the extent stops; the copy loop uses end - begin as its length */
+} sparsefiledatarec;
+
+/*
+ * Build a map of the data and hole extents of the file open on |fd|.
+ *
+ * The file is walked from offset 0 up to the size reported by |fstat()|
+ * with |lseek(SEEK_DATA)| and |lseek(SEEK_HOLE)|; each extent becomes one
+ * |sparsefiledatarec| whose |begin| is the first offset of the extent and
+ * whose |end| is the offset where the next extent (or EOF) starts.
+ *
+ * Returns a heap-allocated array which the caller has to |free()| and
+ * stores the number of records in |*res_numrec|. A file of size zero
+ * yields one allocated record (type data, begin == end == 0) with
+ * |*res_numrec| set to 0. On any failure |NULL| is returned, nothing is
+ * leaked and |*res_numrec| is -1.
+ *
+ * The file offset of |fd| is left wherever the last |lseek()| put it, so
+ * the caller must reposition |fd| before reading from it.
+ */
+static
+sparsefiledatarec *sparsefile_enumerate_holes(int fd, ssize_t *res_numrec)
+{
+	struct stat st;
+	sparsefiledatarec *rec = NULL;
+	sparsefiledatarec *grown;
+	size_t numrec = 0;
+	off_t pos;
+	off_t next;
+	off_t data_pos;
+	off_t hole_pos;
+	int is_data;
+
+	*res_numrec = -1;
+
+	if (fstat(fd, &st) < 0)
+		return (NULL);
+
+	/* special case for files with zero size */
+	if (st.st_size == 0)
+	{
+		rec = malloc(sizeof(*rec));
+		if (!rec)
+			return (NULL);
+		rec->type = SPFDREC_DATA;
+		rec->begin = 0;
+		rec->end = 0;
+		*res_numrec = 0;
+		return (rec);
+	}
+
+	for (pos = 0 ; pos < st.st_size ; pos = next)
+	{
+		data_pos = lseek(fd, pos, SEEK_DATA);
+		if (data_pos < 0)
+		{
+			if (errno != ENXIO)
+				goto fail;
+			/*
+			 * |ENXIO|: no data at or after |pos|, i.e. the file
+			 * ends with a hole. Use the file size as the end of
+			 * that hole instead of keeping the -1 error value.
+			 */
+			data_pos = st.st_size;
+		}
+
+		hole_pos = lseek(fd, pos, SEEK_HOLE);
+		if (hole_pos < 0)
+		{
+			if (errno != ENXIO)
+				goto fail;
+			/*
+			 * |ENXIO| here means |pos| is no longer inside the
+			 * file (it presumably shrank after |fstat()|); clamp
+			 * to the size we sampled.
+			 */
+			hole_pos = st.st_size;
+		}
+
+		/* the extent starting at |pos| ends where the other kind begins */
+		if (data_pos == pos)
+		{
+			is_data = 1;
+			next = hole_pos;
+		}
+		else if (hole_pos == pos)
+		{
+			is_data = 0;
+			next = data_pos;
+		}
+		else
+		{
+			/* |pos| is neither in data nor in a hole: inconsistent answer */
+			goto fail;
+		}
+
+		/* never loop forever if the OS reports no forward progress */
+		if (next <= pos)
+			goto fail;
+
+		D((void)printf("#%s from %8ld to %8ld (size %8ld)\n",
+			is_data ? "data" : "hole",
+			(long)pos, (long)next, (long)(next - pos)));
+
+		/* grow via a temporary so the old array is not leaked on failure */
+		grown = realloc(rec, sizeof(*rec) * (numrec + 1));
+		if (!grown)
+			goto fail;
+		rec = grown;
+
+		rec[numrec].type = is_data ? SPFDREC_DATA : SPFDREC_HOLE;
+		rec[numrec].begin = pos;
+		rec[numrec].end = next;
+		numrec++;
+	}
+
+	*res_numrec = (ssize_t)numrec;
+
+	return (rec);
+
+fail:
+	free(rec);
+	return (NULL);
+}
+#endif /* SPARSEFILE_SUPPORT */
+
+
/*
* visit a single file and state.op to the destination
*/
@@ -605,6 +768,19 @@
}
else if (rfd >= 0)
{
+#ifdef SPARSEFILE_SUPPORT
+ sparsefiledatarec *sprec;
+ ssize_t spnumrec = 0L;
+ sprec = sparsefile_enumerate_holes(rfd,
&spnumrec);
+ if (lseek(rfd, 0LL, SEEK_SET) < 0)
+ {
+ error(ERROR_SYSTEM|2, "%s: %s read
stream seek error", ent->fts_path, state->path);
+ close(rfd);
+ close(wfd);
+ return 0;
+ }
+#endif /* SPARSEFILE_SUPPORT */
+
if (!(ip = sfnew(NiL, NiL, SF_UNBOUND, rfd,
SF_READ)))
{
error(ERROR_SYSTEM|2, "%s: %s read
stream error", ent->fts_path, state->path);
@@ -612,7 +788,20 @@
close(wfd);
return 0;
}
- if (!(op = sfnew(NiL, NiL, SF_UNBOUND, wfd,
SF_WRITE)))
+ if (!(op = sfnew(NiL, NiL, SF_UNBOUND, wfd,
SF_WRITE
+#ifdef SPARSEFILE_SUPPORT
+ /*
+ * Use real |SEEK_HOLE|/|SEEK_DATA|
support if we have
+ * it and don't try to turn innocent
'\0\-byte
+ * sequences into holes (which can
corrupt databases,
+ * simulations or special boot binaries
among other
+ * things. In the future we may have an
option which
+ * selects the mode (default should be
+ * |SEEK_HOLE|/|SEEK_DATA|))
+ */
+ |SF_WHOLE
+#endif
+ )))
{
error(ERROR_SYSTEM|2, "%s: %s write
stream error", ent->fts_path, state->path);
close(wfd);
@@ -620,10 +809,58 @@
return 0;
}
n = 0;
- if (sfmove(ip, op, (Sfoff_t)SF_UNBOUND, -1) < 0)
- n |= 3;
- if (!sfeof(ip))
- n |= 1;
+#ifdef SPARSEFILE_SUPPORT
+ if (sprec)
+ {
+ ssize_t i;
+
+ for (i=0 ; (i < spnumrec) && (n == 0) ;
i++)
+ {
+ Sfoff_t movesize = sprec[i].end
- sprec[i].begin;
+ switch(sprec[i].type)
+ {
+ case SPFDREC_DATA:
+ /*
+ * fixme:
|sfmove()| seems to optimise
+ * longer
sequences of '\0' away and
+ * turns them
into holes, too... this
+ * MUST not
happen with native
+ *
|SEEK_HOLE|/|SEEK_DATA|
+ * support
+ */
+ if (sfmove(ip,
op, movesize, -1) < 0)
+ n |= 3;
+ break;
+ case SPFDREC_HOLE:
+ if (sfseek(ip,
movesize, SEEK_CUR) < 0)
+ n |= 1;
+ if (sfseek(op,
movesize, SEEK_CUR) < 0)
+ n |= 2;
+ break;
+ }
+ }
+
+ /*
+ * Just seeking to a new postion does
not set
+ * the sfio-internal eof flag. If the
file
+ * ends with a hole we explicitly have
to read
+ * something to get the EOF (or not)
+ */
+ if ((n == 0) && (sfgetc(ip) != EOF))
+ {
+ n |= 1;
+ }
+
+ free(sprec);
+ }
+ else
+#endif /* SPARSEFILE_SUPPORT */
+ {
+ if (sfmove(ip, op, (Sfoff_t)SF_UNBOUND,
-1) < 0)
+ n |= 3;
+ if (!sfeof(ip))
+ n |= 1;
+ }
if (sfsync(op) || state->sync && fsync(wfd) ||
sfclose(op))
n |= 2;
if (sfclose(ip))
_______________________________________________
uwin-users mailing list
[email protected]
http://lists.research.att.com/mailman/listinfo/uwin-users