On Sat, Sep 09, 2017 at 11:08:42AM +0200, Silvan Jegen wrote:
> From: Jim Beveridge <[email protected]>
> 
> The original code is by Jim Beveridge working on Fuchsia. I merged it
> with slight changes.
> 

To be clear: is it under the sbase LICENSE?

> Time to tar two 1GB files:
> 
> Before patch:
> 
> real  0m6.428s
> user  0m0.245s
> sys   0m4.881s
> 
> real  0m6.454s
> user  0m0.239s
> sys   0m4.883s
> 
> real  0m6.515s
> user  0m0.259s
> sys   0m4.839s
> 
> After patch:
> 
> real  0m4.755s
> user  0m0.026s
> sys   0m1.598s
> 
> real  0m4.788s
> user  0m0.063s
> sys   0m1.578s
> 
> real  0m4.822s
> user  0m0.007s
> sys   0m1.662s
> 
> A similar speedup can be observed for untaring.
> 
> In addition to the buffer size increase we change the code to only create
> directories for non-compliant tar files and we check for st to be NULL
> in the recursive copy function.
> ---
>  tar.c | 72 
> +++++++++++++++++++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 55 insertions(+), 17 deletions(-)
> 
> diff --git a/tar.c b/tar.c
> index 53a737c..8cd1abe 100644
> --- a/tar.c
> +++ b/tar.c
> @@ -16,6 +16,8 @@
>  #include "util.h"
>  
>  #define BLKSIZ 512
> +// COPY_CHUNK_SIZE must be a power of 2
> +#define COPY_CHUNK_SIZE 8192
>  

Instead of COPY_CHUNK_SIZE is might be worthwhile to query the pagesize, but
i've not tested it.

>  enum Type {
>       REG       = '0',
> @@ -236,10 +238,13 @@ archive(const char *path)
>       ewrite(tarfd, b, BLKSIZ);
>  
>       if (fd != -1) {
> -             while ((l = eread(fd, b, BLKSIZ)) > 0) {
> -                     if (l < BLKSIZ)
> -                             memset(b + l, 0, BLKSIZ - l);
> -                     ewrite(tarfd, b, BLKSIZ);
> +             char chunk[COPY_CHUNK_SIZE];
> +             while ((l = eread(fd, chunk, COPY_CHUNK_SIZE)) > 0) {
> +                     // Ceiling to BLKSIZ boundary
> +                     int ceilsize = (l + (BLKSIZ-1)) & ~(BLKSIZ-1);
> +                     if (l < ceilsize)
> +                             memset(chunk + l, 0, ceilsize - l);
> +                     ewrite(tarfd, chunk, ceilsize);
>               }
>               close(fd);
>       }
> @@ -250,7 +255,7 @@ archive(const char *path)
>  static int
>  unarchive(char *fname, ssize_t l, char b[BLKSIZ])
>  {
> -     char lname[101], *tmp, *p;
> +     char lname[101], *p;
>       long mode, major, minor, type, mtime, uid, gid;
>       struct header *h = (struct header *)b;
>       int fd = -1;
> @@ -261,9 +266,13 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
>       if (remove(fname) < 0 && errno != ENOENT)
>               weprintf("remove %s:", fname);
>  
> -     tmp = estrdup(fname);
> -     mkdirp(dirname(tmp), 0777, 0777);
> -     free(tmp);
> +     // tar files normally create the directory chain. This is a fallback
> +     // for noncompliant tar files.
> +     if (h->type != DIRECTORY) {
> +             char* tmp = estrdup(fname);
> +             mkdirp(dirname(tmp), 0777, 0777);
> +             free(tmp);
> +     }
>  
>       switch (h->type) {
>       case REG:
> @@ -319,9 +328,25 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
>               eprintf("strtol %s: invalid number\n", h->gid);
>  
>       if (fd != -1) {
> -             for (; l > 0; l -= BLKSIZ)
> -                     if (eread(tarfd, b, BLKSIZ) > 0)
> -                             ewrite(fd, b, MIN(l, BLKSIZ));
> +             // Ceiling to BLKSIZ boundary
> +             int readsize = (l + (BLKSIZ-1)) & ~(BLKSIZ-1);
> +             char chunk[COPY_CHUNK_SIZE];
> +             int lastread = 0;
> +
> +             for (; readsize > 0; l -= lastread, readsize -= lastread) {
> +                     int chunk_size = MIN(readsize, COPY_CHUNK_SIZE);
> +                     // Short reads are legal, so don't expect to read
> +                     // everything that was requested.
> +                     lastread = eread(tarfd, chunk, chunk_size);
> +                     if (lastread == 0) {
> +                             close(fd);
> +                             remove(fname);

Do all the tar tools remove the file in this case? It might be better to not
remove it.

> +                             eprintf("unexpected end of file reading %s.\n",
> +                                     fname);
> +                     }
> +
> +                     ewrite(fd, chunk, MIN(l, lastread));
> +             }
>               close(fd);
>       }
>  
> @@ -331,7 +356,7 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
>       times[0].tv_sec = times[1].tv_sec = mtime;
>       times[0].tv_nsec = times[1].tv_nsec = 0;
>       if (!mflag && utimensat(AT_FDCWD, fname, times, AT_SYMLINK_NOFOLLOW) < 
> 0)
> -             weprintf("utimensat %s:\n", fname);
> +             weprintf("utimensat %s %d:\n", fname, errno);
>       if (h->type == SYMLINK) {
>               if (!getuid() && lchown(fname, uid, gid))
>                       weprintf("lchown %s:\n", fname);
> @@ -349,10 +374,23 @@ static void
>  skipblk(ssize_t l)
>  {
>       char b[BLKSIZ];
> -
> -     for (; l > 0; l -= BLKSIZ)
> -             if (!eread(tarfd, b, BLKSIZ))
> -                     break;
> +     int lastread = 0;
> +     // Ceiling to BLKSIZ boundary
> +     int ceilsize = (l + (BLKSIZ-1)) & ~(BLKSIZ-1);
> +
> +     off_t offset = lseek(tarfd, ceilsize, SEEK_CUR);
> +     if (offset >= ceilsize)
> +             return;
> +     if (errno != ESPIPE)
> +             eprintf("unexpected end of file.\n");
> +
> +     // This is a pipe, socket or FIFO. Fall back to a sequential read.
> +     for (; ceilsize > 0; ceilsize -= lastread) {
> +             int chunk_size = MIN(ceilsize, BLKSIZ);
> +             lastread = eread(tarfd, b, chunk_size);
> +             if (lastread == 0)
> +                     eprintf("unexpected end of file %d.\n", errno);
> +        }
>  }
>  
>  static int
> @@ -370,7 +408,7 @@ c(const char *path, struct stat *st, void *data, struct 
> recursor *r)
>       if (vflag)
>               puts(path);
>  
> -     if (S_ISDIR(st->st_mode))
> +     if (st && S_ISDIR(st->st_mode))
>               recurse(path, NULL, r);
>  }
>  
> -- 
> 2.7.4
> 
> 

The C++ comments should be changed to /* */. I'd also prefer all variable
declarations at the top and the other notes above.

Nice work though :)

-- 
Kind regards,
Hiltjo

Reply via email to