Re: [hackers] [sbase][PATCH] tar: support extracting long paths, link targets, and times.

2022-05-05 Thread Andrew Chambers




Thanks for the patches, they look good to me, but I'll do a proper review
this weekend if nobody else has done one before then.

Cheers!




It is worth noting that I also talked to mcf off list and he said
getting a finished implementation of pax might be an even better option,
as that is a POSIX standard we don't implement. The idea would then be
to share the code between pax and tar somehow.


Before that happens it might be relatively harmless to merge these though.

thanks :)




Re: [hackers] [sbase][PATCH] tar: support extracting long paths, link targets, and times.

2022-05-04 Thread Quentin Rameau
Hi Andrew,

> POSIX tarballs use extended headers to represent paths and values that do
> not fit in the original ustar header format. This patch implements parsing
> and handling of a subset of these extended headers.  The motivating
> tarball was the GCC source code, which exceeds the original path limit.

Thanks for the patches, they look good to me, but I'll do a proper review
this weekend if nobody else has done one before then.

Cheers!



[hackers] [sbase][PATCH] tar: support extracting long paths, link targets, and times.

2022-05-01 Thread Andrew Chambers
POSIX tarballs use extended headers to represent paths and values that do
not fit in the original ustar header format. This patch implements parsing
and handling of a subset of these extended headers.  The motivating
tarball was the GCC source code, which exceeds the original path limit.
---
 tar.c | 171 +++---
 1 file changed, 153 insertions(+), 18 deletions(-)

diff --git a/tar.c b/tar.c
index b74c134..31592c0 100644
--- a/tar.c
+++ b/tar.c
@@ -33,6 +33,19 @@ enum Type {
RESERVED  = '7'
 };
 
+struct xheader {
+   int valid : 1;
+   int has_atime : 1;
+   int has_ctime : 1;
+   int has_mtime : 1;
+   struct timespec atime;
+   struct timespec ctime;
+   struct timespec mtime;
+   char *path;
+   char *linkpath;
+   char *buf; /* Backing buffer during read. */
+};
+
 struct header {
char name[100];
char mode[8];
@@ -252,9 +265,9 @@ archive(const char *path)
 }
 
 static int
-unarchive(char *fname, ssize_t l, char b[BLKSIZ])
+unarchive(char *fname, ssize_t l, char b[BLKSIZ], struct xheader *xhdr)
 {
-   char lname[101], *tmp, *p;
+   char linkbuf[101], *linkpath, *tmp, *p;
long mode, major, minor, type, mtime, uid, gid;
struct header *h = (struct header *)b;
int fd = -1;
@@ -281,12 +294,17 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
break;
case HARDLINK:
case SYMLINK:
-   snprintf(lname, sizeof(lname), "%.*s", (int)sizeof(h->linkname),
-h->linkname);
-   if (((h->type == HARDLINK) ? link : symlink)(lname, fname) < 0)
+   if (xhdr && xhdr->linkpath) {
+   linkpath = xhdr->linkpath;
+   } else {
+   snprintf(linkbuf, sizeof(linkbuf), "%.*s", 
(int)sizeof(h->linkname),
+h->linkname);
+   linkpath = linkbuf;
+   }
+   if (((h->type == HARDLINK) ? link : symlink)(linkpath, fname) < 
0)
eprintf("%s %s -> %s:",
(h->type == HARDLINK) ? "link" : "symlink",
-   fname, lname);
+   fname, linkpath);
break;
case DIRECTORY:
if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0')
@@ -334,6 +352,13 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
 
times[0].tv_sec = times[1].tv_sec = mtime;
times[0].tv_nsec = times[1].tv_nsec = 0;
+   if (xhdr && xhdr->has_mtime) {
+   times[0] = times[1] = xhdr->mtime;
+   }
+   if (xhdr && xhdr->has_atime) {
+   times[0] = xhdr->atime;
+   }
+
if (!mflag && utimensat(AT_FDCWD, fname, times, AT_SYMLINK_NOFOLLOW) < 
0)
weprintf("utimensat %s:", fname);
if (h->type == SYMLINK) {
@@ -359,8 +384,104 @@ skipblk(ssize_t l)
break;
 }
 
+static void
+xhdrtime(struct timespec *t, char *s)
+{
+   size_t i;
+   char *pns, *pdot;
+
+   t->tv_sec = strtoul(s, NULL, 10);
+   t->tv_nsec = 0;
+   if ((pdot = strchr(s, '.'))) {
+   pns = pdot+1;
+   for (i = 0; i < 9 && pns[i]; i++) {
+   t->tv_nsec *= 10;
+   t->tv_nsec += pns[i] - '0';
+   }
+   for (; i < 9; i++) {
+   t->tv_nsec *= 10;
+   }
+   }
+   
+}
+
+static void
+readxhdr(struct xheader *xhdr , ssize_t l)
+{
+   char b[BLKSIZ];
+   char *reason, *buf;
+   char *p, *pend, *lenstr, *k, *v, *vend;
+
+   buf = xhdr->buf;
+   memset(xhdr, 0, sizeof(struct xheader));
+   xhdr->valid = 1;
+   xhdr->buf = erealloc(buf, l);
+
+   if (!eread(tarfd, xhdr->buf, l)) {
+   reason = "truncated";
+   goto bad;
+   }
+   if (l % BLKSIZ)
+   eread(tarfd, b, BLKSIZ-(l % BLKSIZ));
+
+   p = xhdr->buf;
+   pend = p + l;
+
+   while (p < pend) {
+   lenstr = p;
+   while (p < pend && (*p >= '0' && *p <= '9')) {
+   p++;
+   }
+   if (p >= pend || *p != ' ') {
+   reason = "corrupt length";
+   goto bad;
+   }
+   *p++ = 0;
+   k = p;
+   while (p < pend && *p != '=') {
+   p++;
+   }
+   if (p >= pend) {
+   reason = "corrupt keyword";
+   goto bad;
+   }
+   *p++ = 0;
+   v = p;
+   vend = lenstr + strtoul(lenstr, NULL, 10) - 1;
+   if (vend >= pend || vend <= p || *vend != '\n') {
+   reason = "length mismatch";
+   goto bad;
+   }
+   *vend