POSIX tarballs use extended headers to represent paths and values that do
not fit in the original ustar header format. This patch implements parsing
and handling of a subset of these extended headers. The motivating
tarball was the GCC source code, which contains paths that exceed the
original ustar path limit.
---
tar.c | 171 +++---
1 file changed, 153 insertions(+), 18 deletions(-)
diff --git a/tar.c b/tar.c
index b74c134..31592c0 100644
--- a/tar.c
+++ b/tar.c
@@ -33,6 +33,19 @@ enum Type {
RESERVED = '7'
};
+struct xheader { /* Extended-header state; initialised by readxhdr() and consulted by unarchive(). */
+ int valid : 1; /* set to 1 by readxhdr() right after it zeroes the struct. NOTE(review): plain int bit-field of width 1 has implementation-defined signedness, so a stored 1 may read back as -1; test these flags for non-zero only */
+ int has_atime : 1; /* when set, unarchive() uses atime as the access time */
+ int has_ctime : 1; /* presumably marks ctime as populated (by analogy with has_mtime) - no use visible here, TODO confirm */
+ int has_mtime : 1; /* when set, unarchive() uses mtime for both timestamps instead of the ustar mtime */
+ struct timespec atime; /* meaningful only when has_atime is set */
+ struct timespec ctime; /* presumably meaningful only when has_ctime is set - TODO confirm */
+ struct timespec mtime; /* meaningful only when has_mtime is set */
+ char *path; /* presumably a replacement for the ustar name field - its use is not visible here, TODO confirm */
+ char *linkpath; /* when non-NULL, unarchive() uses it as the link target instead of h->linkname */
+ char *buf; /* Backing buffer during read; readxhdr() preserves it across its memset and resizes it with erealloc(). */
+};
+
struct header {
char name[100];
char mode[8];
@@ -252,9 +265,9 @@ archive(const char *path)
}
static int
-unarchive(char *fname, ssize_t l, char b[BLKSIZ])
+unarchive(char *fname, ssize_t l, char b[BLKSIZ], struct xheader *xhdr)
{
- char lname[101], *tmp, *p;
+ char linkbuf[101], *linkpath, *tmp, *p;
long mode, major, minor, type, mtime, uid, gid;
struct header *h = (struct header *)b;
int fd = -1;
@@ -281,12 +294,17 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
break;
case HARDLINK:
case SYMLINK:
- snprintf(lname, sizeof(lname), "%.*s", (int)sizeof(h->linkname),
-h->linkname);
- if (((h->type == HARDLINK) ? link : symlink)(lname, fname) < 0)
+ if (xhdr && xhdr->linkpath) {
+ linkpath = xhdr->linkpath;
+ } else {
+ snprintf(linkbuf, sizeof(linkbuf), "%.*s",
(int)sizeof(h->linkname),
+h->linkname);
+ linkpath = linkbuf;
+ }
+ if (((h->type == HARDLINK) ? link : symlink)(linkpath, fname) <
0)
eprintf("%s %s -> %s:",
(h->type == HARDLINK) ? "link" : "symlink",
- fname, lname);
+ fname, linkpath);
break;
case DIRECTORY:
if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0')
@@ -334,6 +352,13 @@ unarchive(char *fname, ssize_t l, char b[BLKSIZ])
times[0].tv_sec = times[1].tv_sec = mtime;
times[0].tv_nsec = times[1].tv_nsec = 0;
+ if (xhdr && xhdr->has_mtime) {
+ times[0] = times[1] = xhdr->mtime;
+ }
+ if (xhdr && xhdr->has_atime) {
+ times[0] = xhdr->atime;
+ }
+
if (!mflag && utimensat(AT_FDCWD, fname, times, AT_SYMLINK_NOFOLLOW) <
0)
weprintf("utimensat %s:", fname);
if (h->type == SYMLINK) {
@@ -359,8 +384,104 @@ skipblk(ssize_t l)
break;
}
+static void /* Convert the decimal string s ("seconds" or "seconds.fraction") into *t. */
+xhdrtime(struct timespec *t, char *s) /* t: output, both fields are overwritten; s: NUL-terminated text, only read */
+{
+ size_t i; /* number of fractional characters consumed so far */
+ char *pns, *pdot; /* pdot: first '.' in s (or NULL); pns: character right after it */
+
+ t->tv_sec = strtoul(s, NULL, 10); /* NOTE(review): unsigned parse with no end-pointer or errno check, so malformed or out-of-range seconds are not diagnosed */
+ t->tv_nsec = 0; /* stays 0 when s has no fractional part */
+ if ((pdot = strchr(s, '.'))) {
+ pns = pdot+1;
+ for (i = 0; i < 9 && pns[i]; i++) { /* use at most 9 characters after the dot; anything further is ignored */
+ t->tv_nsec *= 10;
+ t->tv_nsec += pns[i] - '0'; /* NOTE(review): pns[i] is not checked to be a digit, so other characters yield a bogus tv_nsec */
+ }
+ for (; i < 9; i++) { /* fewer than 9 characters: scale up to nanoseconds, e.g. ".5" becomes 500000000 */
+ t->tv_nsec *= 10;
+ }
+ }
+
+}
+
+static void
+readxhdr(struct xheader *xhdr , ssize_t l)
+{
+ char b[BLKSIZ];
+ char *reason, *buf;
+ char *p, *pend, *lenstr, *k, *v, *vend;
+
+ buf = xhdr->buf;
+ memset(xhdr, 0, sizeof(struct xheader));
+ xhdr->valid = 1;
+ xhdr->buf = erealloc(buf, l);
+
+ if (!eread(tarfd, xhdr->buf, l)) {
+ reason = "truncated";
+ goto bad;
+ }
+ if (l % BLKSIZ)
+ eread(tarfd, b, BLKSIZ-(l % BLKSIZ));
+
+ p = xhdr->buf;
+ pend = p + l;
+
+ while (p < pend) {
+ lenstr = p;
+ while (p < pend && (*p >= '0' && *p <= '9')) {
+ p++;
+ }
+ if (p >= pend || *p != ' ') {
+ reason = "corrupt length";
+ goto bad;
+ }
+ *p++ = 0;
+ k = p;
+ while (p < pend && *p != '=') {
+ p++;
+ }
+ if (p >= pend) {
+ reason = "corrupt keyword";
+ goto bad;
+ }
+ *p++ = 0;
+ v = p;
+ vend = lenstr + strtoul(lenstr, NULL, 10) - 1;
+ if (vend >= pend || vend <= p || *vend != '\n') {
+ reason = "length mismatch";
+ goto bad;
+ }
+ *vend