Most tar implementations (GNU, BusyBox, toybox, libarchive) recognize leading spaces as equivalent to leading zeroes in numeric fields. Because this implementation does not, some archives are wrongly recognized as malformed. Fix this by replacing leading spaces with leading zeroes in sanitize() and in chktar(). --- tar.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-)
diff --git a/tar.c b/tar.c index d3a9f3b..92fe61b 100644 --- a/tar.c +++ b/tar.c @@ -382,6 +382,7 @@ static void sanitize(struct header *h) { size_t i, j; + int leading; struct { char *f; size_t l; @@ -399,10 +400,17 @@ sanitize(struct header *h) /* Numeric fields can be terminated with spaces instead of * NULs as per the ustar specification. Patch all of them to * use NULs so we can perform string operations on them. */ + + /* Most tar implementations also recognize leading spaces + * in numeric fields. Some tar archives use ' 's instead of + * '0's for them. So patch those too, so we can correctly + * recognize them. */ for (i = 0; i < LEN(fields); i++) - for (j = 0; j < fields[i].l; j++) + for (leading = 1, j = 0; j < fields[i].l; j++) if (fields[i].f[j] == ' ') - fields[i].f[j] = '\0'; + fields[i].f[j] = leading ? '0' : '\0'; + else + leading = 0; } static void @@ -411,6 +419,7 @@ chktar(struct header *h) char tmp[8], *err, *p = (char *)h; const char *reason; long s1, s2, i; + int leading; if (h->prefix[0] == '\0' && h->name[0] == '\0') { reason = "empty filename"; @@ -421,9 +430,11 @@ chktar(struct header *h) goto bad; } memcpy(tmp, h->chksum, sizeof(tmp)); - for (i = 0; i < sizeof(tmp); i++) + for (leading = 1, i = 0; i < sizeof(tmp); i++) if (tmp[i] == ' ') - tmp[i] = '\0'; + tmp[i] = leading ? '0' : '\0'; + else + leading = 0; s1 = strtol(tmp, &err, 8); if (s1 < 0 || *err != '\0') { reason = "invalid checksum"; -- 2.43.0