Author: bodewig
Date: Sat Jul 23 05:03:52 2011
New Revision: 1149799
URL: http://svn.apache.org/viewvc?rev=1149799&view=rev
Log:
detect sparse entries in tar and allow users to skip them. Submitted by
Patrick Dreyer. COMPRESS-145 (forgot to save two buffers, oops)
Modified:
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
Modified:
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
URL:
http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java?rev=1149799&r1=1149798&r2=1149799&view=diff
==============================================================================
---
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
(original)
+++
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveEntry.java
Sat Jul 23 05:03:52 2011
@@ -19,6 +19,7 @@
package org.apache.commons.compress.archivers.tar;
import java.io.File;
+import java.nio.ByteBuffer;
import java.util.Date;
import java.util.Locale;
@@ -78,6 +79,30 @@ import org.apache.commons.compress.archi
* New-style GNU tar files are slightly different from the above.
* </pre>
*
+ * <p>
+ * The C structure for an old GNU Tar Entry's header is:
+ * <pre>
+ * struct oldgnu_header {
+ * char unused_pad1[345]; // TarConstants.PAD1LEN_GNU - offset 0
+ * char atime[12]; // TarConstants.ATIMELEN_GNU - offset 345
+ * char ctime[12]; // TarConstants.CTIMELEN_GNU - offset 357
+ * char offset[12]; // TarConstants.OFFSETLEN_GNU - offset 369
+ * char longnames[4]; // TarConstants.LONGNAMESLEN_GNU - offset 381
+ * char unused_pad2; // TarConstants.PAD2LEN_GNU - offset 385
+ * struct sparse sp[4]; // TarConstants.SPARSELEN_GNU - offset 386
+ * char isextended; // TarConstants.ISEXTENDEDLEN_GNU - offset 482
+ * char realsize[12]; // TarConstants.REALSIZELEN_GNU - offset 483
+ * char unused_pad[17]; // TarConstants.PAD3LEN_GNU - offset 495
+ * };
+ * </pre>
+ * Here, "struct sparse" is:
+ * <pre>
+ * struct sparse {
+ * char offset[12]; // offset 0
+ * char numbytes[12]; // offset 12
+ * };
+ * </pre>
+ *
* @NotThreadSafe
*/
@@ -123,6 +148,12 @@ public class TarArchiveEntry implements
/** The entry's minor device number. */
private int devMinor;
+ /** If an extension sparse header follows. */
+ private boolean isExtended;
+
+ /** The entry's real size in case of a sparse file. */
+ private long realSize;
+
/** The entry's file reference */
private File file;
@@ -228,7 +259,7 @@ public class TarArchiveEntry implements
public TarArchiveEntry(File file) {
this(file, normalizeFileName(file.getPath(), false));
}
-
+
/**
* Construct an entry for a file. File is set to file, and the
* header is constructed from information from the file.
@@ -251,7 +282,7 @@ public class TarArchiveEntry implements
if (nameLength == 0 || fileName.charAt(nameLength - 1) != '/') {
this.name = fileName + "/";
} else {
- this.name = fileName;
+ this.name = fileName;
}
this.size = 0;
} else {
@@ -538,6 +569,33 @@ public class TarArchiveEntry implements
this.size = size;
}
+ /**
+ * Indicates, in case of a sparse file, whether an extension sparse
+ * header follows this entry's header.
+ *
+ * @return true if an extension sparse header follows.
+ */
+ public boolean isExtended() {
+ return isExtended;
+ }
+
+ /**
+ * Get the real size of this entry's file in case of a sparse file.
+ *
+ * @return This entry's real file size, as stored in the realSize field.
+ */
+ public long getRealSize() {
+ return realSize;
+ }
+
+ /**
+ * Indicate if this entry is a GNU sparse block, i.e. its link flag is LF_GNUTYPE_SPARSE.
+ *
+ * @return true if this is a sparse extension provided by GNU tar.
+ */
+ public boolean isGNUSparse() {
+ return linkFlag == LF_GNUTYPE_SPARSE;
+ }
/**
* Indicate if this entry is a GNU long name block
@@ -749,13 +807,34 @@ public class TarArchiveEntry implements
offset += DEVLEN;
devMinor = (int) TarUtils.parseOctal(header, offset, DEVLEN);
offset += DEVLEN;
- String prefix = TarUtils.parseName(header, offset, PREFIXLEN);
- // SunOS tar -E does not add / to directory names, so fix up to be
consistent
- if (isDirectory() && !name.endsWith("/")){
- name = name + "/";
+
+ int type = evaluateType(header);
+ switch (type) {
+ case FORMAT_OLDGNU: {
+ offset += ATIMELEN_GNU;
+ offset += CTIMELEN_GNU;
+ offset += OFFSETLEN_GNU;
+ offset += LONGNAMESLEN_GNU;
+ offset += PAD2LEN_GNU;
+ offset += SPARSELEN_GNU;
+ isExtended = TarUtils.parseBoolean(header, offset);
+ offset += ISEXTENDEDLEN_GNU;
+ realSize = TarUtils.parseOctal(header, offset, REALSIZELEN_GNU);
+ offset += REALSIZELEN_GNU;
+ break;
+ }
+ case FORMAT_POSIX:
+ default: {
+ String prefix = TarUtils.parseName(header, offset, PREFIXLEN);
+ // SunOS tar -E does not add / to directory names, so fix
+ // up to be consistent
+ if (isDirectory() && !name.endsWith("/")){
+ name = name + "/";
+ }
+ if (prefix.length() > 0){
+ name = prefix + "/" + name;
+ }
}
- if (prefix.length() >0){
- name = prefix + "/" + name;
}
}
@@ -801,5 +880,20 @@ public class TarArchiveEntry implements
}
return fileName;
}
+
+ /**
+ * Evaluate an entry's header format by comparing the MAGICLEN bytes at MAGIC_OFFSET with the known magic strings.
+ *
+ * @param header The tar entry header buffer to evaluate the format for.
+ * @return FORMAT_OLDGNU if the magic equals MAGIC_GNU, FORMAT_POSIX if it equals MAGIC_POSIX, otherwise 0.
+ */
+ private int evaluateType(byte[] header) {
+ final ByteBuffer magic = ByteBuffer.wrap(header, MAGIC_OFFSET,
MAGICLEN);
+ if (magic.compareTo(ByteBuffer.wrap(MAGIC_GNU.getBytes())) == 0)
+ return FORMAT_OLDGNU;
+ if (magic.compareTo(ByteBuffer.wrap(MAGIC_POSIX.getBytes())) == 0)
+ return FORMAT_POSIX;
+ return 0;
+ }
}
Modified:
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
URL:
http://svn.apache.org/viewvc/commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java?rev=1149799&r1=1149798&r2=1149799&view=diff
==============================================================================
---
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
(original)
+++
commons/proper/compress/trunk/src/main/java/org/apache/commons/compress/archivers/tar/TarArchiveInputStream.java
Sat Jul 23 05:03:52 2011
@@ -185,23 +185,18 @@ public class TarArchiveInputStream exten
readBuf = null;
}
- byte[] headerBuf = buffer.readRecord();
-
- if (headerBuf == null) {
- hasHitEOF = true;
- } else if (buffer.isEOFRecord(headerBuf)) {
- hasHitEOF = true;
- }
+ byte[] headerBuf = getRecord();
if (hasHitEOF) {
currEntry = null;
- } else {
- currEntry = new TarArchiveEntry(headerBuf);
- entryOffset = 0;
- entrySize = currEntry.getSize();
+ return null;
}
- if (currEntry != null && currEntry.isGNULongNameEntry()) {
+ currEntry = new TarArchiveEntry(headerBuf);
+ entryOffset = 0;
+ entrySize = currEntry.getSize();
+
+ if (currEntry.isGNULongNameEntry()) {
// read in the name
StringBuffer longName = new StringBuffer();
byte[] buf = new byte[SMALL_BUFFER_SIZE];
@@ -223,13 +218,45 @@ public class TarArchiveInputStream exten
currEntry.setName(longName.toString());
}
- if (currEntry != null && currEntry.isPaxHeader()){ // Process Pax
headers
+ if (currEntry.isPaxHeader()){ // Process Pax headers
paxHeaders();
}
+ if (currEntry.isGNUSparse()){ // Process sparse files
+ readGNUSparse();
+ }
+
return currEntry;
}
+ /**
+ * Get the next record in this tar archive. Returns null
+ * immediately if the end of the archive has already been
+ * reached. Otherwise one record is read from the buffer; if
+ * no record is available, or the record is an EOF record,
+ * hasHitEOF is set and null is returned.
+ *
+ * <p>This method itself does not skip any data of the current entry.</p>
+ *
+ * @return The next record in the archive, or null.
+ * @throws IOException on error
+ */
+ private byte[] getRecord() throws IOException {
+ if (hasHitEOF) {
+ return null;
+ }
+
+ byte[] headerBuf = buffer.readRecord();
+
+ if (headerBuf == null) {
+ hasHitEOF = true;
+ } else if (buffer.isEOFRecord(headerBuf)) {
+ hasHitEOF = true;
+ }
+
+ return hasHitEOF ? null : headerBuf;
+ }
+
private void paxHeaders() throws IOException{
BufferedReader br = new BufferedReader(new InputStreamReader(this,
"UTF-8"));
Map headers = new HashMap();
@@ -301,6 +328,35 @@ public class TarArchiveInputStream exten
}
}
+ /**
+ * Reads and discards the sparse extension headers that follow the
+ * current entry; sets currEntry to null if EOF is hit while doing so.
+ *
+ * @throws IOException on error
+ *
+ * @todo Sparse files are not yet really processed.
+ */
+ private void readGNUSparse() throws IOException {
+ /* we do not really process sparse files yet
+ sparses = new ArrayList();
+ sparses.addAll(currEntry.getSparses());
+ */
+ if (currEntry.isExtended()) {
+ TarArchiveSparseEntry entry;
+ do {
+ byte[] headerBuf = getRecord();
+ if (hasHitEOF) {
+ currEntry = null;
+ break;
+ }
+ entry = new TarArchiveSparseEntry(headerBuf);
+ /* we do not really process sparse files yet
+ sparses.addAll(entry.getSparses());
+ */
+ } while (entry.isExtended());
+ }
+ }
+
public ArchiveEntry getNextEntry() throws IOException {
return getNextTarEntry();
}
@@ -385,6 +441,19 @@ public class TarArchiveInputStream exten
return totalRead;
}
+ /**
+ * Whether this class is able to read the given entry.
+ *
+ * <p>Returns false if the entry is a GNU sparse entry or is not a TarArchiveEntry; true otherwise.</p>
+ */
+ public boolean canReadEntryData(ArchiveEntry ae) {
+ if (ae instanceof TarArchiveEntry) {
+ TarArchiveEntry te = (TarArchiveEntry) ae;
+ return !te.isGNUSparse();
+ }
+ return false;
+ }
+
protected final TarArchiveEntry getCurrentEntry() {
return currEntry;
}