Author: nick
Date: Tue Mar 15 12:30:45 2016
New Revision: 1735069
URL: http://svn.apache.org/viewvc?rev=1735069&view=rev
Log:
Before opening an OOXML file, check the first few bytes, and give helpful
exceptions for common mistakes
Modified:
poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java
poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java
poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java
Modified: poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java?rev=1735069&r1=1735068&r2=1735069&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java
(original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java Tue
Mar 15 12:30:45 2016
@@ -28,6 +28,7 @@ import java.util.zip.ZipOutputStream;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
+import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
@@ -194,6 +195,7 @@ public final class ZipPackage extends Pa
// At this point, we should have loaded the content type part
if (this.contentTypeManager == null) {
// Is it a different Zip-based format?
+ int numEntries = 0;
boolean hasMimetype = false;
boolean hasSettingsXML = false;
entries = this.zipArchive.getEntries();
@@ -205,12 +207,18 @@ public final class ZipPackage extends Pa
if (entry.getName().equals("settings.xml")) {
hasSettingsXML = true;
}
+ numEntries++;
}
if (hasMimetype && hasSettingsXML) {
throw new ODFNotOfficeXmlFileException(
"The supplied data appears to be in ODF (Open Document)
Format. " +
"Formats like these (eg ODS, ODP) are not supported, try
Apache ODFToolkit");
}
+ if (numEntries == 0) {
+ throw new NotOfficeXmlFileException(
+ "No valid entries or contents found, this is not a valid
OOXML " +
+ "(Office Open XML) file");
+ }
// Fallback exception
throw new InvalidFormatException(
Modified:
poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java?rev=1735069&r1=1735068&r2=1735069&view=diff
==============================================================================
---
poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java
(original)
+++
poi/trunk/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/ZipHelper.java
Tue Mar 15 12:30:45 2016
@@ -18,8 +18,10 @@
package org.apache.poi.openxml4j.opc.internal;
import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.PushbackInputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Enumeration;
@@ -27,12 +29,18 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipInputStream;
+import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
+import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.ZipPackage;
import org.apache.poi.openxml4j.util.ZipSecureFile;
import org.apache.poi.openxml4j.util.ZipSecureFile.ThresholdInputStream;
+import org.apache.poi.poifs.common.POIFSConstants;
+import org.apache.poi.poifs.storage.HeaderBlockConstants;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.LittleEndian;
public final class ZipHelper {
/**
@@ -144,6 +152,67 @@ public final class ZipHelper {
return null;
}
}
+
+ /**
+  * Sanity-checks the first few bytes of the given stream, rejecting
+  * formats that are commonly mistaken for OOXML.
+  * <p>
+  * Up to 8 bytes are read from the stream. Where possible they are put
+  * back afterwards: a {@link PushbackInputStream} has exactly the bytes
+  * that were read pushed back onto it, and a stream that supports
+  * mark/reset is reset. For the reset to return to the start of the
+  * data, the caller must have called {@code mark(8)} (or larger) on the
+  * stream immediately before invoking this method. Any other kind of
+  * stream is left with its first bytes consumed.
+  * <p>
+  * No check is made for an actual Zip signature: to maintain backwards
+  * compatibility, callers must be allowed to seek over junk at the
+  * start of the data before processing begins.
+  *
+  * @param stream the stream to inspect, positioned at the start of the data
+  * @throws OLE2NotOfficeXmlFileException if the data starts with the OLE2 signature
+  * @throws NotOfficeXmlFileException if the data starts with the raw XML file header
+  * @throws IOException if reading from, or restoring, the stream fails
+  */
+ public static void verifyZipHeader(InputStream stream)
+         throws NotOfficeXmlFileException, IOException {
+     // Grab the first 8 bytes. A very short stream may supply fewer, in
+     // which case the rest of the buffer keeps its initial zero bytes
+     byte[] data = new byte[8];
+     int bytesRead = IOUtils.readFully(stream, data);
+
+     // OLE2?
+     long signature = LittleEndian.getLong(data);
+     if (signature == HeaderBlockConstants._signature) {
+         throw new OLE2NotOfficeXmlFileException(
+             "The supplied data appears to be in the OLE2 Format. " +
+             "You are calling the part of POI that deals with OOXML "+
+             "(Office Open XML) Documents. You need to call a different " +
+             "part of POI to process this data (eg HSSF instead of XSSF)");
+     }
+
+     // Raw XML?
+     byte[] rawXmlHeader = POIFSConstants.RAW_XML_FILE_HEADER;
+     if (data[0] == rawXmlHeader[0] &&
+         data[1] == rawXmlHeader[1] &&
+         data[2] == rawXmlHeader[2] &&
+         data[3] == rawXmlHeader[3] &&
+         data[4] == rawXmlHeader[4]) {
+         throw new NotOfficeXmlFileException(
+             "The supplied data appears to be a raw XML file. " +
+             "Formats such as Office 2003 XML are not supported");
+     }
+
+     // Don't check for a Zip header, as to maintain backwards
+     // compatibility we need to let them seek over junk at the
+     // start before beginning processing.
+
+     // Put things back
+     if (stream instanceof PushbackInputStream) {
+         // Only return the bytes that really came from the stream.
+         // Unreading the whole buffer after a short read would append
+         // zero bytes that were never part of the data.
+         if (bytesRead > 0) {
+             ((PushbackInputStream)stream).unread(data, 0, bytesRead);
+         }
+     } else if (stream.markSupported()) {
+         stream.reset();
+     }
+     // Anything else cannot be restored. For a FileInputStream opened
+     // purely for this check (and about to be closed) that is fine;
+     // for other streams the caller has to know what they are doing.
+ }
+
+ /**
+  * Returns a stream from which the first 8 bytes can be read and then
+  * restored, so that the header can be inspected without losing data.
+  * <p>
+  * A stream that supports mark/reset is marked with a read limit of 8
+  * and returned unchanged. Every other stream is wrapped in a new
+  * {@link PushbackInputStream} with an 8 byte pushback buffer.
+  *
+  * @param stream the stream whose header is about to be checked
+  * @return the stream to use for both the header check and all later reads
+  */
+ private static InputStream prepareToCheckHeader(InputStream stream) {
+     if (stream.markSupported()) {
+         stream.mark(8);
+         return stream;
+     }
+     // Always wrap, even when handed a PushbackInputStream: the pushback
+     // buffer of a caller-supplied one may be smaller than 8 bytes (the
+     // default size is 1), and unreading the header into it would then
+     // fail with an IOException.
+     return new PushbackInputStream(stream, 8);
+ }
/**
* Opens the specified stream as a secure zip
@@ -153,7 +222,12 @@ public final class ZipHelper {
* @return The zip stream freshly open.
*/
public static ThresholdInputStream openZipStream(InputStream stream)
throws IOException {
- InputStream zis = new ZipInputStream(stream);
+ // Peek at the first few bytes to sanity check
+ InputStream checkedStream = prepareToCheckHeader(stream);
+ verifyZipHeader(checkedStream);
+
+ // Open as a proper zip stream
+ InputStream zis = new ZipInputStream(checkedStream);
ThresholdInputStream tis = ZipSecureFile.addThreshold(zis);
return tis;
}
@@ -170,7 +244,13 @@ public final class ZipHelper {
if (!file.exists()) {
return null;
}
+
+ // Peek at the first few bytes to sanity check
+ FileInputStream input = new FileInputStream(file);
+ verifyZipHeader(input);
+ input.close();
+ // Open as a proper zip file
return new ZipSecureFile(file);
}
Modified:
poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java?rev=1735069&r1=1735068&r2=1735069&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java
(original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackage.java
Tue Mar 15 12:30:45 2016
@@ -51,6 +51,7 @@ import org.apache.poi.POIXMLException;
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
+import org.apache.poi.openxml4j.exceptions.NotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.ODFNotOfficeXmlFileException;
import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
import org.apache.poi.openxml4j.opc.internal.ContentTypeManager;
@@ -679,14 +680,38 @@ public final class TestPackage {
POIDataSamples files = POIDataSamples.getSpreadSheetInstance();
// OLE2 - Stream
-// try {
-// OPCPackage.open(files.openResourceAsStream("SampleSS.xls"));
-// fail("Shouldn't be able to open OLE2");
-// } catch (OLE2NotOfficeXmlFileException e) {
-// // TODO Check details
-// }
-
+ try {
+ OPCPackage.open(files.openResourceAsStream("SampleSS.xls"));
+ fail("Shouldn't be able to open OLE2");
+ } catch (OLE2NotOfficeXmlFileException e) {
+ assertTrue(e.getMessage().indexOf("The supplied data appears to be
in the OLE2 Format") > -1);
+ assertTrue(e.getMessage().indexOf("You are calling the part of POI
that deals with OOXML") > -1);
+ }
// OLE2 - File
+ try {
+ OPCPackage.open(files.getFile("SampleSS.xls"));
+ fail("Shouldn't be able to open OLE2");
+ } catch (OLE2NotOfficeXmlFileException e) {
+ assertTrue(e.getMessage().indexOf("The supplied data appears to be
in the OLE2 Format") > -1);
+ assertTrue(e.getMessage().indexOf("You are calling the part of POI
that deals with OOXML") > -1);
+ }
+
+ // Raw XML - Stream
+ try {
+ OPCPackage.open(files.openResourceAsStream("SampleSS.xml"));
+ fail("Shouldn't be able to open XML");
+ } catch (NotOfficeXmlFileException e) {
+ assertTrue(e.getMessage().indexOf("The supplied data appears to be
a raw XML file") > -1);
+ assertTrue(e.getMessage().indexOf("Formats such as Office 2003
XML") > -1);
+ }
+ // Raw XML - File
+ try {
+ OPCPackage.open(files.getFile("SampleSS.xml"));
+ fail("Shouldn't be able to open XML");
+ } catch (NotOfficeXmlFileException e) {
+ assertTrue(e.getMessage().indexOf("The supplied data appears to be
a raw XML file") > -1);
+ assertTrue(e.getMessage().indexOf("Formats such as Office 2003
XML") > -1);
+ }
// ODF / ODS - Stream
try {
@@ -707,9 +732,6 @@ public final class TestPackage {
// Plain Text - Stream
// Plain Text - File
-
- // Raw XML - Stream
- // Raw XML - File
}
@Test(expected=IOException.class)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]