Author: nick
Date: Fri Feb 26 23:32:17 2016
New Revision: 1732579
URL: http://svn.apache.org/viewvc?rev=1732579&view=rev
Log:
Refactor to pull out the list of Excel 97+ directory entry names to a common
place, avoiding duplication. Also starts on unit testing #59074
Modified:
poi/trunk/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java
poi/trunk/src/java/org/apache/poi/hssf/model/InternalWorkbook.java
poi/trunk/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
poi/trunk/src/testcases/org/apache/poi/hssf/dev/TestBiffViewer.java
Modified:
poi/trunk/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java?rev=1732579&r1=1732578&r2=1732579&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java (original)
+++ poi/trunk/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java Fri Feb 26 23:32:17 2016
@@ -25,6 +25,7 @@ import org.apache.poi.hssf.eventusermode
import org.apache.poi.hssf.record.*;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
/**
* Low level event based HSSF reader. Pass either a DocumentInputStream to
@@ -59,20 +60,20 @@ public class HSSFEventFactory {
*/
public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir)
throws IOException {
// some old documents have "WORKBOOK" or "BOOK"
- final String name;
+ String name = null;
Set<String> entryNames = dir.getEntryNames();
- if (entryNames.contains("Workbook")) {
- name = "Workbook";
- } else if (entryNames.contains("WORKBOOK")) {
- name = "WORKBOOK";
- } else if (entryNames.contains("BOOK")) {
- name = "BOOK";
- } else {
- name = "Workbook";
+ for (String potentialName : WORKBOOK_DIR_ENTRY_NAMES) {
+ if (entryNames.contains(potentialName)) {
+ name = potentialName;
+ break;
+ }
+ }
+ // If in doubt, go for the default
+ if (name == null) {
+ name = WORKBOOK_DIR_ENTRY_NAMES[0];
}
InputStream in = dir.createDocumentInputStream(name);
-
processEvents(req, in);
}
Modified: poi/trunk/src/java/org/apache/poi/hssf/model/InternalWorkbook.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/hssf/model/InternalWorkbook.java?rev=1732579&r1=1732578&r2=1732579&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/model/InternalWorkbook.java (original)
+++ poi/trunk/src/java/org/apache/poi/hssf/model/InternalWorkbook.java Fri Feb 26 23:32:17 2016
@@ -123,6 +123,16 @@ public final class InternalWorkbook {
*/
private static final int MAX_SENSITIVE_SHEET_NAME_LEN = 31;
+ /**
+ * Normally, the Workbook will be in a POIFS Stream called
+ * "Workbook". However, some weird XLS generators use "WORKBOOK"
+ * or "BOOK".
+ */
+ public static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
+ "Workbook", // as per BIFF8 spec
+ "WORKBOOK", // Typically from third party programs
+ "BOOK", // Typically odd Crystal Reports exports
+ };
private static final POILogger log =
POILogFactory.getLogger(InternalWorkbook.class);
private static final int DEBUG = POILogger.DEBUG;
Modified: poi/trunk/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java?rev=1732579&r1=1732578&r2=1732579&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java (original)
+++ poi/trunk/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java Fri Feb 26 23:32:17 2016
@@ -17,6 +17,8 @@
package org.apache.poi.hssf.usermodel;
+import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
+
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
@@ -95,7 +97,6 @@ import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
-
/**
* High level representation of a workbook. This is the first object most users
* will construct whether they are reading or writing a workbook. It is also the
@@ -243,17 +244,6 @@ public final class HSSFWorkbook extends
this(fs.getRoot(), fs, preserveNodes);
}
- /**
- * Normally, the Workbook will be in a POIFS Stream
- * called "Workbook". However, some weird XLS generators use "WORKBOOK"
- */
- private static final String[] WORKBOOK_DIR_ENTRY_NAMES = {
- "Workbook", // as per BIFF8 spec
- "WORKBOOK", // Typically from third party programs
- "BOOK", // Typically odd Crystal Reports exports
- };
-
-
public static String getWorkbookDirEntryName(DirectoryNode directory) {
for (int i = 0; i < WORKBOOK_DIR_ENTRY_NAMES.length; i++) {
Modified:
poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java?rev=1732579&r1=1732578&r2=1732579&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java Fri Feb 26 23:32:17 2016
@@ -66,6 +66,8 @@ import org.apache.poi.xwpf.extractor.XWP
import org.apache.poi.xwpf.usermodel.XWPFRelation;
import org.apache.xmlbeans.XmlException;
+import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAMES;
+
/**
* Figures out the correct POITextExtractor for your supplied
* document, and returns it.
@@ -301,13 +303,13 @@ public class ExtractorFactory {
{
// Look for certain entries in the stream, to figure it
// out from
- if (poifsDir.hasEntry("Workbook") ||
- // some XLS files have different entry-names
- poifsDir.hasEntry("WORKBOOK") || poifsDir.hasEntry("BOOK")) {
- if (getPreferEventExtractor()) {
- return new EventBasedExcelExtractor(poifsDir);
+ for (String workbookName : WORKBOOK_DIR_ENTRY_NAMES) {
+ if (poifsDir.hasEntry(workbookName)) {
+ if (getPreferEventExtractor()) {
+ return new EventBasedExcelExtractor(poifsDir);
+ }
+ return new ExcelExtractor(poifsDir);
}
- return new ExcelExtractor(poifsDir);
}
if (poifsDir.hasEntry("WordDocument")) {
Modified:
poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java?rev=1732579&r1=1732578&r2=1732579&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java Fri Feb 26 23:32:17 2016
@@ -54,6 +54,7 @@ import org.apache.poi.xssf.extractor.XSS
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.xmlbeans.XmlException;
import org.junit.BeforeClass;
+import org.junit.Ignore;
import org.junit.Test;
/**
@@ -920,4 +921,23 @@ public class TestExtractorFactory {
}
}
}
+
+ /**
+ * #59074 - No supported documents found in the OLE2 stream on
+ * a valid Excel file
+ */
+ @Ignore
+ @Test
+ public void a() throws Exception {
+ POITextExtractor ext = ExtractorFactory.createExtractor(
+ POIDataSamples.getSpreadSheetInstance().getFile("59074.xls"));
+ assertNotNull(ext);
+
+ String text = ext.getText();
+ ext.close();
+
+System.err.println(text);
+ assertNotNull(text);
+ assertTrue(text.contains("test"));
+ }
}
Modified: poi/trunk/src/testcases/org/apache/poi/hssf/dev/TestBiffViewer.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/testcases/org/apache/poi/hssf/dev/TestBiffViewer.java?rev=1732579&r1=1732578&r2=1732579&view=diff
==============================================================================
--- poi/trunk/src/testcases/org/apache/poi/hssf/dev/TestBiffViewer.java (original)
+++ poi/trunk/src/testcases/org/apache/poi/hssf/dev/TestBiffViewer.java Fri Feb 26 23:32:17 2016
@@ -36,6 +36,7 @@ public class TestBiffViewer extends Base
EXCLUDED.add("43493.xls"); // HSSFWorkbook cannot open it as well
EXCLUDED.add("password.xls");
EXCLUDED.add("46904.xls");
+ EXCLUDED.add("59074.xls"); // Biff 5 / Excel 95
EXCLUDED.add("35897-type4.xls"); // unsupported crypto api header
EXCLUDED.add("xor-encryption-abc.xls"); // unsupported XOR-encryption
EXCLUDED.add("testEXCEL_2.xls"); // Biff 2 / Excel 2, pre-OLE2
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org