Author: centic
Date: Wed Oct 4 19:54:21 2017
New Revision: 1811144
URL: http://svn.apache.org/viewvc?rev=1811144&view=rev
Log:
Add some more code from the separate integration test project to be able to
publish the remaining functionality as a separate project at some point
Added:
poi/trunk/src/integrationtest/org/apache/poi/BaseIntegrationTest.java
poi/trunk/src/integrationtest/org/apache/poi/stress/FileHandlerFactory.java
Modified:
poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java
Added: poi/trunk/src/integrationtest/org/apache/poi/BaseIntegrationTest.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/BaseIntegrationTest.java?rev=1811144&view=auto
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/BaseIntegrationTest.java
(added)
+++ poi/trunk/src/integrationtest/org/apache/poi/BaseIntegrationTest.java Wed
Oct 4 19:54:21 2017
@@ -0,0 +1,138 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
+import org.apache.poi.hssf.OldExcelFormatException;
+import org.apache.poi.hwpf.OldWordFileFormatException;
+import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
+import org.apache.poi.stress.*;
+import org.junit.Assume;
+
+import java.io.*;
+import java.util.zip.ZipException;
+
+import static org.junit.Assert.assertNotNull;
+
+/**
+ * Runs one test-data file through a {@link FileHandler}.
+ *
+ * <p>{@link #test()} first lets the handler process the file and afterwards runs
+ * the handler's extraction checks. If the handler reports that the content does
+ * not fit the file extension, the file is retried with the handler registered
+ * for the corresponding other format. Encrypted files, truncated Zip files,
+ * unsupported old formats and RTF/PDF/HTML files are skipped through a failed
+ * JUnit assumption instead of being reported as failures.</p>
+ */
+public class BaseIntegrationTest {
+    private final File rootDir;
+    private final String file;
+    // not final: handleWrongExtension() swaps in the handler of the other format
+    private FileHandler handler;
+
+    /**
+     * @param rootDir directory that {@code file} is resolved against
+     * @param file    name of the file to test, relative to {@code rootDir}
+     * @param handler handler to run the file through; {@link #test()} fails
+     *                with an assertion error if this is {@code null}
+     */
+    public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
+        this.rootDir = rootDir;
+        this.file = file;
+        this.handler = handler;
+    }
+
+    /**
+     * Processes the file with the handler and then runs the extraction checks.
+     *
+     * @throws Exception any failure of the handler that is not one of the
+     *                   known, tolerated conditions handled below
+     */
+    public void test() throws Exception {
+        assertNotNull("Unknown file extension for file: " + file + ": " + TestAllFiles.getExtension(file), handler);
+
+        File inputFile = new File(rootDir, file);
+        try {
+            handleFile(inputFile);
+        } catch (OfficeXmlFileException e) {
+            // check if the file-extension is wrong
+            if (!messageContains(e, "data appears to be in the Office 2007")) {
+                throw e;
+            }
+
+            // use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
+            handleWrongExtension(inputFile, e);
+        } catch (OldWordFileFormatException | OldExcelFormatException | OldPowerPointFormatException ignored) {
+            // at least perform extracting tests on these old files (done after this try-block)
+        } catch (OldFileFormatException e) {
+            // Not even text extraction is supported for these, so handleExtracting() is not attempted
+            skip("File " + file + " excluded because it is unsupported old Excel format");
+        } catch (EncryptedDocumentException e) {
+            // Do not try to read encrypted files
+            skip("File " + file + " excluded because it is password-encrypted");
+        } catch (ZipException e) {
+            // some files are corrupted; compare from the constant side because the message may be null
+            if ("unexpected EOF".equals(e.getMessage())) {
+                skip("File " + file + " excluded because the Zip file is incomplete");
+            }
+
+            throw e;
+        } catch (IOException e) {
+            // sometimes binary format has XML-format-extension...
+            if (messageContains(e, "rong file format or file extension for OO XML file")) {
+                handleWrongExtension(inputFile, e);
+            } else {
+                throw e;
+            }
+        } catch (IllegalArgumentException e) {
+            // ignore errors for documents with incorrect extension
+            String message = e.getMessage();
+            if ("The document is really a RTF file".equals(message) ||
+                    "The document is really a PDF file".equals(message) ||
+                    "The document is really a HTML file".equals(message)) {
+                skip("File " + file + " excluded because it is actually a PDF/RTF/HTML file");
+            }
+
+            // null-safe: an exception without message is simply re-thrown
+            if ("The document is really a OOXML file".equals(message)) {
+                handleWrongExtension(inputFile, e);
+            } else {
+                throw e;
+            }
+        }
+
+        try {
+            handler.handleExtracting(inputFile);
+        } catch (EncryptedDocumentException e) {
+            // Do not try to read encrypted files
+            skip("File " + file + " excluded because it is password-encrypted");
+        }
+    }
+
+    /**
+     * Retries the file with the handler of the corresponding other format
+     * (binary instead of OOXML or the other way around) and keeps that handler
+     * for the remainder of the test.
+     *
+     * @param inputFile the file to process again
+     * @param e         the exception that indicated the wrong extension; it is
+     *                  re-thrown if the current handler has no counterpart
+     * @throws Exception {@code e} itself, or any failure of the second attempt
+     */
+    void handleWrongExtension(File inputFile, Exception e) throws Exception {
+        String otherExtension = counterpartExtension();
+        if (otherExtension == null) {
+            throw e;
+        }
+
+        handler = TestAllFiles.HANDLERS.get(otherExtension);
+        handleFile(inputFile);
+    }
+
+    /**
+     * Returns the extension whose handler covers the "other" format of the
+     * current handler, or {@code null} if the current handler has no counterpart.
+     */
+    private String counterpartExtension() {
+        // use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
+        if (handler instanceof HWPFFileHandler) {
+            return ".docx";
+        }
+        if (handler instanceof HSSFFileHandler) {
+            return ".xlsx";
+        }
+        if (handler instanceof HSLFFileHandler) {
+            return ".pptx";
+        }
+        // and the other way around, use HWPF instead of XWPF and so forth
+        if (handler instanceof XWPFFileHandler) {
+            return ".doc";
+        }
+        if (handler instanceof XSSFFileHandler) {
+            return ".xls";
+        }
+        if (handler instanceof XSLFFileHandler) {
+            return ".ppt";
+        }
+        return null;
+    }
+
+    /** Opens the file as a buffered stream and passes it to the current handler. */
+    private void handleFile(File inputFile) throws Exception {
+        try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
+            handler.handleFile(newStream, inputFile.getAbsolutePath());
+        }
+    }
+
+    /** Null-safe check whether the message of {@code e} contains {@code fragment}. */
+    private static boolean messageContains(Exception e, String fragment) {
+        String message = e.getMessage();
+        return message != null && message.contains(fragment);
+    }
+
+    /** Marks the current test as skipped by failing a JUnit assumption with the given reason. */
+    private static void skip(String reason) {
+        //noinspection ConstantConditions
+        Assume.assumeFalse(reason, true);
+    }
+}
Modified: poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java?rev=1811144&r1=1811143&r2=1811144&view=diff
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java (original)
+++ poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java Wed Oct 4
19:54:21 2017
@@ -91,13 +91,13 @@ import org.junit.runners.Parameterized.P
public class TestAllFiles {
private static final File ROOT_DIR = new File("test-data");
- static final String[] SCAN_EXCLUDES = new String[] { "**/.svn/**",
"lost+found", "**/.git/**" };
+ public static final String[] SCAN_EXCLUDES = new String[] { "**/.svn/**",
"lost+found", "**/.git/**" };
private static final Map<String,String> FILE_PASSWORD;
// map file extensions to the actual mappers
- static final Map<String, FileHandler> HANDLERS = new HashMap<>();
+ public static final Map<String, FileHandler> HANDLERS = new HashMap<>();
static {
// Excel
HANDLERS.put(".xls", new HSSFFileHandler());
@@ -443,7 +443,7 @@ public class TestAllFiles {
handler.handleAdditional(inputFile);
}
- static String getExtension(String file) {
+ public static String getExtension(String file) {
int pos = file.lastIndexOf('.');
if(pos == -1 || pos == file.length()-1) {
return file;
@@ -452,7 +452,7 @@ public class TestAllFiles {
return file.substring(pos).toLowerCase(Locale.ROOT);
}
- private static class NullFileHandler implements FileHandler {
+ public static class NullFileHandler implements FileHandler {
@Override
public void handleFile(InputStream stream, String path) throws
Exception {
}
Added:
poi/trunk/src/integrationtest/org/apache/poi/stress/FileHandlerFactory.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/stress/FileHandlerFactory.java?rev=1811144&view=auto
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/stress/FileHandlerFactory.java
(added)
+++ poi/trunk/src/integrationtest/org/apache/poi/stress/FileHandlerFactory.java
Wed Oct 4 19:54:21 2017
@@ -0,0 +1,120 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+/**
+ * Chooses the {@link FileHandler} for a file based on its MIME type.
+ */
+public class FileHandlerFactory {
+    // Ordered rules from MIME-type patterns to the FileHandlers that should be
+    // able to work with that file. getHandler() returns the first rule that
+    // matches, so exact types must be registered before the broader wildcard
+    // rules that would match them as well. A LinkedHashMap is used because it
+    // iterates in insertion order; Pattern does not override hashCode(), so a
+    // HashMap would apply the rules in an arbitrary order.
+    private static final Map<Pattern, FileHandler> MIME_TYPES = new LinkedHashMap<>();
+    static {
+        ////////////////// Word
+
+        register("application/vnd.ms-word.document.macroenabled.12", new XWPFFileHandler());
+        register("application/vnd.ms-word.template.macroenabled.12", new XWPFFileHandler());
+
+        // application/msword
+        register(".*msword.*", new HWPFFileHandler());
+        // application/vnd.ms-word
+        register(".*ms-word.*", new HWPFFileHandler());
+
+        // application/vnd.openxmlformats-officedocument.wordprocessingml.document
+        register(".*wordprocessingml.*", new XWPFFileHandler());
+
+        ////////////////// Excel
+
+        register("application/vnd.ms-excel.addin.macroEnabled.12", new XSSFFileHandler());
+        register("application/vnd.ms-excel.sheet.binary.macroEnabled.12", new XSSFFileHandler());
+
+        // application/msexcel
+        register(".*msexcel.*", new HSSFFileHandler());
+        // application/vnd.ms-excel
+        register(".*ms-excel.*", new HSSFFileHandler());
+
+        // application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
+        register(".*spreadsheetml.*", new XSSFFileHandler());
+
+        ////////////////// Powerpoint
+
+        // application/vnd.ms-powerpoint
+        register("application/vnd.ms-powerpoint", new HSLFFileHandler());
+        // application/vnd.ms-officetheme
+        register("application/vnd.ms-officetheme", new HSLFFileHandler());
+
+        // application/vnd.openxmlformats-officedocument.presentationml.presentation
+        register(".*presentationml.*", new XSLFFileHandler());
+        // application/vnd.ms-powerpoint.presentation.macroenabled.12
+        register("application/vnd.ms-powerpoint.presentation.macroenabled.12", new XSLFFileHandler());
+        // application/vnd.ms-powerpoint.slideshow.macroenabled.12
+        register("application/vnd.ms-powerpoint.slideshow.macroenabled.12", new XSLFFileHandler());
+
+        ////////////////// Mail/TNEF
+
+        // application/vnd.ms-tnef
+        register(".*ms-tnef.*", new HMEFFileHandler());
+
+        // application/vnd.ms-outlook
+        register("application/vnd.ms-outlook", new HSMFFileHandler());
+
+        ////////////////// Visio
+
+        // application/vnd.visio
+        register("application/vnd.visio.*", new HDGFFileHandler());
+
+        // application/vnd.ms-visio.viewer
+        // registered explicitly so that it keeps the HDGF handler although the
+        // following rule covers all other "vnd.ms-visio." types
+        register("application/vnd\\.ms-visio\\.viewer", new HDGFFileHandler());
+
+        // application/vnd.ms-visio.drawing
+        // the trailing ".*" is required because getHandler() matches the whole string
+        register(".*vnd.ms-visio\\..*", new XDGFFileHandler());
+
+        // any remaining visio type
+        register(".*visio.*", new HDGFFileHandler());
+
+
+        ////////////////// Publisher
+
+        // application/x-mspublisher
+        register("application/x-mspublisher", new HPBFFileHandler());
+
+
+        ////////////////// Others
+
+        // special type used by Tika
+        register("application/x-tika-ooxml.*", new OPCFileHandler());
+        // special type used by Tika
+        register("application/x-tika-msoffice.*", new POIFSFileHandler());
+
+        // application/x-tika-old-excel
+        register("application/x-tika-old-excel", new POIFSFileHandler());
+
+        // application/vnd.openxmlformats-officedocument.drawingml.chart+xml
+        // ?!MIME_TYPES.put(Pattern.compile(".*drawingml.*"), ".dwg");
+
+        // application/vnd.openxmlformats-officedocument.vmlDrawing
+        // ?!MIME_TYPES.put(Pattern.compile(".*vmlDrawing.*"), ".dwg");
+    }
+
+    /**
+     * Appends a rule to {@link #MIME_TYPES}. The expression is compiled
+     * case-insensitively because MIME type names are not case sensitive and
+     * the rules above spell "macroenabled" in both lower and mixed case.
+     */
+    private static void register(String regex, FileHandler handler) {
+        MIME_TYPES.put(Pattern.compile(regex, Pattern.CASE_INSENSITIVE), handler);
+    }
+
+    /**
+     * Returns the handler of the first registered rule whose pattern matches
+     * the complete MIME type.
+     *
+     * @param mimeType the MIME type to look up, may be {@code null}
+     * @return the matching handler, or {@code null} if {@code mimeType} is
+     *         {@code null} or no rule matches
+     */
+    public static FileHandler getHandler(String mimeType) {
+        if (mimeType == null) {
+            return null;
+        }
+
+        for (Map.Entry<Pattern, FileHandler> entry : MIME_TYPES.entrySet()) {
+            if (entry.getKey().matcher(mimeType).matches()) {
+                return entry.getValue();
+            }
+        }
+
+        return null;
+    }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]