Author: centic
Date: Mon Mar 16 19:55:04 2020
New Revision: 1875267
URL: http://svn.apache.org/viewvc?rev=1875267&view=rev
Log:
Adjust POIFileScanner for mass regression testing
Fix closing file-handles when scanning for files to test
Refactor to make it easier to test
Some more tests
Modified:
poi/trunk/src/integrationtest/org/apache/poi/POIFileScanner.java
Modified: poi/trunk/src/integrationtest/org/apache/poi/POIFileScanner.java
URL:
http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/POIFileScanner.java?rev=1875267&r1=1875266&r2=1875267&view=diff
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/POIFileScanner.java (original)
+++ poi/trunk/src/integrationtest/org/apache/poi/POIFileScanner.java Mon Mar 16 19:55:04 2020
@@ -23,10 +23,15 @@ import org.apache.poi.hssf.usermodel.HSS
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.poifs.filesystem.FileMagic;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.stress.FileHandler;
+import org.apache.poi.stress.XSSFFileHandler;
+import org.apache.poi.util.SuppressForbidden;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.tools.ant.DirectoryScanner;
+import org.junit.Ignore;
+import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
@@ -37,6 +42,8 @@ import java.util.Collection;
import java.util.List;
import java.util.Map;
+import static org.junit.Assert.assertEquals;
+
/**
* Helper class to scan a folder for files and return a collection of
* found files together with the matching {@link FileHandler}.
@@ -55,7 +62,6 @@ public class POIFileScanner {
* @throws IOException If determining the file-type fails
*/
public static Collection<Map.Entry<String, FileHandler>> scan(File
rootDir) throws IOException {
-
DirectoryScanner scanner = new DirectoryScanner();
scanner.setBasedir(rootDir);
@@ -65,10 +71,11 @@ public class POIFileScanner {
scanner.scan();
- System.out.println("Handling " + scanner.getIncludedFiles().length + "
files");
+ String[] includedFiles = scanner.getIncludedFiles();
+ System.out.println("Handling " + includedFiles.length + " files");
List<Map.Entry<String, FileHandler>> files = new ArrayList<>();
- for(String file : scanner.getIncludedFiles()) {
+ for(String file : includedFiles) {
// breaks files with slash in their name on Linux:
// file = file.replace('\\', '/'); // ... failures/handlers lookup
doesn't work on windows otherwise
@@ -100,71 +107,99 @@ public class POIFileScanner {
protected static FileHandler getFileHandler(File rootDir, String file)
throws IOException {
FileHandler fileHandler =
TestAllFiles.HANDLERS.get(TestAllFiles.getExtension(file));
if(fileHandler == null) {
- File testFile = new File(rootDir, file);
- FileMagic magic = FileMagic.valueOf(testFile);
- // if we have a file-type that we can read, but no extension, we
try to determine the
- // file type manually
+ // we could not detect a type of file based on the extension, so we
+ // need to take a close look at the file
+ fileHandler = detectUnnamedFile(rootDir, file);
+ }
+ return fileHandler;
+ }
- switch(magic) {
- case OLE2: {
- try {
- try (POIFSFileSystem fs = new
POIFSFileSystem(testFile, true)) {
- HSSFWorkbook.getWorkbookDirEntryName(fs.getRoot());
- }
+ private static FileHandler detectUnnamedFile(File rootDir, String file)
throws IOException {
+ File testFile = new File(rootDir, file);
- // we did not get an exception, so it seems this is a
HSSFWorkbook
- fileHandler = TestAllFiles.HANDLERS.get(".xls");
- } catch (IOException | RuntimeException e) {
- try {
- try (FileInputStream istream = new
FileInputStream(testFile)) {
- try (HWPFDocument ignored = new
HWPFDocument(istream)) {
- // seems to be a valid document
- fileHandler =
TestAllFiles.HANDLERS.get(".doc");
- }
+ // find out if it looks like OLE2 (HSSF, HSLF, HWPF, ...) or OOXML
(XSSF, XSLF, XWPF, ...)
+ // and then determine the file type accordingly
+ FileMagic magic = FileMagic.valueOf(testFile);
+ switch (magic) {
+ case OLE2: {
+ try {
+ try (POIFSFileSystem fs = new POIFSFileSystem(testFile,
true)) {
+ HSSFWorkbook.getWorkbookDirEntryName(fs.getRoot());
+ }
+
+ // we did not get an exception, so it seems this is a
HSSFWorkbook
+ return TestAllFiles.HANDLERS.get(".xls");
+ } catch (IOException | RuntimeException e) {
+ try {
+ try (FileInputStream istream = new
FileInputStream(testFile)) {
+ try (HWPFDocument ignored = new
HWPFDocument(istream)) {
+ // seems to be a valid document
+ return TestAllFiles.HANDLERS.get(".doc");
}
- } catch (IOException | RuntimeException e2) {
- System.out.println("Could not open POIFSFileSystem
for OLE2 file " + testFile + ": " + e + " and " + e2);
- fileHandler = new TestAllFiles.NullFileHandler();
}
+ } catch (IOException | RuntimeException e2) {
+ System.out.println("Could not open POIFSFileSystem for
OLE2 file " + testFile + ": " + e + " and " + e2);
+ return new TestAllFiles.NullFileHandler();
}
- break;
}
- case OOXML: {
- try {
- WorkbookFactory.create(testFile);
-
+ }
+ case OOXML: {
+ try {
+ try (Workbook ignored = WorkbookFactory.create(testFile,
null, true)) {
// seems to be a valid workbook
- fileHandler = TestAllFiles.HANDLERS.get(".xlsx");
- } catch (IOException | RuntimeException e) {
- try {
- try (FileInputStream is = new
FileInputStream(testFile)) {
- try (XWPFDocument ignored = new
XWPFDocument(is)) {
- // seems to be a valid document
- fileHandler =
TestAllFiles.HANDLERS.get(".docx");
- }
+ return TestAllFiles.HANDLERS.get(".xlsx");
+ }
+ } catch (IOException | RuntimeException e) {
+ try {
+ try (FileInputStream is = new
FileInputStream(testFile)) {
+ try (XWPFDocument ignored = new XWPFDocument(is)) {
+ // seems to be a valid document
+ return TestAllFiles.HANDLERS.get(".docx");
}
- } catch (IOException | RuntimeException e2) {
- System.out.println("Could not open POIFSFileSystem
for OOXML file " + testFile + ": " + e + " and " + e2);
- fileHandler = new TestAllFiles.NullFileHandler();
}
+ } catch (IOException | RuntimeException e2) {
+ System.out.println("Could not open POIFSFileSystem for
OOXML file " + testFile + ": " + e + " and " + e2);
+ return new TestAllFiles.NullFileHandler();
}
- break;
}
-
- // do not warn about a few detected file types
- case RTF:
- case PDF:
- case HTML:
- fileHandler = new TestAllFiles.NullFileHandler();
- break;
}
- if(fileHandler == null) {
- System.out.println("Did not get a handler for extension " +
TestAllFiles.getExtension(file) +
- " of file " + file + ": " + magic);
- fileHandler = new TestAllFiles.NullFileHandler();
- }
+ // do not warn about a few detected file types
+ case RTF:
+ case PDF:
+ case HTML:
+ return new TestAllFiles.NullFileHandler();
}
- return fileHandler;
+
+ System.out.println("Did not get a handler for extension " +
TestAllFiles.getExtension(file) +
+ " of file " + file + ": " + magic);
+ return new TestAllFiles.NullFileHandler();
+ }
+
+ @Ignore
+ @Test
+ @SuppressForbidden("Just an ignored test")
+ public void testInvalidFile() throws IOException, InterruptedException {
+ FileHandler fileHandler = POIFileScanner.getFileHandler(new
File("/usbc/CommonCrawl"),
+
"www.bgs.ac.uk_downloads_directdownload.cfm_id=2362&noexcl=true&t=west_20sussex_20-_20building_20stone_20quarries");
+
+ assertEquals(XSSFFileHandler.class, fileHandler.getClass());
+
+ // to show the output from ZipFile() from commons-compress
+ // although I did not find out yet why the ZipFile is not closed here
+ System.gc();
+ Thread.sleep(1000);
+ System.gc();
+ Thread.sleep(1000);
+ }
+
+ @Test
+ public void testDetectUnnamedFile() throws IOException {
+ POIFileScanner.detectUnnamedFile(new File("test-data/spreadsheet"),
"49156.xlsx");
+ }
+
+ @Test
+ public void test() throws IOException {
+ POIFileScanner.scan(new File("test-data"));
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]