Author: tallison
Date: Tue Aug 5 19:02:11 2014
New Revision: 1615980

URL: http://svn.apache.org/r1615980
Log:
TIKA-1380; fix cases where ole.getLabel() == null for ole attachments

Modified: tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java Modified: tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java?rev=1615980&r1=1615979&r2=1615980&view=diff ============================================================================== --- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java (original) +++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java Tue Aug 5 19:02:11 2014 @@ -25,6 +25,7 @@ import org.apache.commons.io.FileUtils; import org.junit.After; import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertFalse; import org.junit.Before; import org.junit.Test; @@ -236,7 +237,7 @@ public class TikaCLITest { // Image of the ChemDraw molecule File expectedIMG = new File(tempFile, "file4.png"); // OLE10Native - File expectedOLE10 = new File(tempFile, "MBD002B0FA6"); + File expectedOLE10 = new File(tempFile, "MBD002B0FA6_file5.bin"); // Something that really isnt a text file... Not sure what it is??? 
File expected262FE3 = new File(tempFile, "MBD00262FE3.txt"); // Image of one of the embedded resources @@ -252,10 +253,16 @@ public class TikaCLITest { } } protected static void assertExtracted(File f, String allFiles) { + assertTrue( "File " + f.getName() + " not found in " + allFiles, f.exists() ); + + assertFalse( + "File " + f.getName() + " is a directory!", f.isDirectory() + ); + assertTrue( "File " + f.getName() + " wasn't extracted with contents", f.length() > 0 Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java?rev=1615980&r1=1615979&r2=1615980&view=diff ============================================================================== --- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java Tue Aug 5 19:02:11 2014 @@ -149,8 +149,9 @@ abstract class AbstractPOIFSExtractor { try { // Try to un-wrap the OLE10Native record: Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode)dir); - metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '/' + ole.getLabel()); - + if (ole.getLabel() != null) { + metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '/' + ole.getLabel()); + } byte[] data = ole.getDataBuffer(); embedded = TikaInputStream.get(data); } catch (Ole10NativeException ex) { Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java?rev=1615980&r1=1615979&r2=1615980&view=diff ============================================================================== --- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java (original) +++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java Tue Aug 5 19:02:11 2014 @@ -248,7 +248,9 @@ public abstract class AbstractOOXMLExtra // TIKA-704: OLE 1.0 embedded document Ole10Native ole = Ole10Native.createFromEmbeddedOleObject(fs); - metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel()); + if (ole.getLabel() != null) { + metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel()); + } byte[] data = ole.getDataBuffer(); if (data != null) { stream = TikaInputStream.get(data);