Author: tallison
Date: Tue Aug  5 19:02:11 2014
New Revision: 1615980

URL: http://svn.apache.org/r1615980
Log:
TIKA-1380; fix cases where ole.getLabel() == null for ole attachments

Modified:
    tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java

Modified: tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java?rev=1615980&r1=1615979&r2=1615980&view=diff
==============================================================================
--- tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java (original)
+++ tika/trunk/tika-app/src/test/java/org/apache/tika/cli/TikaCLITest.java Tue Aug  5 19:02:11 2014
@@ -25,6 +25,7 @@ import org.apache.commons.io.FileUtils;
 
 import org.junit.After;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertFalse;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -236,7 +237,7 @@ public class TikaCLITest {
             // Image of the ChemDraw molecule
             File expectedIMG = new File(tempFile, "file4.png");
             // OLE10Native
-            File expectedOLE10 = new File(tempFile, "MBD002B0FA6");
+            File expectedOLE10 = new File(tempFile, "MBD002B0FA6_file5.bin");
             // Something that really isnt a text file... Not sure what it is???
             File expected262FE3 = new File(tempFile, "MBD00262FE3.txt");
             // Image of one of the embedded resources
@@ -252,10 +253,16 @@ public class TikaCLITest {
         }
     }
     protected static void assertExtracted(File f, String allFiles) {
+
         assertTrue(
                 "File " + f.getName() + " not found in " + allFiles,
                 f.exists()
         );
+
+        assertFalse(
+                "File " + f.getName() + " is a directory!", f.isDirectory()
+        );
+
         assertTrue(
                 "File " + f.getName() + " wasn't extracted with contents",
                 f.length() > 0

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java?rev=1615980&r1=1615979&r2=1615980&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/AbstractPOIFSExtractor.java Tue Aug  5 19:02:11 2014
@@ -149,8 +149,9 @@ abstract class AbstractPOIFSExtractor {
                 try {
                     // Try to un-wrap the OLE10Native record:
                     Ole10Native ole = Ole10Native.createFromEmbeddedOleObject((DirectoryNode)dir);
-                    metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '/' + ole.getLabel());
-                    
+                    if (ole.getLabel() != null) {
+                        metadata.set(Metadata.RESOURCE_NAME_KEY, dir.getName() + '/' + ole.getLabel());
+                    }
                     byte[] data = ole.getDataBuffer();
                     embedded = TikaInputStream.get(data);
                 } catch (Ole10NativeException ex) {

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java?rev=1615980&r1=1615979&r2=1615980&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/AbstractOOXMLExtractor.java Tue Aug  5 19:02:11 2014
@@ -248,7 +248,9 @@ public abstract class AbstractOOXMLExtra
                 // TIKA-704: OLE 1.0 embedded document
                 Ole10Native ole =
                         Ole10Native.createFromEmbeddedOleObject(fs);
-                metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel());
+                if (ole.getLabel() != null) {
+                    metadata.set(Metadata.RESOURCE_NAME_KEY, ole.getLabel());
+                }
                 byte[] data = ole.getDataBuffer();
                 if (data != null) {
                     stream = TikaInputStream.get(data);


Reply via email to