Author: tallison
Date: Mon May 12 15:14:09 2014
New Revision: 1593996

URL: http://svn.apache.org/r1593996
Log:
TIKA-1231: added more null checks after underlying fix was made in PDFBox-1.8.5

Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1593996&r1=1593995&r2=1593996&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon May 12 15:14:09 2014
@@ -1,5 +1,7 @@
 Release 1.6 - ??/??/2014
 
+  * Upgrade to PDFBox 1.8.5 (TIKA-1290, TIKA-1231, TIKA-1233)
+
   * Zip Container Detection for DWFX and XPS formats, which are OPC
     based (TIKA-1204, TIKA-1221)
 

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java?rev=1593996&r1=1593995&r2=1593996&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDF2XHTML.java Mon May 12 15:14:09 2014
@@ -440,8 +440,15 @@ class PDF2XHTML extends PDFTextStripper 
         EmbeddedDocumentExtractor extractor = getEmbeddedDocumentExtractor();
         for (Map.Entry<String,COSObjectable> ent : embeddedFileNames.entrySet()) {
             PDComplexFileSpecification spec = (PDComplexFileSpecification) ent.getValue();
+            if (spec == null) {
+                //skip silently
+                continue;
+            }
             PDEmbeddedFile file = spec.getEmbeddedFile();
-
+            if (file == null) {
+                //skip silently
+                continue;
+            }
             Metadata metadata = new Metadata();
             // TODO: other metadata?
             metadata.set(Metadata.RESOURCE_NAME_KEY, ent.getKey());


Reply via email to