Repository: tika
Updated Branches:
  refs/heads/master 07aea36f7 -> 415381212


TIKA-2013 -- upgrade to POI 3.15-final, make sure to add new close() throughout 
for MAPIMessage and NPOIFS


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/cc6f6dcc
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/cc6f6dcc
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/cc6f6dcc

Branch: refs/heads/master
Commit: cc6f6dcc8fed2826ae8093b7a4aed0ddee74dc40
Parents: 07aea36
Author: tballison <talli...@mitre.org>
Authored: Wed Sep 21 13:23:49 2016 -0400
Committer: tballison <talli...@mitre.org>
Committed: Wed Sep 21 13:23:49 2016 -0400

----------------------------------------------------------------------
 CHANGES.txt                                     |  4 +--
 tika-bundle/pom.xml                             |  2 +-
 tika-parsers/pom.xml                            |  2 +-
 .../parser/microsoft/JackcessExtractor.java     |  5 +--
 .../tika/parser/microsoft/OfficeParser.java     | 38 ++++++++++++--------
 .../tika/parser/microsoft/OutlookExtractor.java | 12 +++++--
 6 files changed, 40 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tika/blob/cc6f6dcc/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 6f0fda6..6597dc9 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
 Release 1.14 - ???
 
+  * Upgrade to POI 3.15 (TIKA-2013).
+
   * Upgrade to PDFBox 2.0.3 (TIKA-2051).
 
   * Fix hyperlinks with formatting in DOC and DOCX (TIKA-1255
@@ -42,8 +44,6 @@ Release 1.14 - ???
      * MBOX (TIKA-2042)
      * Stata DTA (TIKA-2064)
 
-  * Upgrade to PDFBox 2.0.2 (TIKA-1996).
-
   * Add configurable maximum threshold for number of events extracted
     from the XMP Media Management Schema in JempboxExtractor (TIKA-1999).
 

http://git-wip-us.apache.org/repos/asf/tika/blob/cc6f6dcc/tika-bundle/pom.xml
----------------------------------------------------------------------
diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml
index d350f10..02247f1 100644
--- a/tika-bundle/pom.xml
+++ b/tika-bundle/pom.xml
@@ -126,7 +126,7 @@
             <Embed-Dependency>
               tika-parsers;inline=true,
               commons-compress, xz, commons-codec, commons-csv,
-              commons-io, commons-exec, junrar,
+              commons-io, commons-exec, commons-collections4, junrar,
               
pdfbox,pdfbox-tools,pdfbox-debugger,fontbox,jempbox,bcmail-jdk15on,bcprov-jdk15on,bcpkix-jdk15on,
               poi,poi-scratchpad,poi-ooxml,poi-ooxml-schemas,
               curvesapi,

http://git-wip-us.apache.org/repos/asf/tika/blob/cc6f6dcc/tika-parsers/pom.xml
----------------------------------------------------------------------
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index 06fec12..06e2520 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -35,7 +35,7 @@
   <url>http://tika.apache.org/</url>
 
   <properties>
-    <poi.version>3.15-beta1</poi.version>
+    <poi.version>3.15</poi.version>
     <!-- NOTE: sync codec version with POI -->
     <codec.version>1.10</codec.version>
     <!-- NOTE: sync tukaani version with commons-compress in tika-parent-->

http://git-wip-us.apache.org/repos/asf/tika/blob/cc6f6dcc/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
index 11a88c2..4d45059 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/JackcessExtractor.java
@@ -326,8 +326,9 @@ class JackcessExtractor extends AbstractPOIFSExtractor {
     }
 
     private void handleCompoundContent(OleBlob.CompoundContent cc, XHTMLContentHandler xhtml) throws IOException, SAXException, TikaException {
-        NPOIFSFileSystem nfs = new NPOIFSFileSystem(cc.getStream());
-        handleEmbeddedOfficeDoc(nfs.getRoot(), xhtml);
+        try (NPOIFSFileSystem nfs = new NPOIFSFileSystem(cc.getStream())) {
+            handleEmbeddedOfficeDoc(nfs.getRoot(), xhtml);
+        }
     }
 
     String formatCurrency(Double d, DataType type) {

http://git-wip-us.apache.org/repos/asf/tika/blob/cc6f6dcc/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
index f5f9f3e..b6681aa 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
@@ -35,6 +35,7 @@ import org.apache.poi.poifs.filesystem.DirectoryNode;
 import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.IOUtils;
 import org.apache.tika.exception.EncryptedDocumentException;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TikaInputStream;
@@ -95,26 +96,33 @@ public class OfficeParser extends AbstractParser {
 
         final DirectoryNode root;
         TikaInputStream tstream = TikaInputStream.cast(stream);
-        if (tstream == null) {
-            root = new NPOIFSFileSystem(new CloseShieldInputStream(stream)).getRoot();
-        } else {
-            final Object container = tstream.getOpenContainer();
-            if (container instanceof NPOIFSFileSystem) {
-                root = ((NPOIFSFileSystem) container).getRoot();
-            } else if (container instanceof DirectoryNode) {
-                root = (DirectoryNode) container;
+        NPOIFSFileSystem mustCloseFs = null;
+        try {
+            if (tstream == null) {
+                mustCloseFs = new NPOIFSFileSystem(new CloseShieldInputStream(stream));
+                root = mustCloseFs.getRoot();
             } else {
-                NPOIFSFileSystem fs;
-                if (tstream.hasFile()) {
-                    fs = new NPOIFSFileSystem(tstream.getFile(), true);
+                final Object container = tstream.getOpenContainer();
+                if (container instanceof NPOIFSFileSystem) {
+                    root = ((NPOIFSFileSystem) container).getRoot();
+                } else if (container instanceof DirectoryNode) {
+                    root = (DirectoryNode) container;
                 } else {
-                    fs = new NPOIFSFileSystem(new CloseShieldInputStream(tstream));
+                    NPOIFSFileSystem fs = null;
+                    if (tstream.hasFile()) {
+                        fs = new NPOIFSFileSystem(tstream.getFile(), true);
+                    } else {
+                        fs = new NPOIFSFileSystem(new CloseShieldInputStream(tstream));
+                    }
+                    //tstream will close the fs, no need to close this below
+                    tstream.setOpenContainer(fs);
+                    root = fs.getRoot();
                 }
-                tstream.setOpenContainer(fs);
-                root = fs.getRoot();
             }
+            parse(root, context, metadata, xhtml);
+        } finally {
+            IOUtils.closeQuietly(mustCloseFs);
         }
-        parse(root, context, metadata, xhtml);
         xhtml.endDocument();
     }
 

http://git-wip-us.apache.org/repos/asf/tika/blob/cc6f6dcc/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
----------------------------------------------------------------------
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index 14397b9..c40a3f4 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -16,6 +16,8 @@
  */
 package org.apache.tika.parser.microsoft;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
@@ -60,8 +62,6 @@ import org.apache.tika.sax.EmbeddedContentHandler;
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.xml.sax.SAXException;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
-
 /**
  * Outlook Message Parser.
  */
@@ -254,6 +254,14 @@ public class OutlookExtractor extends AbstractPOIFSExtractor {
             }
         } catch (ChunkNotFoundException e) {
             throw new TikaException("POI MAPIMessage broken - didn't return null on missing chunk", e);
+        } finally {
+            if (msg != null) {
+                try {
+                    msg.close();
+                } catch (IOException e) {
+                    //swallow
+                }
+            }
         }
     }
 

Reply via email to