tika git commit: TIKA-2246 and TIKA-2247 - add parsers for EMF and WMF

2017-02-06 Thread tallison
Repository: tika
Updated Branches:
  refs/heads/2.x d9f376c12 -> 6bfe5d565


TIKA-2246 and TIKA-2247 - add parsers for EMF and WMF


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/6bfe5d56
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/6bfe5d56
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/6bfe5d56

Branch: refs/heads/2.x
Commit: 6bfe5d565bd3fbf55a538c39047294814cae0767
Parents: d9f376c
Author: tballison 
Authored: Mon Feb 6 14:31:52 2017 -0500
Committer: tballison 
Committed: Mon Feb 6 14:31:52 2017 -0500

--
 CHANGES.txt |   2 +
 .../org/apache/tika/module/office/BundleIT.java |   2 +-
 .../apache/tika/parser/microsoft/EMFParser.java | 163 +++
 .../apache/tika/parser/microsoft/WMFParser.java |  98 +++
 .../services/org.apache.tika.parser.Parser  |   2 +
 .../tika/parser/microsoft/EMFParserTest.java|  68 
 .../tika/parser/microsoft/WMFParserTest.java|  42 +
 .../apache/tika/parser/rtf/RTFParserTest.java   |  40 ++---
 .../testEXCEL_embeddedPDF_mac.xls   | Bin 0 -> 69632 bytes
 .../testEXCEL_embeddedPDF_mac.xlsx  | Bin 0 -> 80578 bytes
 .../testEXCEL_embeddedPDF_windows.xls   | Bin 0 -> 61952 bytes
 .../testEXCEL_embeddedPDF_windows.xlsx  | Bin 0 -> 49843 bytes
 12 files changed, 396 insertions(+), 21 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/tika/blob/6bfe5d56/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 45e9651..72fc96c 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -17,6 +17,8 @@ Release 2.0 - ???
 
 Release 1.15 -???
 
+  * Add parsers for EMF/WMF files (TIKA-2246/TIKA-2247).
+
   * Official mime types for BMP, EMF and WMF have been registered with
 IANA, so switch to these (image/bmp image/emf image/wmf) (TIKA-2250)
 

http://git-wip-us.apache.org/repos/asf/tika/blob/6bfe5d56/tika-parser-bundles/tika-parser-office-bundle/src/test/java/org/apache/tika/module/office/BundleIT.java
--
diff --git 
a/tika-parser-bundles/tika-parser-office-bundle/src/test/java/org/apache/tika/module/office/BundleIT.java
 
b/tika-parser-bundles/tika-parser-office-bundle/src/test/java/org/apache/tika/module/office/BundleIT.java
index 43d55dc..943589b 100644
--- 
a/tika-parser-bundles/tika-parser-office-bundle/src/test/java/org/apache/tika/module/office/BundleIT.java
+++ 
b/tika-parser-bundles/tika-parser-office-bundle/src/test/java/org/apache/tika/module/office/BundleIT.java
@@ -80,6 +80,6 @@ public class BundleIT {
 @Test
 public void testServicesCreated() throws Exception {
 ServiceReference[] services = 
bc.getAllServiceReferences(Parser.class.getName(), null);
-assertEquals("Not all Services have started", 29, services.length);
+assertEquals("Not all Services have started", 31, services.length);
 }
 }

http://git-wip-us.apache.org/repos/asf/tika/blob/6bfe5d56/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java
--
diff --git 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java
 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java
new file mode 100644
index 000..be4bc14
--- /dev/null
+++ 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java
@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.poi.hemf.extractor.HemfExtractor;
+import org.apache.poi.hemf.record.AbstractHemfComment;
+import 

tika git commit: TIKA-2247 and TIKA-2246 -- add parsers for EMF/WMF

2017-02-06 Thread tallison
Repository: tika
Updated Branches:
  refs/heads/master 27e026eff -> b9befb427


TIKA-2247 and TIKA-2246 -- add parsers for EMF/WMF


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/b9befb42
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/b9befb42
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/b9befb42

Branch: refs/heads/master
Commit: b9befb4272cf8b2bda3b3ea25b0511bbabfdeded
Parents: 27e026e
Author: tballison 
Authored: Mon Feb 6 14:31:09 2017 -0500
Committer: tballison 
Committed: Mon Feb 6 14:31:09 2017 -0500

--
 CHANGES.txt |   2 +
 .../apache/tika/parser/microsoft/EMFParser.java | 163 +++
 .../apache/tika/parser/microsoft/WMFParser.java |  98 +++
 .../services/org.apache.tika.parser.Parser  |   4 +-
 .../tika/parser/microsoft/EMFParserTest.java|  66 
 .../tika/parser/microsoft/WMFParserTest.java|  42 +
 .../apache/tika/parser/rtf/RTFParserTest.java   |  40 ++---
 .../testEXCEL_embeddedPDF_mac.xls   | Bin 0 -> 69632 bytes
 .../testEXCEL_embeddedPDF_mac.xlsx  | Bin 0 -> 80578 bytes
 .../testEXCEL_embeddedPDF_windows.xls   | Bin 0 -> 61952 bytes
 .../testEXCEL_embeddedPDF_windows.xlsx  | Bin 0 -> 49843 bytes
 11 files changed, 394 insertions(+), 21 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/tika/blob/b9befb42/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index bfe817d..b8e2dec 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,5 +1,7 @@
 Release 1.15 - ??
 
+  * Add parsers for EMF/WMF files (TIKA-2246/TIKA-2247).
+
   * Official mime types for BMP, EMF and WMF have been registered with
 IANA, so switch to these (image/bmp image/emf image/wmf) (TIKA-2250)
 

http://git-wip-us.apache.org/repos/asf/tika/blob/b9befb42/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java
--
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java
new file mode 100644
index 000..be4bc14
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java
@@ -0,0 +1,163 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.microsoft;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.poi.hemf.extractor.HemfExtractor;
+import org.apache.poi.hemf.record.AbstractHemfComment;
+import org.apache.poi.hemf.record.HemfCommentPublic;
+import org.apache.poi.hemf.record.HemfCommentRecord;
+import org.apache.poi.hemf.record.HemfRecord;
+import org.apache.poi.hemf.record.HemfRecordType;
+import org.apache.poi.hemf.record.HemfText;
+import org.apache.poi.util.RecordFormatException;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.extractor.EmbeddedDocumentExtractor;
+import org.apache.tika.extractor.EmbeddedDocumentUtil;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.EmbeddedContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Extracts files embedded in EMF and offers a
+ * very rough capability to extract text if there
+ * is text stored in the EMF.
+ * 
+ * To improve text extraction, we'd have to implement
+ * quite a bit more at the POI level.  We'd want to track changes
+ * in font and use that information for identifying character sets,
+ * inserting spaces and new lines.
+ */
+public class EMFParser extends AbstractParser {
+
+private static final MediaType 

tika git commit: TIKA-2134 - remove npe catch after upgrade to POI 3.16.beta2

2017-02-06 Thread tallison
Repository: tika
Updated Branches:
  refs/heads/2.x 0d7f5bad0 -> d9f376c12


TIKA-2134 - remove npe catch after upgrade to POI 3.16.beta2


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/d9f376c1
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/d9f376c1
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/d9f376c1

Branch: refs/heads/2.x
Commit: d9f376c12fe1c0c56c96c866ba62fff3e6ebca2f
Parents: 0d7f5ba
Author: tballison 
Authored: Mon Feb 6 10:35:35 2017 -0500
Committer: tballison 
Committed: Mon Feb 6 10:35:35 2017 -0500

--
 .../parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java   | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/tika/blob/d9f376c1/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
--
diff --git 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
index 45a6a84..f3d7377 100644
--- 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
+++ 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
@@ -159,12 +159,7 @@ public class XSSFExcelExtractorDecorator extends 
AbstractOOXMLExtractor {
 for (String footer : sheetExtractor.footers) {
 extractHeaderFooter(footer, xhtml);
 }
-List shapes = null;
-try {
-shapes = iter.getShapes();
-} catch (NullPointerException e) {
-//missing shape
-}
+List shapes = iter.getShapes();
 processShapes(shapes, xhtml);
 
 //for now dump sheet hyperlinks at bottom of page



tika git commit: TIKA-2134 -- remove npe catch after upgrade to POI 3.16.beta2

2017-02-06 Thread tallison
Repository: tika
Updated Branches:
  refs/heads/master bc3b26369 -> 27e026eff


TIKA-2134 -- remove npe catch after upgrade to POI 3.16.beta2


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/27e026ef
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/27e026ef
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/27e026ef

Branch: refs/heads/master
Commit: 27e026eff589f5829ddb4374de0d85daf37a9d2a
Parents: bc3b263
Author: tballison 
Authored: Mon Feb 6 10:34:25 2017 -0500
Committer: tballison 
Committed: Mon Feb 6 10:34:25 2017 -0500

--
 .../parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java   | 7 +--
 1 file changed, 1 insertion(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/tika/blob/27e026ef/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
--
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
index 45a6a84..f3d7377 100644
--- 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java
@@ -159,12 +159,7 @@ public class XSSFExcelExtractorDecorator extends 
AbstractOOXMLExtractor {
 for (String footer : sheetExtractor.footers) {
 extractHeaderFooter(footer, xhtml);
 }
-List shapes = null;
-try {
-shapes = iter.getShapes();
-} catch (NullPointerException e) {
-//missing shape
-}
+List shapes = iter.getShapes();
 processShapes(shapes, xhtml);
 
 //for now dump sheet hyperlinks at bottom of page



tika git commit: TIKA-2198 - add null check to Tika after upgrade to POI 3.16.beta2

2017-02-06 Thread tallison
Repository: tika
Updated Branches:
  refs/heads/master 0d54f07fa -> bc3b26369


TIKA-2198 - add null check to Tika after upgrade to POI 3.16.beta2


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/bc3b2636
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/bc3b2636
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/bc3b2636

Branch: refs/heads/master
Commit: bc3b26369ffbeff83a6e27f57a564089661e5030
Parents: 0d54f07
Author: tballison 
Authored: Mon Feb 6 10:27:41 2017 -0500
Committer: tballison 
Committed: Mon Feb 6 10:27:41 2017 -0500

--
 .../main/java/org/apache/tika/parser/microsoft/ListManager.java  | 4 
 1 file changed, 4 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/tika/blob/bc3b2636/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
--
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ListManager.java 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
index a0f19e1..504573c 100644
--- 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
@@ -81,6 +81,10 @@ public class ListManager extends AbstractListManager {
 
 if (lc == null) {
 ListData listData = 
listTables.getListData(paragraph.getList().getLsid());
+if (listData == null) {
+//silently skip
+return "";
+}
 LevelTuple[] levelTuples = new 
LevelTuple[listData.getLevels().length];
 for (int i = 0; i < listData.getLevels().length; i++) {
 levelTuples[i] = buildTuple(i, listData.getLevels()[i]);



tika git commit: TIKA-2198 - add null check to Tika after upgrade to POI 3.16-beta2

2017-02-06 Thread tallison
Repository: tika
Updated Branches:
  refs/heads/2.x 27e81b97a -> 0d7f5bad0


TIKA-2198 - add null check to Tika after upgrade to POI 3.16-beta2


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/0d7f5bad
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/0d7f5bad
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/0d7f5bad

Branch: refs/heads/2.x
Commit: 0d7f5bad0ff61b14a1fad7816e109e1190b17bab
Parents: 27e81b9
Author: tballison 
Authored: Mon Feb 6 10:29:37 2017 -0500
Committer: tballison 
Committed: Mon Feb 6 10:29:37 2017 -0500

--
 .../main/java/org/apache/tika/parser/microsoft/ListManager.java  | 4 
 1 file changed, 4 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/tika/blob/0d7f5bad/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
--
diff --git 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
index a0f19e1..504573c 100644
--- 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
+++ 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java
@@ -81,6 +81,10 @@ public class ListManager extends AbstractListManager {
 
 if (lc == null) {
 ListData listData = 
listTables.getListData(paragraph.getList().getLsid());
+if (listData == null) {
+//silently skip
+return "";
+}
 LevelTuple[] levelTuples = new 
LevelTuple[listData.getLevels().length];
 for (int i = 0; i < listData.getLevels().length; i++) {
 levelTuples[i] = buildTuple(i, listData.getLevels()[i]);



tika git commit: TIKA-2181 - upgrade to POI 3.16-beta2, make sure to upgrade overall bundle

2017-02-06 Thread tallison
Repository: tika
Updated Branches:
  refs/heads/2.x cf3996ed0 -> 27e81b97a


TIKA-2181 - upgrade to POI 3.16-beta2, make sure to upgrade overall bundle


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/27e81b97
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/27e81b97
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/27e81b97

Branch: refs/heads/2.x
Commit: 27e81b97aa8b3a555fd67ee0132058211b538a28
Parents: cf3996e
Author: tballison 
Authored: Mon Feb 6 09:30:29 2017 -0500
Committer: tballison 
Committed: Mon Feb 6 09:30:29 2017 -0500

--
 tika-bundle/pom.xml | 1 +
 1 file changed, 1 insertion(+)
--


http://git-wip-us.apache.org/repos/asf/tika/blob/27e81b97/tika-bundle/pom.xml
--
diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml
index e8f3e83..6e9a887 100644
--- a/tika-bundle/pom.xml
+++ b/tika-bundle/pom.xml
@@ -221,6 +221,7 @@
   org.apache.xml.resolver.tools;resolution:=optional,
   org.apache.xml.security;resolution:=optional,
   org.apache.xml.security.c14n;resolution:=optional,
+  org.apache.xml.security.signature;resolution:=optional,
   org.apache.xml.security.utils;resolution:=optional,
   org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional,
   org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional,



tika git commit: TIKA-2181 - upgrade to POI 3.16-beta2

2017-02-06 Thread tallison
Repository: tika
Updated Branches:
  refs/heads/2.x 7b0655cc1 -> cf3996ed0


TIKA-2181 - upgrade to POI 3.16-beta2


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/cf3996ed
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/cf3996ed
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/cf3996ed

Branch: refs/heads/2.x
Commit: cf3996ed0784fe1fd79371cc5854be57bbdeb360
Parents: 7b0655c
Author: tballison 
Authored: Mon Feb 6 09:21:02 2017 -0500
Committer: tballison 
Committed: Mon Feb 6 09:21:02 2017 -0500

--
 CHANGES.txt   |  2 +-
 .../tika-parser-office-bundle/pom.xml |  1 +
 tika-parser-modules/pom.xml   |  2 +-
 .../tika/parser/microsoft/OutlookExtractor.java   | 18 +-
 4 files changed, 12 insertions(+), 11 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/tika/blob/cf3996ed/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 8d099b8..45e9651 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -62,7 +62,7 @@ Release 1.15 -???
 
   * Add mime detection and parser for Word 2006ML format (TIKA-2179).
 
-  * Upgrade to POI 3.16-beta1 (TIKA-2116).
+  * Upgrade to POI 3.16-beta2 (TIKA-2116, TIKA-2181).
 
   * Allow configuration of timeout for ForkParser (TIKA-2170).
 

http://git-wip-us.apache.org/repos/asf/tika/blob/cf3996ed/tika-parser-bundles/tika-parser-office-bundle/pom.xml
--
diff --git a/tika-parser-bundles/tika-parser-office-bundle/pom.xml 
b/tika-parser-bundles/tika-parser-office-bundle/pom.xml
index 1ef8b05..7acddad 100644
--- a/tika-parser-bundles/tika-parser-office-bundle/pom.xml
+++ b/tika-parser-bundles/tika-parser-office-bundle/pom.xml
@@ -111,6 +111,7 @@
   org.apache.xml.resolver.tools;resolution:=optional,
   org.apache.xml.security;resolution:=optional,
   org.apache.xml.security.c14n;resolution:=optional,
+  org.apache.xml.security.signature;resolution:=optional,
   org.apache.xml.security.utils;resolution:=optional,
   org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional,
   org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional,

http://git-wip-us.apache.org/repos/asf/tika/blob/cf3996ed/tika-parser-modules/pom.xml
--
diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml
index cfa1109..af70259 100644
--- a/tika-parser-modules/pom.xml
+++ b/tika-parser-modules/pom.xml
@@ -35,7 +35,7 @@
   http://tika.apache.org/
   
   
-3.16-beta1
+3.16-beta2
 
 1.10
 2.0.4

http://git-wip-us.apache.org/repos/asf/tika/blob/cf3996ed/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
--
diff --git 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index 17d45d2..8285149 100644
--- 
a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ 
b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -260,25 +260,25 @@ public class OutlookExtractor extends 
AbstractPOIFSExtractor {
 xhtml.startElement("div", "class", "attachment-entry");
 
 String filename = null;
-if (attachment.attachLongFileName != null) {
-filename = attachment.attachLongFileName.getValue();
-} else if (attachment.attachFileName != null) {
-filename = attachment.attachFileName.getValue();
+if (attachment.getAttachLongFileName() != null) {
+filename = attachment.getAttachLongFileName().getValue();
+} else if (attachment.getAttachFileName() != null) {
+filename = attachment.getAttachFileName().getValue();
 }
 if (filename != null && filename.length() > 0) {
 xhtml.element("h1", filename);
 }
 
-if (attachment.attachData != null) {
+if (attachment.getAttachData() != null) {
 handleEmbeddedResource(
-
TikaInputStream.get(attachment.attachData.getValue()),
+

tika git commit: TIKA-2181 - upgrade to POI 3.16.beta2

2017-02-06 Thread tallison
Repository: tika
Updated Branches:
  refs/heads/master 7555b136d -> 0d54f07fa


TIKA-2181 - upgrade to POI 3.16.beta2


Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/0d54f07f
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/0d54f07f
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/0d54f07f

Branch: refs/heads/master
Commit: 0d54f07facb23219a11f763e4c56cd40ec2bcd77
Parents: 7555b13
Author: tballison 
Authored: Mon Feb 6 09:20:40 2017 -0500
Committer: tballison 
Committed: Mon Feb 6 09:20:40 2017 -0500

--
 CHANGES.txt |  2 +-
 tika-bundle/pom.xml |  1 +
 tika-parsers/pom.xml|  2 +-
 .../tika/parser/microsoft/OutlookExtractor.java | 20 +---
 4 files changed, 12 insertions(+), 13 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/tika/blob/0d54f07f/CHANGES.txt
--
diff --git a/CHANGES.txt b/CHANGES.txt
index 4256c69..bfe817d 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -53,7 +53,7 @@ Release 1.15 - ??
 
   * Add mime detection and parser for Word 2006ML format (TIKA-2179).
 
-  * Upgrade to POI 3.16-beta1 (TIKA-2116).
+  * Upgrade to POI 3.16-beta2 (TIKA-2116, TIKA-2181).
 
   * Allow configuration of timeout for ForkParser (TIKA-2170).
 

http://git-wip-us.apache.org/repos/asf/tika/blob/0d54f07f/tika-bundle/pom.xml
--
diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml
index 069e26d..4e04451 100644
--- a/tika-bundle/pom.xml
+++ b/tika-bundle/pom.xml
@@ -221,6 +221,7 @@
   org.apache.xml.resolver.tools;resolution:=optional,
   org.apache.xml.security;resolution:=optional,
   org.apache.xml.security.c14n;resolution:=optional,
+  org.apache.xml.security.signature;resolution:=optional,
   org.apache.xml.security.utils;resolution:=optional,
   org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional,
   org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional,

http://git-wip-us.apache.org/repos/asf/tika/blob/0d54f07f/tika-parsers/pom.xml
--
diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml
index b64e1a3..c7debb8 100644
--- a/tika-parsers/pom.xml
+++ b/tika-parsers/pom.xml
@@ -35,7 +35,7 @@
   http://tika.apache.org/
 
   
-3.16-beta1
+3.16-beta2
 
 1.10
 

http://git-wip-us.apache.org/repos/asf/tika/blob/0d54f07f/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
--
diff --git 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
index 76ac17f..9818d30 100644
--- 
a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
+++ 
b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java
@@ -36,8 +36,6 @@ import java.util.regex.Pattern;
 
 import org.apache.james.mime4j.codec.DecodeMonitor;
 import org.apache.james.mime4j.codec.DecoderUtil;
-import org.apache.james.mime4j.dom.field.ParsedField;
-import org.apache.james.mime4j.field.LenientFieldParser;
 import org.apache.poi.hmef.attribute.MAPIRtfAttribute;
 import org.apache.poi.hsmf.MAPIMessage;
 import org.apache.poi.hsmf.datatypes.AttachmentChunks;
@@ -256,25 +254,25 @@ public class OutlookExtractor extends 
AbstractPOIFSExtractor {
 xhtml.startElement("div", "class", "attachment-entry");
 
 String filename = null;
-if (attachment.attachLongFileName != null) {
-filename = attachment.attachLongFileName.getValue();
-} else if (attachment.attachFileName != null) {
-filename = attachment.attachFileName.getValue();
+if (attachment.getAttachLongFileName() != null) {
+filename = attachment.getAttachLongFileName().getValue();
+} else if (attachment.getAttachFileName() != null) {
+filename = attachment.getAttachFileName().getValue();
 }
 if (filename != null && filename.length() > 0) {
 xhtml.element("h1", filename);
 }
 
-if (attachment.attachData != null) {
+if (attachment.getAttachData() != null) {
 handleEmbeddedResource(
-
TikaInputStream.get(attachment.attachData.getValue()),
+