tika git commit: TIKA-2246 and TIKA-2247 -add parsers for EMF and WMF
Repository: tika Updated Branches: refs/heads/2.x d9f376c12 -> 6bfe5d565 TIKA-2246 and TIKA-2247 -add parsers for EMF and WMF Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/6bfe5d56 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/6bfe5d56 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/6bfe5d56 Branch: refs/heads/2.x Commit: 6bfe5d565bd3fbf55a538c39047294814cae0767 Parents: d9f376c Author: tballisonAuthored: Mon Feb 6 14:31:52 2017 -0500 Committer: tballison Committed: Mon Feb 6 14:31:52 2017 -0500 -- CHANGES.txt | 2 + .../org/apache/tika/module/office/BundleIT.java | 2 +- .../apache/tika/parser/microsoft/EMFParser.java | 163 +++ .../apache/tika/parser/microsoft/WMFParser.java | 98 +++ .../services/org.apache.tika.parser.Parser | 2 + .../tika/parser/microsoft/EMFParserTest.java| 68 .../tika/parser/microsoft/WMFParserTest.java| 42 + .../apache/tika/parser/rtf/RTFParserTest.java | 40 ++--- .../testEXCEL_embeddedPDF_mac.xls | Bin 0 -> 69632 bytes .../testEXCEL_embeddedPDF_mac.xlsx | Bin 0 -> 80578 bytes .../testEXCEL_embeddedPDF_windows.xls | Bin 0 -> 61952 bytes .../testEXCEL_embeddedPDF_windows.xlsx | Bin 0 -> 49843 bytes 12 files changed, 396 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tika/blob/6bfe5d56/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index 45e9651..72fc96c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -17,6 +17,8 @@ Release 2.0 - ??? Release 1.15 -??? + * Add parsers for EMF/WMF files (TIKA-2246/TIKA-2247). 
+ * Official mime types for BMP, EMF and WMF have been registered with IANA, so switch to these (image/bmp image/emf image/wmf) (TIKA-2250) http://git-wip-us.apache.org/repos/asf/tika/blob/6bfe5d56/tika-parser-bundles/tika-parser-office-bundle/src/test/java/org/apache/tika/module/office/BundleIT.java -- diff --git a/tika-parser-bundles/tika-parser-office-bundle/src/test/java/org/apache/tika/module/office/BundleIT.java b/tika-parser-bundles/tika-parser-office-bundle/src/test/java/org/apache/tika/module/office/BundleIT.java index 43d55dc..943589b 100644 --- a/tika-parser-bundles/tika-parser-office-bundle/src/test/java/org/apache/tika/module/office/BundleIT.java +++ b/tika-parser-bundles/tika-parser-office-bundle/src/test/java/org/apache/tika/module/office/BundleIT.java @@ -80,6 +80,6 @@ public class BundleIT { @Test public void testServicesCreated() throws Exception { ServiceReference[] services = bc.getAllServiceReferences(Parser.class.getName(), null); -assertEquals("Not all Services have started", 29, services.length); +assertEquals("Not all Services have started", 31, services.length); } } http://git-wip-us.apache.org/repos/asf/tika/blob/6bfe5d56/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java -- diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java new file mode 100644 index 000..be4bc14 --- /dev/null +++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java @@ -0,0 +1,163 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.tika.parser.microsoft; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.Set; + +import org.apache.poi.hemf.extractor.HemfExtractor; +import org.apache.poi.hemf.record.AbstractHemfComment; +import
tika git commit: TIKA-2247 and TIKA-2246 -- add parsers for EMF/WMF
Repository: tika Updated Branches: refs/heads/master 27e026eff -> b9befb427 TIKA-2247 and TIKA-2246 -- add parsers for EMF/WMF Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/b9befb42 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/b9befb42 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/b9befb42 Branch: refs/heads/master Commit: b9befb4272cf8b2bda3b3ea25b0511bbabfdeded Parents: 27e026e Author: tballisonAuthored: Mon Feb 6 14:31:09 2017 -0500 Committer: tballison Committed: Mon Feb 6 14:31:09 2017 -0500 -- CHANGES.txt | 2 + .../apache/tika/parser/microsoft/EMFParser.java | 163 +++ .../apache/tika/parser/microsoft/WMFParser.java | 98 +++ .../services/org.apache.tika.parser.Parser | 4 +- .../tika/parser/microsoft/EMFParserTest.java| 66 .../tika/parser/microsoft/WMFParserTest.java| 42 + .../apache/tika/parser/rtf/RTFParserTest.java | 40 ++--- .../testEXCEL_embeddedPDF_mac.xls | Bin 0 -> 69632 bytes .../testEXCEL_embeddedPDF_mac.xlsx | Bin 0 -> 80578 bytes .../testEXCEL_embeddedPDF_windows.xls | Bin 0 -> 61952 bytes .../testEXCEL_embeddedPDF_windows.xlsx | Bin 0 -> 49843 bytes 11 files changed, 394 insertions(+), 21 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tika/blob/b9befb42/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index bfe817d..b8e2dec 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,7 @@ Release 1.15 - ?? + * Add parsers for EMF/WMF files (TIKA-2246/TIKA-2247). 
+ * Official mime types for BMP, EMF and WMF have been registered with IANA, so switch to these (image/bmp image/emf image/wmf) (TIKA-2250) http://git-wip-us.apache.org/repos/asf/tika/blob/b9befb42/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java -- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java new file mode 100644 index 000..be4bc14 --- /dev/null +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/EMFParser.java @@ -0,0 +1,163 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.tika.parser.microsoft; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Collections; +import java.util.Set; + +import org.apache.poi.hemf.extractor.HemfExtractor; +import org.apache.poi.hemf.record.AbstractHemfComment; +import org.apache.poi.hemf.record.HemfCommentPublic; +import org.apache.poi.hemf.record.HemfCommentRecord; +import org.apache.poi.hemf.record.HemfRecord; +import org.apache.poi.hemf.record.HemfRecordType; +import org.apache.poi.hemf.record.HemfText; +import org.apache.poi.util.RecordFormatException; +import org.apache.tika.exception.TikaException; +import org.apache.tika.extractor.EmbeddedDocumentExtractor; +import org.apache.tika.extractor.EmbeddedDocumentUtil; +import org.apache.tika.io.TikaInputStream; +import org.apache.tika.metadata.Metadata; +import org.apache.tika.mime.MediaType; +import org.apache.tika.parser.AbstractParser; +import org.apache.tika.parser.ParseContext; +import org.apache.tika.sax.EmbeddedContentHandler; +import org.apache.tika.sax.XHTMLContentHandler; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; + +/** + * Extracts files embedded in EMF and offers a + * very rough capability to extract text if there + * is text stored in the EMF. + * + * To improve text extraction, we'd have to implement + * quite a bit more at the POI level. We'd want to track changes + * in font and use that information for identifying character sets, + * inserting spaces and new lines. + */ +public class EMFParser extends AbstractParser { + +private static final MediaType
tika git commit: TIKA-2134 - remove npe catch after upgrade to POI 3.16.beta2
Repository: tika Updated Branches: refs/heads/2.x 0d7f5bad0 -> d9f376c12 TIKA-2134 - remove npe catch after upgrade to POI 3.16.beta2 Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/d9f376c1 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/d9f376c1 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/d9f376c1 Branch: refs/heads/2.x Commit: d9f376c12fe1c0c56c96c866ba62fff3e6ebca2f Parents: 0d7f5ba Author: tballison Authored: Mon Feb 6 10:35:35 2017 -0500 Committer: tballison Committed: Mon Feb 6 10:35:35 2017 -0500 -- .../parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tika/blob/d9f376c1/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java -- diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java index 45a6a84..f3d7377 100644 --- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java +++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java @@ -159,12 +159,7 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor { for (String footer : sheetExtractor.footers) { extractHeaderFooter(footer, xhtml); } -List shapes = null; -try { -shapes = iter.getShapes(); -} catch (NullPointerException e) { -//missing shape -} +List shapes = iter.getShapes(); processShapes(shapes, xhtml); //for now dump sheet hyperlinks at bottom of page
tika git commit: TIKA-2134 -- remove npe catch after upgrade to POI 3.16.beta2
Repository: tika Updated Branches: refs/heads/master bc3b26369 -> 27e026eff TIKA-2134 -- remove npe catch after upgrade to POI 3.16.beta2 Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/27e026ef Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/27e026ef Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/27e026ef Branch: refs/heads/master Commit: 27e026eff589f5829ddb4374de0d85daf37a9d2a Parents: bc3b263 Author: tballison Authored: Mon Feb 6 10:34:25 2017 -0500 Committer: tballison Committed: Mon Feb 6 10:34:25 2017 -0500 -- .../parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java | 7 +-- 1 file changed, 1 insertion(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tika/blob/27e026ef/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java -- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java index 45a6a84..f3d7377 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSSFExcelExtractorDecorator.java @@ -159,12 +159,7 @@ public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor { for (String footer : sheetExtractor.footers) { extractHeaderFooter(footer, xhtml); } -List shapes = null; -try { -shapes = iter.getShapes(); -} catch (NullPointerException e) { -//missing shape -} +List shapes = iter.getShapes(); processShapes(shapes, xhtml); //for now dump sheet hyperlinks at bottom of page
tika git commit: TIKA-2198 - add null check to Tika after upgrade to POI 3.16.beta2
Repository: tika Updated Branches: refs/heads/master 0d54f07fa -> bc3b26369 TIKA-2198 - add null check to Tika after upgrade to POI 3.16.beta2 Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/bc3b2636 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/bc3b2636 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/bc3b2636 Branch: refs/heads/master Commit: bc3b26369ffbeff83a6e27f57a564089661e5030 Parents: 0d54f07 Author: tballison Authored: Mon Feb 6 10:27:41 2017 -0500 Committer: tballison Committed: Mon Feb 6 10:27:41 2017 -0500 -- .../main/java/org/apache/tika/parser/microsoft/ListManager.java | 4 1 file changed, 4 insertions(+) -- http://git-wip-us.apache.org/repos/asf/tika/blob/bc3b2636/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ListManager.java -- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ListManager.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ListManager.java index a0f19e1..504573c 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ListManager.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ListManager.java @@ -81,6 +81,10 @@ public class ListManager extends AbstractListManager { if (lc == null) { ListData listData = listTables.getListData(paragraph.getList().getLsid()); +if (listData == null) { +//silently skip +return ""; +} LevelTuple[] levelTuples = new LevelTuple[listData.getLevels().length]; for (int i = 0; i < listData.getLevels().length; i++) { levelTuples[i] = buildTuple(i, listData.getLevels()[i]);
tika git commit: TIKA-2198 - add null check to Tika after upgrade to POI 3.16-beta2
Repository: tika Updated Branches: refs/heads/2.x 27e81b97a -> 0d7f5bad0 TIKA-2198 - add null check to Tika after upgrade to POI 3.16-beta2 Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/0d7f5bad Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/0d7f5bad Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/0d7f5bad Branch: refs/heads/2.x Commit: 0d7f5bad0ff61b14a1fad7816e109e1190b17bab Parents: 27e81b9 Author: tballison Authored: Mon Feb 6 10:29:37 2017 -0500 Committer: tballison Committed: Mon Feb 6 10:29:37 2017 -0500 -- .../main/java/org/apache/tika/parser/microsoft/ListManager.java | 4 1 file changed, 4 insertions(+) -- http://git-wip-us.apache.org/repos/asf/tika/blob/0d7f5bad/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java -- diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java index a0f19e1..504573c 100644 --- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java +++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/ListManager.java @@ -81,6 +81,10 @@ public class ListManager extends AbstractListManager { if (lc == null) { ListData listData = listTables.getListData(paragraph.getList().getLsid()); +if (listData == null) { +//silently skip +return ""; +} LevelTuple[] levelTuples = new LevelTuple[listData.getLevels().length]; for (int i = 0; i < listData.getLevels().length; i++) { levelTuples[i] = buildTuple(i, listData.getLevels()[i]);
tika git commit: TIKA-2181 upgrade to POI 3 16 beta2, make sure to upgrade overall bundle
Repository: tika Updated Branches: refs/heads/2.x cf3996ed0 -> 27e81b97a TIKA-2181 upgrade to POI 3 16 beta2, make sure to upgrade overall bundle Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/27e81b97 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/27e81b97 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/27e81b97 Branch: refs/heads/2.x Commit: 27e81b97aa8b3a555fd67ee0132058211b538a28 Parents: cf3996e Author: tballison Authored: Mon Feb 6 09:30:29 2017 -0500 Committer: tballison Committed: Mon Feb 6 09:30:29 2017 -0500 -- tika-bundle/pom.xml | 1 + 1 file changed, 1 insertion(+) -- http://git-wip-us.apache.org/repos/asf/tika/blob/27e81b97/tika-bundle/pom.xml -- diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml index e8f3e83..6e9a887 100644 --- a/tika-bundle/pom.xml +++ b/tika-bundle/pom.xml @@ -221,6 +221,7 @@ org.apache.xml.resolver.tools;resolution:=optional, org.apache.xml.security;resolution:=optional, org.apache.xml.security.c14n;resolution:=optional, + org.apache.xml.security.signature;resolution:=optional, org.apache.xml.security.utils;resolution:=optional, org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional, org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional,
tika git commit: TIKA 2181 upgrade to POI 3 16 beta2
Repository: tika Updated Branches: refs/heads/2.x 7b0655cc1 -> cf3996ed0 TIKA 2181 upgrade to POI 3 16 beta2 Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/cf3996ed Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/cf3996ed Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/cf3996ed Branch: refs/heads/2.x Commit: cf3996ed0784fe1fd79371cc5854be57bbdeb360 Parents: 7b0655c Author: tballison Authored: Mon Feb 6 09:21:02 2017 -0500 Committer: tballison Committed: Mon Feb 6 09:21:02 2017 -0500 -- CHANGES.txt | 2 +- .../tika-parser-office-bundle/pom.xml | 1 + tika-parser-modules/pom.xml | 2 +- .../tika/parser/microsoft/OutlookExtractor.java | 18 +- 4 files changed, 12 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tika/blob/cf3996ed/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index 8d099b8..45e9651 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -62,7 +62,7 @@ Release 1.15 -??? * Add mime detection and parser for Word 2006ML format (TIKA-2179). - * Upgrade to POI 3.16-beta1 (TIKA-2116). + * Upgrade to POI 3.16-beta2 (TIKA-2116, TIKA-2181). * Allow configuration of timeout for ForkParser (TIKA-2170). 
http://git-wip-us.apache.org/repos/asf/tika/blob/cf3996ed/tika-parser-bundles/tika-parser-office-bundle/pom.xml -- diff --git a/tika-parser-bundles/tika-parser-office-bundle/pom.xml b/tika-parser-bundles/tika-parser-office-bundle/pom.xml index 1ef8b05..7acddad 100644 --- a/tika-parser-bundles/tika-parser-office-bundle/pom.xml +++ b/tika-parser-bundles/tika-parser-office-bundle/pom.xml @@ -111,6 +111,7 @@ org.apache.xml.resolver.tools;resolution:=optional, org.apache.xml.security;resolution:=optional, org.apache.xml.security.c14n;resolution:=optional, + org.apache.xml.security.signature;resolution:=optional, org.apache.xml.security.utils;resolution:=optional, org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional, org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional, http://git-wip-us.apache.org/repos/asf/tika/blob/cf3996ed/tika-parser-modules/pom.xml -- diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml index cfa1109..af70259 100644 --- a/tika-parser-modules/pom.xml +++ b/tika-parser-modules/pom.xml @@ -35,7 +35,7 @@ http://tika.apache.org/ -3.16-beta1 +3.16-beta2 1.10 2.0.4 http://git-wip-us.apache.org/repos/asf/tika/blob/cf3996ed/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java -- diff --git a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java index 17d45d2..8285149 100644 --- a/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java +++ b/tika-parser-modules/tika-parser-office-module/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java @@ -260,25 +260,25 @@ public class OutlookExtractor extends AbstractPOIFSExtractor { xhtml.startElement("div", "class", "attachment-entry"); String filename = null; -if 
(attachment.attachLongFileName != null) { -filename = attachment.attachLongFileName.getValue(); -} else if (attachment.attachFileName != null) { -filename = attachment.attachFileName.getValue(); +if (attachment.getAttachLongFileName() != null) { +filename = attachment.getAttachLongFileName().getValue(); +} else if (attachment.getAttachFileName() != null) { +filename = attachment.getAttachFileName().getValue(); } if (filename != null && filename.length() > 0) { xhtml.element("h1", filename); } -if (attachment.attachData != null) { +if (attachment.getAttachData() != null) { handleEmbeddedResource( - TikaInputStream.get(attachment.attachData.getValue()), +
tika git commit: TIKA-2181 - upgrade to POI 3.16.beta2
Repository: tika Updated Branches: refs/heads/master 7555b136d -> 0d54f07fa TIKA-2181 - upgrade to POI 3.16.beta2 Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/0d54f07f Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/0d54f07f Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/0d54f07f Branch: refs/heads/master Commit: 0d54f07facb23219a11f763e4c56cd40ec2bcd77 Parents: 7555b13 Author: tballison Authored: Mon Feb 6 09:20:40 2017 -0500 Committer: tballison Committed: Mon Feb 6 09:20:40 2017 -0500 -- CHANGES.txt | 2 +- tika-bundle/pom.xml | 1 + tika-parsers/pom.xml| 2 +- .../tika/parser/microsoft/OutlookExtractor.java | 20 +--- 4 files changed, 12 insertions(+), 13 deletions(-) -- http://git-wip-us.apache.org/repos/asf/tika/blob/0d54f07f/CHANGES.txt -- diff --git a/CHANGES.txt b/CHANGES.txt index 4256c69..bfe817d 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -53,7 +53,7 @@ Release 1.15 - ?? * Add mime detection and parser for Word 2006ML format (TIKA-2179). - * Upgrade to POI 3.16-beta1 (TIKA-2116). + * Upgrade to POI 3.16-beta2 (TIKA-2116, TIKA-2181). * Allow configuration of timeout for ForkParser (TIKA-2170). 
http://git-wip-us.apache.org/repos/asf/tika/blob/0d54f07f/tika-bundle/pom.xml -- diff --git a/tika-bundle/pom.xml b/tika-bundle/pom.xml index 069e26d..4e04451 100644 --- a/tika-bundle/pom.xml +++ b/tika-bundle/pom.xml @@ -221,6 +221,7 @@ org.apache.xml.resolver.tools;resolution:=optional, org.apache.xml.security;resolution:=optional, org.apache.xml.security.c14n;resolution:=optional, + org.apache.xml.security.signature;resolution:=optional, org.apache.xml.security.utils;resolution:=optional, org.apache.xmlbeans.impl.xpath.saxon;resolution:=optional, org.apache.xmlbeans.impl.xquery.saxon;resolution:=optional, http://git-wip-us.apache.org/repos/asf/tika/blob/0d54f07f/tika-parsers/pom.xml -- diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml index b64e1a3..c7debb8 100644 --- a/tika-parsers/pom.xml +++ b/tika-parsers/pom.xml @@ -35,7 +35,7 @@ http://tika.apache.org/ -3.16-beta1 +3.16-beta2 1.10 http://git-wip-us.apache.org/repos/asf/tika/blob/0d54f07f/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java -- diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java index 76ac17f..9818d30 100644 --- a/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java +++ b/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OutlookExtractor.java @@ -36,8 +36,6 @@ import java.util.regex.Pattern; import org.apache.james.mime4j.codec.DecodeMonitor; import org.apache.james.mime4j.codec.DecoderUtil; -import org.apache.james.mime4j.dom.field.ParsedField; -import org.apache.james.mime4j.field.LenientFieldParser; import org.apache.poi.hmef.attribute.MAPIRtfAttribute; import org.apache.poi.hsmf.MAPIMessage; import org.apache.poi.hsmf.datatypes.AttachmentChunks; @@ -256,25 +254,25 @@ public class OutlookExtractor extends AbstractPOIFSExtractor { xhtml.startElement("div", "class", "attachment-entry"); 
String filename = null; -if (attachment.attachLongFileName != null) { -filename = attachment.attachLongFileName.getValue(); -} else if (attachment.attachFileName != null) { -filename = attachment.attachFileName.getValue(); +if (attachment.getAttachLongFileName() != null) { +filename = attachment.getAttachLongFileName().getValue(); +} else if (attachment.getAttachFileName() != null) { +filename = attachment.getAttachFileName().getValue(); } if (filename != null && filename.length() > 0) { xhtml.element("h1", filename); } -if (attachment.attachData != null) { +if (attachment.getAttachData() != null) { handleEmbeddedResource( - TikaInputStream.get(attachment.attachData.getValue()), +