This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_3x-various-improvements in repository https://gitbox.apache.org/repos/asf/tika.git
commit 0a29fb4e7594ec51bff2eb96deb16be5b23497c7 Author: tallison <[email protected]> AuthorDate: Tue Mar 17 08:42:46 2026 -0400 TIKA-4563 -- various fixes based on regression testing - further updates --- CHANGES.txt | 2 ++ tika-bundles/tika-bundle-standard/pom.xml | 2 +- tika-eval/tika-eval-app/pom.xml | 4 ++++ tika-parent/pom.xml | 11 +++++++++++ .../tika-parser-microsoft-module/pom.xml | 4 ++++ .../main/java/org/apache/tika/parser/pkg/PackageParser.java | 3 +++ 6 files changed, 25 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 5def269d65..cc749a0b13 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,5 +1,7 @@ Release 3.3.0 - ??? + * Switch to poi-ooxml-full (TIKA-4563). + * Users need to add "allowAbsolutePaths=true" for the FileSystemFetcher to fetch an absolute path (TIKA-4649). diff --git a/tika-bundles/tika-bundle-standard/pom.xml b/tika-bundles/tika-bundle-standard/pom.xml index a771b4fe03..dc586b1e9f 100644 --- a/tika-bundles/tika-bundle-standard/pom.xml +++ b/tika-bundles/tika-bundle-standard/pom.xml @@ -167,7 +167,7 @@ poi| poi-scratchpad| poi-ooxml| - poi-ooxml-lite| + poi-ooxml-full| commons-math3| curvesapi| xmlbeans| diff --git a/tika-eval/tika-eval-app/pom.xml b/tika-eval/tika-eval-app/pom.xml index bfa683c383..0c10d52232 100644 --- a/tika-eval/tika-eval-app/pom.xml +++ b/tika-eval/tika-eval-app/pom.xml @@ -61,6 +61,10 @@ <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> </dependency> + <dependency> + <groupId>org.apache.poi</groupId> + <artifactId>poi-ooxml-full</artifactId> + </dependency> <dependency> <groupId>org.apache.tika</groupId> <artifactId>tika-batch</artifactId> diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml index 66e54ccedb..23618e7059 100644 --- a/tika-parent/pom.xml +++ b/tika-parent/pom.xml @@ -904,6 +904,17 @@ <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>${poi.version}</version> + <exclusions> + <exclusion> + <groupId>org.apache.poi</groupId> + <artifactId>poi-ooxml-lite</artifactId> + </exclusion> + </exclusions> + </dependency> + <dependency> + <groupId>org.apache.poi</groupId> + <artifactId>poi-ooxml-full</artifactId> + <version>${poi.version}</version> </dependency> <dependency> <groupId>org.apache.xmlbeans</groupId> diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/pom.xml b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/pom.xml index bfc4f24fa6..a1443b50ff 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/pom.xml +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/pom.xml @@ -88,6 +88,10 @@ <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> </dependency> + <dependency> + <groupId>org.apache.poi</groupId> + <artifactId>poi-ooxml-full</artifactId> + </dependency> <!-- needed by jackcess --> <dependency> <groupId>commons-logging</groupId> diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/PackageParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/PackageParser.java index 25b4fa1ccc..f596828fa8 100644 --- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/PackageParser.java +++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/PackageParser.java @@ -444,6 +444,9 @@ public class PackageParser extends AbstractEncodingDetectorParser { extractor.parseEmbedded(entryStream, xhtml, entryMetadata, true); } catch (UnsupportedZipFeatureException e) { EmbeddedDocumentUtil.recordEmbeddedStreamException(e, parentMetadata); + if (name != null && name.length() > 0) { + xhtml.element("p", name); + } } } }
