This is an automated email from the ASF dual-hosted git repository. snagel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 3101a9e6fd76124ccc204ee3d74e00ee1b5957b8
Merge: f8577a0d7 713835b73
Author: Sebastian Nagel <[email protected]>
AuthorDate: Tue Feb 3 21:12:07 2026 +0100

    Merge pull request #887 from lewismc/NUTCH-3110

    NUTCH-3110 Upgrade to Tika 3.2.3

 ivy/ivy.xml | 2 +-
 src/plugin/language-identifier/ivy.xml | 2 +-
 src/plugin/language-identifier/plugin.xml | 13 +-
 src/plugin/parse-js/plugin.xml | 4 +-
 src/plugin/parse-tika/howto_upgrade_tika.md | 37 ++--
 src/plugin/parse-tika/ivy.xml | 10 +-
 src/plugin/parse-tika/plugin.xml | 92 ++++++++-
 .../apache/nutch/parse/tika/DOMContentUtils.java | 54 ++++-
 .../org/apache/nutch/parse/tika/TikaParser.java | 58 +++---
 .../nutch/parse/tika/TestBoilerpipeExtraction.java | 112 +++++++++++
 .../nutch/parse/tika/TestEncodingDetection.java | 193 ++++++++++++++++++
 .../apache/nutch/parse/tika/TestHtmlParser.java | 4 +-
 .../parse/tika/TestLinkExtractionEdgeCases.java | 221 ++++++++++++++++++++
 .../nutch/parse/tika/TestMetadataExtraction.java | 223 +++++++++++++++++++++
 .../parse/tika/TestParserFailureHandling.java | 221 ++++++++++++++++++++
 15 files changed, 1171 insertions(+), 75 deletions(-)
