This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch TIKA-4715 in repository https://gitbox.apache.org/repos/asf/tika.git
commit f35fb48676343d3d096bef5ff2029e473862cfc9 Merge: 4a28a969fc 4cf115c0e2 Author: tallison <[email protected]> AuthorDate: Thu Apr 9 21:04:46 2026 -0400 Merge remote-tracking branch 'origin/main' into TIKA-4715 # Conflicts: # CHANGES.txt # tika-parsers/tika-parsers-standard/tika-parsers-standard-integration-tests/src/test/java/org/apache/tika/parser/AutoDetectParserTest.java .github/workflows/main-jdk17-build.yml | 2 + CHANGES.txt | 3 + docs/build-docs.sh | 53 ++ .../advanced/flores-eval-20260320.txt | 0 .../ROOT/examples/external-parser-exiftool.json | 1 + .../ROOT/examples/external-parser-ffmpeg.json | 1 + .../ROOT/examples/external-parser-multi.json | 1 + .../modules/ROOT/examples/external-parser-sox.json | 1 + docs/modules/ROOT/nav.adoc | 1 + .../pages/advanced/generative-language-model.adoc | 4 +- .../advanced/integration-testing/tika-server.adoc | 4 +- .../pages/advanced/language-detection-build.adoc | 2 +- .../ROOT/pages/advanced/language-detection.adoc | 5 +- .../configuration/parsers/external-parser.adoc | 176 +++++++ docs/modules/ROOT/pages/developers/index.adoc | 2 +- docs/modules/ROOT/pages/index.adoc | 4 + docs/modules/ROOT/pages/maintainers/site.adoc | 52 +- .../pages/migration-to-4x/migrating-to-4x.adoc | 72 ++- docs/modules/ROOT/pages/pipes/unpack-config.adoc | 2 +- docs/modules/ROOT/pages/security.adoc | 18 +- .../services/org.apache.tika.parser.Parser | 16 - .../apache/tika/detect/FileCommandDetector.java | 3 +- .../org/apache/tika/embedder/ExternalEmbedder.java | 13 +- .../parser/external/CompositeExternalParser.java | 44 -- .../tika/parser/external/ExternalParser.java | 581 +++++++-------------- .../ExternalParserConfig.java | 69 ++- .../external/ExternalParsersConfigReader.java | 223 -------- .../ExternalParsersConfigReaderMetKeys.java | 43 -- .../parser/external/ExternalParsersFactory.java | 67 --- .../apache/tika/parser/external/package-info.java | 22 - .../tika/parser/external2/ExternalParser.java | 227 -------- .../java/org/apache/tika/utils/ProcessUtils.java | 74 +++ .../java/org/apache/tika/utils/StreamGobbler.java | 52 +- .../tika/parser/external/tika-external-parsers.xml | 117 ----- .../tika/detect/siegfried/SiegfriedDetector.java | 3 +- tika-parent/pom.xml | 2 +- .../org/apache/tika/parser/gdal/GDALParser.java | 5 +- .../apache/tika/parser/gdal/TestGDALParser.java | 6 +- .../parser/scientific/integration/TestParsers.java | 11 - .../apache/tika/parser/AutoDetectParserTest.java | 8 - .../org/apache/tika/parser/pdf/PDFParserTest.java | 4 +- .../apache/tika/parser/pkg/UnrarParserTest.java | 4 +- .../apache/tika/parser/dwg/DWGParserConfig.java | 4 +- .../org/apache/tika/parser/dwg/DWGParserTest.java | 4 +- .../apache/tika/parser/image/JpegParserTest.java | 20 + .../apache/tika/parser/ocr/TesseractOCRParser.java | 6 +- .../renderer/pdf/poppler/PopplerRendererTest.java | 4 +- .../apache/tika/parser/pkg/UnrarParserTest.java | 4 +- .../apache/tika/parser/strings/StringsParser.java | 6 +- .../tika/parser/strings/StringsParserTest.java | 4 +- .../tika/parser/external/ExternalParserTest.java | 195 +++++++ .../tika/parser/external2/ExternalParserTest.java | 100 ---- .../configs/TIKA-3557-exiftool-example.json | 6 +- .../src/test/resources/configs/TIKA-3557.json | 2 +- ...-example.json => external-parser-exiftool.json} | 15 +- .../resources/configs/external-parser-ffmpeg.json | 35 ++ .../resources/configs/external-parser-multi.json | 47 ++ .../resources/configs/external-parser-sox.json | 37 ++ .../services/org.apache.tika.parser.Parser | 16 - .../tika/server/standard/TikaParsersTest.java | 4 +- 60 files changed, 1148 insertions(+), 1359 deletions(-) diff --cc CHANGES.txt index b3fe46bf53,ada77669ba..dc20edeca4 --- a/CHANGES.txt +++ b/CHANGES.txt @@@ -29,9 -29,9 +29,12 @@@ Release 4.0.0-BETA1 - ?? * Removed DigestingParser (TIKA-4607). + * tika-parsers-standard-package is now a pom, not a jar. + Users must add <type>pom</type> in Maven or @pom in Gradle (TIKA-4712). + + * Removed legacy ExternalParser; external parsers now require explicit + JSON configuration (TIKA-4707). + OTHER CHANGES * Fix concurrency bug in TikaToXMP (TIKA-4393)
