This is an automated email from the ASF dual-hosted git repository.

tballison pushed a commit to branch TIKA-4723
in repository https://gitbox.apache.org/repos/asf/tika.git

commit d32e187fec63406fe451fcf72070ce739e938391
Merge: 08365d977e 0b6583091b
Author: tallison <[email protected]>
AuthorDate: Tue May 12 11:19:03 2026 -0400

    Merge remote-tracking branch 'origin/main' into TIKA-4723

 .github/workflows/docker-release.yml               | 169 ++++++++++--
 .github/workflows/docker-snapshot.yml              |   4 +-
 docs/.gitignore                                    |   3 +
 docs/build-docs.sh                                 |  53 ----
 .../ROOT/examples/migration-full-example.json      |   2 +-
 docs/modules/ROOT/examples/pdf-parser-basic.json   |   2 +-
 docs/modules/ROOT/examples/pdf-parser-full.json    |   2 +-
 .../ROOT/examples/pipes-atlassian-jwt-fetcher.json |   1 +
 .../ROOT/examples/pipes-azblob-emitter.json        |   1 +
 .../ROOT/examples/pipes-azblob-fetcher.json        |   1 +
 .../ROOT/examples/pipes-azblob-iterator.json       |   1 +
 .../ROOT/examples/pipes-azblob-pipeline.json       |   1 +
 .../ROOT/examples/pipes-config-template.json       |   1 +
 docs/modules/ROOT/examples/pipes-csv-iterator.json |   1 +
 .../ROOT/examples/pipes-elasticsearch-emitter.json |   1 +
 .../examples/pipes-elasticsearch-pipeline.json     |   1 +
 .../examples/pipes-elasticsearch-reporter.json     |   1 +
 docs/modules/ROOT/examples/pipes-emit-all.json     |   1 +
 docs/modules/ROOT/examples/pipes-fs-emitter.json   |   1 -
 docs/modules/ROOT/examples/pipes-fs-fetcher.json   |   1 -
 docs/modules/ROOT/examples/pipes-fs-pipeline.json  |   2 +-
 docs/modules/ROOT/examples/pipes-gcs-emitter.json  |   1 +
 docs/modules/ROOT/examples/pipes-gcs-fetcher.json  |   1 +
 docs/modules/ROOT/examples/pipes-gcs-iterator.json |   1 +
 docs/modules/ROOT/examples/pipes-gcs-pipeline.json |   1 +
 .../ROOT/examples/pipes-google-drive-fetcher.json  |   1 +
 docs/modules/ROOT/examples/pipes-http-fetcher.json |   1 +
 docs/modules/ROOT/examples/pipes-jdbc-emitter.json |   1 +
 .../modules/ROOT/examples/pipes-jdbc-iterator.json |   1 +
 .../modules/ROOT/examples/pipes-jdbc-pipeline.json |   1 +
 .../modules/ROOT/examples/pipes-jdbc-reporter.json |   1 +
 .../modules/ROOT/examples/pipes-json-iterator.json |   1 +
 .../modules/ROOT/examples/pipes-kafka-emitter.json |   1 +
 .../ROOT/examples/pipes-kafka-iterator.json        |   1 +
 .../ROOT/examples/pipes-kafka-pipeline.json        |   1 +
 .../examples/pipes-microsoft-graph-fetcher.json    |   1 +
 .../ROOT/examples/pipes-opensearch-emitter.json    |   1 +
 .../ROOT/examples/pipes-opensearch-pipeline.json   |   1 +
 .../ROOT/examples/pipes-opensearch-reporter.json   |   1 +
 docs/modules/ROOT/examples/pipes-s3-emitter.json   |   1 +
 docs/modules/ROOT/examples/pipes-s3-fetcher.json   |   1 +
 docs/modules/ROOT/examples/pipes-s3-iterator.json  |   1 +
 docs/modules/ROOT/examples/pipes-s3-pipeline.json  |   1 +
 .../modules/ROOT/examples/pipes-shared-server.json |   1 +
 .../ROOT/examples/pipes-solr-emitter-zk.json       |   1 +
 docs/modules/ROOT/examples/pipes-solr-emitter.json |   1 +
 .../modules/ROOT/examples/pipes-solr-iterator.json |   1 +
 .../modules/ROOT/examples/pipes-solr-pipeline.json |   1 +
 docs/modules/ROOT/examples/tesseract-basic.json    |   2 +-
 docs/modules/ROOT/examples/tesseract-full.json     |   2 +-
 docs/modules/ROOT/nav.adoc                         |  18 +-
 .../pages/advanced/charset-detection-design.adoc   |   2 +-
 .../integration-testing/run-uat-script.adoc        | 124 +++++++++
 .../ROOT/pages/advanced/junk-detection-build.adoc  |  16 +-
 .../ROOT/pages/advanced/language-detection.adoc    |  19 --
 .../pages/configuration/encoding-detectors.adoc    | 183 +++++++------
 .../configuration/parsers/external-parser.adoc     |   8 +-
 .../pages/maintainers/release-guides/docker.adoc   | 299 +++++++++++++++++----
 .../pages/maintainers/release-guides/tika.adoc     | 168 +++++++++++-
 docs/modules/ROOT/pages/maintainers/site.adoc      |  36 +--
 .../pages/migration-to-4x/design-notes-4x.adoc     |   2 +-
 docs/modules/ROOT/pages/migration-to-4x/index.adoc |   2 +
 docs/modules/ROOT/pages/pipes/configuration.adoc   |  53 +++-
 docs/modules/ROOT/pages/pipes/cpu-sizing.adoc      |  33 +++
 docs/modules/ROOT/pages/pipes/emitters.adoc        | 245 +++++------------
 docs/modules/ROOT/pages/pipes/fetchers.adoc        | 264 ++++--------------
 docs/modules/ROOT/pages/pipes/getting-started.adoc |   4 +-
 docs/modules/ROOT/pages/pipes/index.adoc           |   2 +-
 docs/modules/ROOT/pages/pipes/iterators.adoc       | 230 ++++------------
 docs/modules/ROOT/pages/pipes/parse-modes.adoc     | 143 +++++++---
 .../ROOT/pages/pipes/plugins/atlassian-jwt.adoc    | 121 +++++++++
 docs/modules/ROOT/pages/pipes/plugins/azblob.adoc  | 185 +++++++++++++
 docs/modules/ROOT/pages/pipes/plugins/csv.adoc     |  75 ++++++
 .../ROOT/pages/pipes/plugins/elasticsearch.adoc    | 196 ++++++++++++++
 .../ROOT/pages/pipes/plugins/filesystem.adoc       | 255 ++++++++++++++++++
 docs/modules/ROOT/pages/pipes/plugins/gcs.adoc     | 166 ++++++++++++
 .../ROOT/pages/pipes/plugins/google-drive.adoc     |  79 ++++++
 docs/modules/ROOT/pages/pipes/plugins/http.adoc    | 132 +++++++++
 docs/modules/ROOT/pages/pipes/plugins/index.adoc   | 133 +++++++++
 docs/modules/ROOT/pages/pipes/plugins/jdbc.adoc    | 241 +++++++++++++++++
 docs/modules/ROOT/pages/pipes/plugins/json.adoc    |  63 +++++
 docs/modules/ROOT/pages/pipes/plugins/kafka.adoc   | 213 +++++++++++++++
 .../ROOT/pages/pipes/plugins/microsoft-graph.adoc  |  85 ++++++
 .../ROOT/pages/pipes/plugins/opensearch.adoc       | 176 ++++++++++++
 docs/modules/ROOT/pages/pipes/plugins/s3.adoc      | 242 +++++++++++++++++
 docs/modules/ROOT/pages/pipes/plugins/solr.adoc    | 202 ++++++++++++++
 docs/modules/ROOT/pages/pipes/reporters.adoc       |  99 +++----
 .../ROOT/pages/pipes/shared-server-mode.adoc       |   2 +
 docs/modules/ROOT/pages/using-tika/grpc/index.adoc |  22 ++
 docs/pom.xml                                       |  82 +++++-
 docs/publish-docs.sh                               |  51 ++++
 .../org/apache/tika/mime/tika-mimetypes.xml        |   6 +-
 tika-parent/pom.xml                                |  16 +-
 .../tika-parsers-ml/tika-parser-nlp-module/pom.xml |   5 +-
 .../resources/config-examples/pdf-parser-full.json |   1 +
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |   6 +
 .../apache/tika/parser/pdf/PDFParserConfig.java    |  24 ++
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |  28 ++
 .../pipes/atlassianjwt/ConfigExamplesTest.java     |  69 +++++
 .../config-examples/atlassian-jwt-fetcher.json     |  19 ++
 .../tika/pipes/azblob/ConfigExamplesTest.java      | 134 +++++++++
 .../resources/config-examples/az-blob-emitter.json |  14 +
 .../resources/config-examples/az-blob-fetcher.json |  13 +
 .../config-examples/az-blob-pipeline.json          |  45 ++++
 .../config-examples/az-blob-pipes-iterator.json    |  13 +
 .../config/tika-config-az-blob-fetcher.xml         |  30 ---
 .../test/resources/config/tika-config-az-blob.xml  |  28 --
 .../apache/tika/pipes/csv/ConfigExamplesTest.java  |  70 +++++
 .../config-examples/csv-pipes-iterator.json        |  12 +
 .../apache/tika/pipes/es/ConfigExamplesTest.java   | 126 +++++++++
 .../test/resources/config-examples/es-emitter.json |  19 ++
 .../resources/config-examples/es-pipeline.json     |  60 +++++
 .../resources/config-examples/es-reporter.json     |  15 ++
 .../apache/tika/pipes/gcs/ConfigExamplesTest.java  | 133 +++++++++
 .../resources/config-examples/gcs-emitter.json     |  12 +
 .../resources/config-examples/gcs-fetcher.json     |  12 +
 .../resources/config-examples/gcs-pipeline.json    |  42 +++
 .../config-examples/gcs-pipes-iterator.json        |  11 +
 .../src/test/resources/config/tika-config-gcs.xml  |  26 --
 .../tika-pipes-google-drive/pom.xml                |   2 +-
 .../tika/pipes/googledrive/ConfigExamplesTest.java |  70 +++++
 .../config-examples/google-drive-fetcher.json      |  13 +
 .../apache/tika/pipes/http/ConfigExamplesTest.java |  70 +++++
 .../resources/config-examples/http-fetcher.json    |  21 ++
 .../reporter/jdbc/JDBCPipesReporterConfig.java     |  27 +-
 .../apache/tika/pipes/jdbc/ConfigExamplesTest.java | 150 +++++++++++
 .../resources/config-examples/jdbc-emitter.json    |  22 ++
 .../resources/config-examples/jdbc-pipeline.json   |  56 ++++
 .../config-examples/jdbc-pipes-iterator.json       |  15 ++
 .../resources/config-examples/jdbc-reporter.json   |  12 +
 .../tika-config-jdbc-emitter-attachments.xml       |  53 ----
 .../tika-config-jdbc-emitter-existing-table.xml    |  42 ---
 .../tika-config-jdbc-emitter-multivalued.xml       |  45 ----
 .../configs/tika-config-jdbc-emitter-trunc.xml     |  44 ---
 .../resources/configs/tika-config-jdbc-emitter.xml |  54 ----
 .../apache/tika/pipes/json/ConfigExamplesTest.java |  67 +++++
 .../config-examples/json-pipes-iterator.json       |   9 +
 .../tika/pipes/kafka/ConfigExamplesTest.java       | 119 ++++++++
 .../resources/config-examples/kafka-emitter.json   |  19 ++
 .../resources/config-examples/kafka-pipeline.json  |  43 +++
 .../config-examples/kafka-pipes-iterator.json      |  14 +
 .../tika-pipes-microsoft-graph/pom.xml             |   2 +-
 .../pipes/microsoftgraph/ConfigExamplesTest.java   |  72 +++++
 .../config-examples/microsoft-graph-fetcher.json   |  15 ++
 .../tika/pipes/opensearch/ConfigExamplesTest.java  | 123 +++++++++
 .../config-examples/opensearch-emitter.json        |  21 ++
 .../config-examples/opensearch-pipeline.json       |  64 +++++
 .../config-examples/opensearch-reporter.json       |  17 ++
 .../test/resources/tika-config-simple-emitter.xml  |  41 ---
 .../apache/tika/pipes/s3/ConfigExamplesTest.java   | 136 ++++++++++
 .../test/resources/config-examples/s3-emitter.json |  14 +
 .../test/resources/config-examples/s3-fetcher.json |  15 ++
 .../resources/config-examples/s3-pipeline.json     |  49 ++++
 .../config-examples/s3-pipes-iterator.json         |  13 +
 .../apache/tika/pipes/solr/ConfigExamplesTest.java | 134 +++++++++
 .../resources/config-examples/solr-emitter-zk.json |  15 ++
 .../resources/config-examples/solr-emitter.json    |  17 ++
 .../resources/config-examples/solr-pipeline.json   |  42 +++
 .../config-examples/solr-pipes-iterator.json       |  15 ++
 .../test/resources/tika-config-simple-emitter.xml  |  48 ----
 tika-server/docker-build/CHANGES.md                | 110 ++++++++
 tika-server/docker-build/README.md                 | 288 ++++++++++++++++++++
 .../docker-build/docker-compose-tika-customocr.yml |  39 +++
 .../docker-build/docker-compose-tika-grobid.yml    |  45 ++++
 .../docker-build/docker-compose-tika-vision.yml    |  62 +++++
 tika-server/docker-build/docker-tool.sh            |  87 +++++-
 tika-server/docker-build/full/Dockerfile           |  59 ++--
 tika-server/docker-build/full/Dockerfile.snapshot  |  12 +-
 tika-server/docker-build/minimal/Dockerfile        |  52 ++--
 .../docker-build/minimal/Dockerfile.snapshot       |  12 +-
 .../customocr/tika-config-inline.json              |  11 +
 .../customocr/tika-config-inline.xml               |  31 ---
 .../customocr/tika-config-rendered.json            |  16 ++
 .../customocr/tika-config-rendered.xml             |  38 ---
 .../sample-configs/grobid/tika-config.json         |  10 +
 .../sample-configs/grobid/tika-config.xml          |  24 --
 .../sample-configs/ner/run_tika_server.sh          |  62 -----
 .../sample-configs/ner/tika-config.xml             |  28 --
 .../vision/inception-rest-caption.xml              |  32 ---
 .../sample-configs/vision/inception-rest-video.xml |  32 ---
 .../sample-configs/vision/inception-rest.xml       |  32 ---
 .../sample-configs/vision/vlm-claude.json          |  18 ++
 .../sample-configs/vision/vlm-gemini.json          |  17 ++
 .../sample-configs/vision/vlm-openai.json          |  19 ++
 184 files changed, 7158 insertions(+), 1739 deletions(-)


Reply via email to