This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch chardet-work
in repository https://gitbox.apache.org/repos/asf/tika.git


    from dbb4d19222 fix ebcdic test
     add e5151b1e5b TIKA-4327: update google-api
     add 2fd8c0eda2 TIKA-4327: update microsoft-graph.version, maven.bundle, aws, google cloud, junrar, mockito, error_prone_annotations
     add 9451da2d56 TIKA-4606: Upgrade Apache Ignite from 2.x to 3.x (fresh) (#2654)
     add 2a9957a12b Bump org.tukaani:xz from 1.11 to 1.12 (#2670)
     add 0385b58466 Bump io.swagger.core.v3:swagger-annotations from 2.2.38 to 2.2.43 (#2669)
     add 1d46c8b97f Bump org.jetbrains.kotlin:kotlin-stdlib from 2.2.0 to 2.3.10 (#2663)
     add aaef3ca7a3 Bump info.picocli:picocli from 4.7.5 to 4.7.7 (#2661)
     add b9903d0840 Bump org.jetbrains:annotations from 26.0.2-1 to 26.1.0 
(#2659)
     add 4ba11a4e19 Bump org.yaml:snakeyaml from 2.4 to 2.6 (#2671)
     add 4c9017fca3 Bump jakarta.inject:jakarta.inject-api from 2.0.1 to 
2.0.1.MR (#2667)
     add 93b5cfa96f TIKA-4488: update micronaut
     add d139bfe02e TIKA-4488: add micronaut version
     add a7116b05d9 TIKA-4488: add micronaut version
     add 27933e64b9 TIKA-4327: add comment
     add 9f94799669 TIKA-4327: update tyrus, kiota, solrj, spotless-maven-plugin
     add 30e46db4fa TIKA-4606: Add e2e tests for Ignite 3.x upgrade (#2655)
     add ca67465e90 TIKA-4327: update aws, swagger, jackrabbit; add comment on solrj 10 migration
     add fdac94fc18 TIKA-4682 4x tweaks (#2674)
     add bbcb82d5a6 Merge remote-tracking branch 'origin/main' into chardet-work
     add 5b32cdf660 chardet - fix IBM855/IBM866 model placement, add docs and EBCDIC routing test
     add ade0611dd5 chardet - wip
     add bb9f585def chardet - wip
     add 599a0427a4 TIKA-4327: update aws, zookeeper, shade plugin, azure
     add 2600f092b6 Merge remote-tracking branch 'origin/main' into chardet-work

No new revisions were added by this update.

Summary of changes:
 .github/workflows/main-jdk17-build.yml             |   19 +
 .../main-jdk17-windows-build-multi-locale.yml      |    2 +-
 .github/workflows/main-jdk17-windows-build.yml     |    2 +-
 .java-version                                      |   18 -
 docs/modules/ROOT/nav.adoc                         |    1 +
 .../pages/advanced/charset-detection-design.adoc   |  543 ++++++-----
 pom.xml                                            |    6 +
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |    6 +-
 tika-e2e-tests/README.md                           |   12 +-
 tika-e2e-tests/pom.xml                             |   66 +-
 tika-e2e-tests/tika-grpc/README.md                 |  100 +-
 tika-e2e-tests/tika-grpc/pom.xml                   |   52 +-
 .../tika/parser/ocr/TesseractOCRConfig.properties  |   25 -
 .../customocr/tika-config-inline.json              |   25 -
 .../customocr/tika-config-inline.xml               |   49 -
 .../customocr/tika-config-rendered.json            |   27 -
 .../customocr/tika-config-rendered.xml             |   55 --
 .../tika/parser/journal/GrobidExtractor.properties |   16 -
 .../sample-configs/grobid/tika-config.json         |   22 -
 .../sample-configs/grobid/tika-config.xml          |   41 -
 .../tika-grpc/sample-configs/ignite/README.md      |  117 ---
 .../sample-configs/ignite/tika-config-ignite.json  |    2 +-
 .../sample-configs/ner/run_tika_server.sh          |   62 --
 .../tika-grpc/sample-configs/ner/tika-config.json  |   26 -
 .../tika-grpc/sample-configs/ner/tika-config.xml   |   45 -
 .../tika-grpc/sample-configs/test-simple.json      |   20 -
 .../vision/inception-rest-caption.json             |   18 -
 .../vision/inception-rest-caption.xml              |   32 -
 .../vision/inception-rest-video.json               |   18 -
 .../sample-configs/vision/inception-rest-video.xml |   32 -
 .../sample-configs/vision/inception-rest.json      |   18 -
 .../sample-configs/vision/inception-rest.xml       |   32 -
 .../org/apache/tika/pipes/ExternalTestBase.java    |  285 +++++-
 .../pipes/filesystem/FileSystemFetcherTest.java    |   79 +-
 .../tika/pipes/ignite/IgniteConfigStoreTest.java   |  679 ++++++++-----
 .../java/org/apache/tika/pipes/ignite/README.md    |  172 ----
 .../src/test/resources/docker-compose-ignite.yml   |   25 -
 .../src/test/resources/docker-compose.yml          |   16 -
 .../tika-grpc/src/test/resources/log4j2.xml        |   19 -
 .../src/test/resources/test-fixtures/sample.csv    |    4 +
 .../src/test/resources/test-fixtures/sample.html   |    8 +
 .../src/test/resources/test-fixtures/sample.txt    |    3 +
 .../src/test/resources/test-fixtures/sample.xml    |    5 +
 ...g-ignite.json => tika-config-ignite-local.json} |    4 +-
 .../src/test/resources/tika-config-ignite.json     |    2 +-
 .../tika-grpc/src/test/resources/tika-config.json  |   49 +-
 .../charsoup/CharSoupEncodingDetector.java         |   67 +-
 .../tika/ml/chardetect/CharsetConfusables.java     |   61 +-
 .../ml/chardetect/MojibusterEncodingDetector.java  |  287 ++----
 .../tika/ml/chardetect/chardetect-ebcdic.bin       |  Bin 7312 -> 0 bytes
 .../org/apache/tika/ml/chardetect/chardetect.bin   |  Bin 410106 -> 606934 bytes
 .../tika/ml/chardetect/EbcdicRoutingTest.java      |   78 +-
 tika-eval/tika-eval-app/pom.xml                    |   25 +-
 .../tika-eval-app}/src/main/assembly/assembly.xml  |    4 -
 tika-grpc/dev-tika-config.json                     |    3 +-
 tika-grpc/pom.xml                                  |   22 +-
 tika-grpc/run-dev.sh                               |   15 +-
 .../org/apache/tika/pipes/grpc/TikaGrpcServer.java |    7 +-
 .../apache/tika/pipes/grpc/TikaGrpcServerImpl.java |   39 +-
 tika-grpc/src/main/proto/tika.proto                |    2 +
 .../src/test/resources/tika-config-ignite.json     |    2 +-
 .../charsoup/CharSoupLanguageDetector.java         |    6 +
 tika-ml/tika-ml-chardetect/README.md               |  266 ++++-
 .../chardetect/tools/BuildCharsetTrainingData.java | 1016 ++++++++++++++++++++
 .../ml/chardetect/tools/EvalCharsetDetectors.java  |    4 +-
 .../ml/chardetect/tools/TrainCharsetModel.java     |    5 +-
 .../src/test/python/build_charset_training.py      |  855 ----------------
 tika-parent/pom.xml                                |  123 ++-
 .../org/apache/tika/parser/txt/TXTParserTest.java  |    7 +-
 .../tika/async/cli/FileListPipesIterator.java      |  122 +++
 .../org/apache/tika/async/cli/PluginsWriter.java   |  118 ++-
 .../org/apache/tika/async/cli/TikaAsyncCLI.java    |  102 +-
 .../apache/tika/async/cli/AsyncCliParserTest.java  |   44 +-
 .../tika/async/cli/FileListPipesIteratorTest.java  |  103 ++
 tika-pipes/tika-pipes-config-store-ignite/pom.xml  |  113 ++-
 .../tika/pipes/ignite/ExtensionConfigDTO.java      |   29 +-
 .../tika/pipes/ignite/IgniteConfigStore.java       |  182 ++--
 .../ignite/config/IgniteConfigStoreConfig.java     |   59 +-
 .../pipes/ignite/server/IgniteStoreServer.java     |  201 ++--
 .../tika/pipes/ignite/IgniteConfigStoreTest.java   |  119 +--
 .../tika/pipes/core/async/AsyncProcessor.java      |   33 +-
 .../tika-pipes-google-drive/pom.xml                |    4 +-
 .../tika-pipes-microsoft-graph/pom.xml             |    4 +-
 .../apache/tika/server/core/TikaServerProcess.java |   29 +-
 tika-translate/pom.xml                             |    2 +-
 85 files changed, 3789 insertions(+), 3228 deletions(-)
 delete mode 100644 .java-version
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/customocr/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/customocr/tika-config-inline.json
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/customocr/tika-config-inline.xml
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/customocr/tika-config-rendered.json
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/customocr/tika-config-rendered.xml
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/grobid/org/apache/tika/parser/journal/GrobidExtractor.properties
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/grobid/tika-config.json
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/grobid/tika-config.xml
 delete mode 100644 tika-e2e-tests/tika-grpc/sample-configs/ignite/README.md
 delete mode 100755 
tika-e2e-tests/tika-grpc/sample-configs/ner/run_tika_server.sh
 delete mode 100644 tika-e2e-tests/tika-grpc/sample-configs/ner/tika-config.json
 delete mode 100644 tika-e2e-tests/tika-grpc/sample-configs/ner/tika-config.xml
 delete mode 100644 tika-e2e-tests/tika-grpc/sample-configs/test-simple.json
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/vision/inception-rest-caption.json
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/vision/inception-rest-caption.xml
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/vision/inception-rest-video.json
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/vision/inception-rest-video.xml
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/vision/inception-rest.json
 delete mode 100644 
tika-e2e-tests/tika-grpc/sample-configs/vision/inception-rest.xml
 delete mode 100644 
tika-e2e-tests/tika-grpc/src/test/java/org/apache/tika/pipes/ignite/README.md
 delete mode 100644 
tika-e2e-tests/tika-grpc/src/test/resources/docker-compose-ignite.yml
 delete mode 100644 
tika-e2e-tests/tika-grpc/src/test/resources/docker-compose.yml
 delete mode 100644 tika-e2e-tests/tika-grpc/src/test/resources/log4j2.xml
 create mode 100644 
tika-e2e-tests/tika-grpc/src/test/resources/test-fixtures/sample.csv
 create mode 100644 
tika-e2e-tests/tika-grpc/src/test/resources/test-fixtures/sample.html
 create mode 100644 
tika-e2e-tests/tika-grpc/src/test/resources/test-fixtures/sample.txt
 create mode 100644 
tika-e2e-tests/tika-grpc/src/test/resources/test-fixtures/sample.xml
 copy tika-e2e-tests/tika-grpc/src/test/resources/{tika-config-ignite.json => tika-config-ignite-local.json} (90%)
 delete mode 100644 
tika-encoding-detectors/tika-encoding-detector-mojibuster/src/main/resources/org/apache/tika/ml/chardetect/chardetect-ebcdic.bin
 copy {tika-pipes/tika-pipes-fork-parser => tika-eval/tika-eval-app}/src/main/assembly/assembly.xml (92%)
 create mode 100644 
tika-ml/tika-ml-chardetect/src/main/java/org/apache/tika/ml/chardetect/tools/BuildCharsetTrainingData.java
 delete mode 100644 
tika-ml/tika-ml-chardetect/src/test/python/build_charset_training.py
 create mode 100644 
tika-pipes/tika-async-cli/src/main/java/org/apache/tika/async/cli/FileListPipesIterator.java
 create mode 100644 
tika-pipes/tika-async-cli/src/test/java/org/apache/tika/async/cli/FileListPipesIteratorTest.java

Reply via email to