This is an automated email from the ASF dual-hosted git repository.
tballison pushed a change to branch TIKA-4743-improve-site
in repository https://gitbox.apache.org/repos/asf/tika.git
from 474c8fb3cc TIKA-4743 - tiny improvement for a11y, and harden publish-docs.sh to validate PUBLISH_DIR and refuse symlinked '_'
add cc3cae85fc drop chunking (#2847)
add 2bab6d43f2 TIKA-4734 (#2843)
add acab9b7762 TIKA-4742 -- refactor logging for beta-1 (#2844)
add ba501e6157 fix race condition in PipesClient (#2849)
add 499e703579 TIKA-4745 - add cohort-specific caps (#2848)
add f3425d8895 Merge branch 'main' into TIKA-4743-improve-site
No new revisions were added by this update.
Summary of changes:
.skills/dev.md | 16 ++
.skills/tika-eval-encoding-regression.md | 167 +++++++++++++++++++++
.../pages/migration-to-4x/migrating-to-4x.adoc | 5 +-
docs/modules/ROOT/pages/pipes/troubleshooting.adoc | 80 ++++++++++
docs/publish-docs.sh | 11 +-
docs/supplemental-ui/partials/header-content.hbs | 2 -
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 57 ++++---
.../test/java/org/apache/tika/cli/TikaCLITest.java | 33 ++++
.../resources/pipes-fork-server-default-log4j2.xml | 32 ----
.../NaiveBayesBigramEncodingDetector.java | 126 +++++++++++++---
.../charsoup/CharSoupLanguageDetector.java | 101 ++++---------
.../tika/parser/ocrencode/EncodeOCRParser.java | 4 +-
.../tika/parser/ner/grobid/GrobidNERecogniser.java | 12 +-
.../tika/parser/ner/nltk/NLTKNERecogniser.java | 2 +-
.../org/apache/tika/parser/apple/IWorkTest.java | 32 ----
.../detect/microsoft/POIFSContainerDetector.java | 2 +-
.../tika/parser/microsoft/libpst/LibPstParser.java | 3 +-
.../microsoft/msg/ExtendedMetadataExtractor.java | 2 +-
.../microsoft/ooxml/TikaSheetXMLHandler.java | 5 +-
.../apache/tika/parser/hwp/HwpTextExtractorV5.java | 4 +-
.../apache/tika/parser/ocr/tess4j/ImageDeskew.java | 7 +-
.../apache/tika/parser/ocr/tess4j/ImageUtil.java | 7 +-
.../org/apache/tika/client/HttpClientFactory.java | 4 +-
.../org/apache/tika/pipes/core/PipesClient.java | 70 ++++++---
.../tika/pipes/core/async/AsyncProcessor.java | 3 +-
.../apache/tika/pipes/core/server/EmitHandler.java | 10 +-
.../tika/pipes/core/server/FetchHandler.java | 4 +-
.../tika/pipes/core/server/ParseHandler.java | 22 +--
.../apache/tika/pipes/core/server/PipesServer.java | 21 ++-
.../apache/tika/pipes/core/server/PipesWorker.java | 4 +-
.../resources/pipes-fork-server-default-log4j2.xml | 24 ++-
.../fetcher/atlassianjwt/AtlassianJwtFetcher.java | 7 +-
.../tika/pipes/emitter/azblob/AZBlobEmitter.java | 6 +-
.../tika/pipes/fetcher/azblob/AZBlobFetcher.java | 2 +-
.../pipes/iterator/azblob/AZBlobPipesIterator.java | 2 +-
.../tika/pipes/iterator/csv/CSVPipesIterator.java | 2 +-
.../apache/tika/pipes/emitter/es/ESEmitter.java | 4 +-
.../apache/tika/pipes/emitter/gcs/GCSEmitter.java | 2 +-
.../apache/tika/pipes/fetcher/gcs/GCSFetcher.java | 2 +-
.../tika/pipes/iterator/gcs/GCSPipesIterator.java | 2 +-
.../tika/pipes/fetcher/http/HttpFetcher.java | 8 +-
.../tika/pipes/emitter/jdbc/JDBCEmitter.java | 8 +-
.../pipes/iterator/jdbc/JDBCPipesIterator.java | 19 ++-
.../pipesiterator/json/JsonPipesIterator.java | 2 +-
.../tika/pipes/emitter/kafka/KafkaEmitter.java | 2 +-
.../pipes/iterator/kafka/KafkaPipesIterator.java | 2 +-
.../emitter/opensearch/OpenSearchEmitter.java | 6 +-
.../apache/tika/pipes/emitter/s3/S3Emitter.java | 2 +-
.../apache/tika/pipes/fetcher/s3/S3Fetcher.java | 6 +-
.../tika/pipes/iterator/s3/S3PipesIterator.java | 2 +-
.../pipes/iterator/solr/SolrPipesIterator.java | 2 +-
.../apache/tika/server/client/TikaClientCLI.java | 6 +-
.../tika/server/core/resource/AsyncResource.java | 6 +-
.../server/core/resource/DetectorResource.java | 2 +-
.../server/core/resource/LanguageResource.java | 2 +-
.../server/core/resource/MetadataResource.java | 2 +-
.../server/core/resource/TranslateResource.java | 6 +-
57 files changed, 665 insertions(+), 319 deletions(-)
create mode 100644 .skills/tika-eval-encoding-regression.md
delete mode 100644 tika-core/src/main/resources/pipes-fork-server-default-log4j2.xml
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-integration-tests/src/test/java/org/apache/tika/parser/apple/IWorkTest.java