This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-4564
in repository https://gitbox.apache.org/repos/asf/tika.git
from 069e453c88 TIKA-4564 -- tweaks
add 05c286c148 TIKA-4327: update aws
add 9eebc5ab53 TIKA-4562 -- refactor runtime configs in tika-server to json
add f99a05e389 Merge branch 'main' into TIKA-4564
add 12384339f8 TIKA-4564 -- fix test for Windows
No new revisions were added by this update.
Summary of changes:
.../tika/annotation/TikaComponentProcessor.java | 82 ++++-
.../java/org/apache/tika/config/TikaComponent.java | 25 ++
.../apache/tika/cli/XmlToJsonConfigConverter.java | 5 +-
.../org/apache/tika/config/ConfigContainer.java | 15 +-
.../org/apache/tika/config/ParseContextConfig.java | 23 +-
.../org/apache/tika/config/SelfConfiguring.java | 56 +++
.../org/apache/tika/config/TikaTaskTimeout.java | 30 +-
.../SkipEmbeddedDocumentSelector.java} | 23 +-
.../java/org/apache/tika/parser/ParseContext.java | 28 ++
.../SimplePasswordProvider.java} | 44 ++-
tika-parent/pom.xml | 2 +-
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 6 +-
.../java/org/apache/tika/parser/pdf/OCR2XHTML.java | 11 +-
.../java/org/apache/tika/parser/pdf/PDF2XHTML.java | 16 +-
.../tika/parser/pdf/PDFMarkedContent2XHTML.java | 16 +-
.../java/org/apache/tika/parser/pdf/PDFParser.java | 79 ++--
.../apache/tika/parser/pdf/PDFParserConfig.java | 148 +-------
.../org/apache/tika/parser/pdf/PDFParserTest.java | 20 +-
.../apache/tika/parser/pdf/PDFRenderingTest.java | 96 +----
.../org/apache/tika/parser/pdf/PDFParserTest.java | 93 +++++
.../resources/configs/tika-config-lib-pst.json | 16 +
.../configs/tika-config-non-primitives.json | 11 +
.../resources/configs/tika-config-rendering.json | 14 +
.../org/apache/tika/config/TIKA-1558-exclude.json | 24 ++
.../apache/tika/config/TIKA-1558-excludesub.json | 12 +
.../tika/config/TIKA-1702-translator-default.json | 9 +
.../config/TIKA-1702-translator-empty-default.json | 10 +
.../tika/config/TIKA-1702-translator-empty.json | 7 +
.../tika/config/TIKA-1708-detector-composite.json | 16 +
.../apache/tika/parser/ocr/tesseract-config.json | 17 +
.../org/apache/tika/pipes/api/HandlerConfig.java | 4 +-
.../org/apache/tika/pipes/core/PipesClient.java | 2 +-
.../serialization/FetchEmitTupleDeserializer.java | 3 +
.../apache/tika/pipes/core/server/PipesServer.java | 3 +
.../apache/tika/config/loader/ComponentInfo.java | 31 +-
.../tika/config/loader/ComponentRegistry.java | 105 +++++-
.../apache/tika/config/loader/ConfigLoader.java | 11 +-
.../apache/tika/config/loader/JsonMergeUtils.java | 104 ++++++
.../tika/serialization/ConfigDeserializer.java | 22 +-
.../serialization/ParseContextDeserializer.java | 82 +++--
.../tika/serialization/ParseContextSerializer.java | 131 +++++--
.../tika/serialization/ParseContextUtils.java | 202 +++++++++++
.../apache/tika/config/JsonConfigHelperTest.java | 4 +-
.../tika/config/loader/ComponentRegistryTest.java | 2 +-
.../TestParseContextSerialization.java | 181 +++++----
tika-server/tika-server-core/pom.xml | 5 +
.../server/core/CompositeParseContextConfig.java | 42 ---
.../tika/server/core/FetcherStreamFactory.java | 2 -
.../tika/server/core/ParseContextConfig.java | 40 --
.../tika/server/core/ServerStatusWatcher.java | 9 +-
.../server/core/config/DocumentSelectorConfig.java | 46 ---
.../server/core/config/PasswordProviderConfig.java | 53 ---
.../tika/server/core/config/TimeoutConfig.java | 37 --
.../server/core/resource/DetectorResource.java | 1 -
.../server/core/resource/MetadataResource.java | 35 +-
.../tika/server/core/resource/PipesResource.java | 3 +
.../core/resource/RecursiveMetadataResource.java | 56 ++-
.../tika/server/core/resource/TikaResource.java | 318 ++++++++--------
.../server/core/resource/UnpackerResource.java | 88 ++++-
.../org.apache.tika.server.core.ParseContextConfig | 17 -
.../org/apache/tika/server/core/TikaPipesTest.java | 1 -
.../server/core/TikaServerIntegrationTest.java | 37 --
.../core/TikaServerPipesIntegrationTest.java | 44 +++
.../configs/tika-config-server-basic.json | 3 +-
.../configs/tika-config-timeout-100ms.json | 11 +
.../configs/tika-config-with-timeout.json | 14 +
.../server/standard/config/PDFServerConfig.java | 70 ----
.../standard/config/TesseractServerConfig.java | 74 ----
.../org.apache.tika.server.core.ParseContextConfig | 16 -
.../tika/server/standard/MetadataResourceTest.java | 83 +++--
.../standard/RecursiveMetadataResourceTest.java | 54 ++-
.../apache/tika/server/standard/TikaPipesTest.java | 18 +-
.../tika/server/standard/TikaResourceTest.java | 404 ++++++++++++---------
.../tika/server/standard/UnpackerResourceTest.java | 65 +++-
.../standard/UnpackerResourceWithConfigTest.java | 53 ++-
.../resources/configs/tika-config-url-fetcher.json | 12 +
76 files changed, 2086 insertions(+), 1466 deletions(-)
create mode 100644
tika-core/src/main/java/org/apache/tika/config/SelfConfiguring.java
copy tika-core/src/main/java/org/apache/tika/{metadata/filter/NoOpFilter.java => extractor/SkipEmbeddedDocumentSelector.java} (66%)
copy tika-core/src/main/java/org/apache/tika/{metadata/filter/NoOpFilter.java => parser/SimplePasswordProvider.java} (51%)
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-lib-pst.json
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-non-primitives.json
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-rendering.json
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1558-exclude.json
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1558-excludesub.json
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1702-translator-default.json
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty-default.json
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1702-translator-empty.json
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/config/TIKA-1708-detector-composite.json
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/org/apache/tika/parser/ocr/tesseract-config.json
copy tika-core/src/main/java/org/apache/tika/exception/TikaConfigException.java => tika-serialization/src/main/java/org/apache/tika/config/loader/ComponentInfo.java (52%)
create mode 100644
tika-serialization/src/main/java/org/apache/tika/config/loader/JsonMergeUtils.java
create mode 100644
tika-serialization/src/main/java/org/apache/tika/serialization/ParseContextUtils.java
delete mode 100644
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/CompositeParseContextConfig.java
delete mode 100644
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ParseContextConfig.java
delete mode 100644
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/config/DocumentSelectorConfig.java
delete mode 100644
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/config/PasswordProviderConfig.java
delete mode 100644
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/config/TimeoutConfig.java
delete mode 100644
tika-server/tika-server-core/src/main/resources/META-INF/services/org.apache.tika.server.core.ParseContextConfig
create mode 100644
tika-server/tika-server-core/src/test/resources/configs/tika-config-timeout-100ms.json
create mode 100644
tika-server/tika-server-core/src/test/resources/configs/tika-config-with-timeout.json
delete mode 100644
tika-server/tika-server-standard/src/main/java/org/apache/tika/server/standard/config/PDFServerConfig.java
delete mode 100644
tika-server/tika-server-standard/src/main/java/org/apache/tika/server/standard/config/TesseractServerConfig.java
delete mode 100644
tika-server/tika-server-standard/src/main/resources/META-INF/services/org.apache.tika.server.core.ParseContextConfig
create mode 100644
tika-server/tika-server-standard/src/test/resources/configs/tika-config-url-fetcher.json