This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-4519
in repository https://gitbox.apache.org/repos/asf/tika.git
from 6eb8916f4 Merge branch 'main' into TIKA-4519
new 18f0d77a3 TIKA-4519 - checkpoint
add 7169ede1b TIKA-4327: update aws
add 05c17f179 TIKA-4531 fix some resource leaks (#2377)
add e631e58e2 TIKA-4531: remove leaky / dead / not needed code, fix javadoc
add 4ac0e545d TIKA-4327: update lucene
add 2021e7138 TIKA-4531: use jdk9 try-with-resources
add 5dbfb15d0 TIKA-4533 -- fix handling of TikaInputStreams with open
containers (#2378)
add 1ba2f3548 TIKA-4533 -- need to fix TikaInputStream's setting of length
on spooling (#2379)
add 065b44cfb TIKA-4327: update aws
add 79938eabf TIKA-4535 -- limit use of TikaConfig.getDefaultConfig to
where we nee… (#2381)
add 3a7596f36 TIKA-4327: update aws, jackson
add 457ae09e2 TIKA-4327: update tyrus
add cfa4b090d TIKA-4537: add testcontainers + docker start
add 8a5eb24d2 TIKA-4537: try longer timeout
add 75ab57ad1 TIKA-4537: revert
add 3c7660ff0 TIKA-4538: AZBlobFetcher always throws
FileAlreadyExistsException (#2383)
add 701323a48 TIKA-4533 - third time's the charm -- further refinement
(#2382)
add ee510b531 TIKA-4327: update junit, spring
add a678a49a2 TIKA-4327: update kotlin
add a32098b94 TIKA-4327: update aws
new 883d9366c Merge branch 'main' into TIKA-4519
new 77682b95b TIKA-4519 -- checkpoint commit
The 3 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.../apache/tika/extractor/RUnpackExtractor.java | 32 ++--
.../java/org/apache/tika/io/FilenameUtils.java | 3 +-
.../java/org/apache/tika/io/TikaInputStream.java | 43 ++++-
.../org/apache/tika/parser/DigestingParser.java | 23 ++-
.../org/apache/tika/plugins/PluginConfig.java} | 5 +-
.../org/apache/tika/plugins/PluginConfigs.java | 35 ++---
.../org/apache/tika/sax/SecureContentHandler.java | 10 +-
.../src/test/java/org/apache/tika/TikaTest.java | 9 ++
.../tika/eval/app/ExtractComparerRunner.java | 6 +-
.../apache/tika/eval/app/ExtractProfileRunner.java | 6 +-
.../org/apache/tika/eval/app/db/MimeBuffer.java | 16 +-
.../org/apache/tika/eval/app/io/ExtractReader.java | 8 +-
.../pipes/opensearch/tests/OpenSearchTest.java | 1 -
.../tika/pipes/s3/tests/PipeIntegrationTests.java | 1 -
tika-parent/pom.xml | 10 +-
.../org/apache/tika/parser/isatab/ISATabUtils.java | 25 +--
.../parser/microsoft/AbstractPOIFSExtractor.java | 36 +++--
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 52 +++----
.../parser/microsoft/pst/OutlookPSTParser.java | 31 +++-
.../parser/microsoft/pst/PSTMailItemParser.java | 5 +-
.../parser/microsoft/chm/TestChmExtraction.java | 4 +-
.../apache/tika/parser/AutoDetectParserTest.java | 37 +++++
.../src/test/resources/configs/tika-4533.xml | 47 ++++++
.../resources/test-documents/testLargeOLEDoc.doc | Bin 0 -> 2077696 bytes
tika-pipes/tika-emitters/pom.xml | 18 ++-
.../tika/pipes/emitter/azblob/AZBlobEmitter.java | 1 -
.../pipes/emitter/azblob/TestAZBlobEmitter.java | 1 -
.../tika-emitter-file-system}/pom.xml | 24 +--
.../src/main/assembly/assembly.xml | 0
.../tika/pipes/emitter/fs/FileSystemEmitter.java | 145 +++++++++++++++++
.../pipes/emitter/fs/FileSystemEmitterConfig.java | 20 +++
.../pipes/emitter/fs/FileSystemEmitterPlugin.java} | 8 +-
.../src/main/resources/emitter-plugin.properties} | 8 +-
.../apache/tika/pipes/emitter/gcs/GCSEmitter.java | 1 -
.../tika/pipes/emitter/gcs/TestGCSEmitter.java | 1 -
.../tika/pipes/emitter/jdbc/JDBCEmitter.java | 11 +-
.../tika/pipes/emitter/jdbc/JDBCEmitterTest.java | 1 -
.../apache/tika/pipes/emitter/s3/S3Emitter.java | 1 -
.../tika/pipes/fetcher/azblob/AZBlobFetcher.java | 2 +-
.../tika/pipes/fetcher/fs/FileSystemFetcher.java | 24 +--
...plugin.properties => fetcher-plugin.properties} | 0
.../pipes/fetcher/fs/FileSystemFetcherTest.java | 24 +--
.../tika-fetcher-microsoft-graph/pom.xml | 2 +-
.../tika/pipes/api}/emitter/AbstractEmitter.java | 44 +++---
.../AbstractStreamEmitter.java} | 17 +-
.../apache/tika/pipes/api/emitter/EmitData.java | 19 +++
.../apache/tika/pipes/api}/emitter/Emitter.java | 17 +-
.../tika/pipes/api}/emitter/StreamEmitter.java | 4 +-
.../tika/pipes/api/fetcher/AbstractFetcher.java | 4 +-
.../org/apache/tika/pipes/api/fetcher/Fetcher.java | 3 +-
.../tika/pipes/api/fetcher/FetcherConfig.java | 27 ----
tika-pipes/tika-pipes-core-tests/pom.xml | 6 +
.../apache/tika/pipes/core/PassbackFilterTest.java | 16 +-
.../apache/tika/pipes/core/PluginsTestHelper.java | 14 +-
.../tika/pipes/core/TikaPipesConfigTest.java | 10 +-
.../pipes/core/async/AsyncChaosMonkeyTest.java | 18 +--
.../apache/tika/pipes/core/async/MockEmitter.java | 59 -------
.../apache/tika/pipes/core/async/MockFetcher.java | 4 +-
.../tika/pipes/core/emitter/MockEmitter.java | 54 +++++--
.../tika/pipes/core/fetcher/MockFetcher.java | 4 +-
.../test/resources/configs/fetchers-emitters.json | 16 ++
.../src/test/resources/configs/fetchers.json | 10 --
.../apache/tika/pipes/core/tika-emit-config.xml | 12 --
.../org/apache/tika/pipes/core/PipesClient.java | 8 +-
.../apache/tika/pipes/core/PipesPluginsConfig.java | 91 ++++++++---
.../org/apache/tika/pipes/core/PipesResult.java | 30 ++--
.../org/apache/tika/pipes/core/PipesServer.java | 37 +++--
.../apache/tika/pipes/core/async/AsyncEmitter.java | 34 ++--
.../tika/pipes/core/async/AsyncProcessor.java | 24 +--
.../apache/tika/pipes/core/async/EmitDataPair.java | 6 +
.../emitter/{EmitData.java => EmitDataImpl.java} | 15 +-
.../apache/tika/pipes/core/emitter/EmitKey.java | 16 +-
.../tika/pipes/core/emitter/EmitterManager.java | 71 ++++++---
.../tika/pipes/core/emitter/EmptyEmitter.java | 23 ++-
.../pipes/core/emitter/TikaEmitterException.java | 4 +-
.../EmittingEmbeddedDocumentBytesHandler.java | 4 +-
.../tika/pipes/core/fetcher/EmptyFetcher.java | 4 +-
.../tika/pipes/core/fetcher/FetcherManager.java | 4 +-
.../core/fetcher/config/FetcherConfigImpl.java | 51 ------
.../pipes/core/pipesiterator/PipesIterator.java | 14 +-
.../serialization/FetchEmitTupleSerializer.java | 2 +-
.../pipes/core/serialization/JsonEmitData.java | 6 +-
.../tika/pipes/emitter/fs/FileSystemEmitter.java | 173 ---------------------
.../filelist/FileListPipesIterator.java | 8 +-
.../pipesiterator/fs/FileSystemPipesIterator.java | 6 +-
.../tika/pipes/core/TikaPipesConfigTest.java | 9 +-
.../tika/pipes/core/emitter/MockEmitter.java | 60 -------
.../filelist/FileListPipesIteratorTest.java | 12 +-
.../fs/FileSystemPipesIteratorTest.java | 4 +-
.../src/test/resources/configs/fetchers.json | 2 +-
.../apache/tika/config/pipes-iterator-config.xml | 3 +-
.../tika/config/pipes-iterator-multiple-config.xml | 6 +-
.../pipesiterator/azblob/AZBlobPipesIterator.java | 2 +-
.../pipes/pipesiterator/csv/CSVPipesIterator.java | 2 +-
.../src/test/java/TestCSVPipesIterator.java | 2 +-
.../pipes/pipesiterator/gcs/GCSPipesIterator.java | 2 +-
.../pipesiterator/jdbc/JDBCPipesIterator.java | 6 +-
.../pipesiterator/kafka/KafkaPipesIterator.java | 2 +-
.../pipes/pipesiterator/s3/S3PipesIterator.java | 2 +-
.../pipesiterator/solr/SolrPipesIterator.java | 2 +-
.../tika/serialization/PluginConfigLoader.java | 34 ++++
.../serialization/PluginsConfigDeserializer.java | 27 ++++
.../serialization/PluginsConfigSerializer.java | 21 +++
.../tika/serialization/PluginsConfigTest.java | 69 ++++++++
.../tika/server/core/resource/AsyncResource.java | 8 +-
.../tika/server/core/resource/PipesResource.java | 2 +-
.../server/core/resource/UnpackerResource.java | 18 +--
tika-translate/pom.xml | 2 +-
.../language/translate/impl/RTGTranslatorTest.java | 4 +-
109 files changed, 1165 insertions(+), 862 deletions(-)
copy
tika-core/src/{test/java/org/apache/tika/fork/unusedpackage/ClassInUnusedPackage.java
=> main/java/org/apache/tika/plugins/PluginConfig.java} (89%)
copy
tika-eval/tika-eval-core/src/main/java/org/apache/tika/eval/core/tokens/TokenCounts.java
=> tika-core/src/main/java/org/apache/tika/plugins/PluginConfigs.java (56%)
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4533.xml
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testLargeOLEDoc.doc
copy tika-pipes/{tika-fetchers/tika-fetcher-file-system =>
tika-emitters/tika-emitter-file-system}/pom.xml (85%)
copy tika-pipes/{tika-fetchers/tika-fetcher-file-system =>
tika-emitters/tika-emitter-file-system}/src/main/assembly/assembly.xml (100%)
create mode 100644
tika-pipes/tika-emitters/tika-emitter-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
create mode 100644
tika-pipes/tika-emitters/tika-emitter-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitterConfig.java
copy
tika-pipes/{tika-fetchers/tika-fetcher-file-system/src/main/java/org/apache/tika/pipes/fetcher/fs/FileSystemFetcherPlugin.java
=>
tika-emitters/tika-emitter-file-system/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitterPlugin.java}
(87%)
copy
tika-pipes/{tika-fetchers/tika-fetcher-file-system/src/main/resources/plugin.properties
=>
tika-emitters/tika-emitter-file-system/src/main/resources/emitter-plugin.properties}
(80%)
rename
tika-pipes/tika-fetchers/tika-fetcher-file-system/src/main/resources/{plugin.properties
=> fetcher-plugin.properties} (100%)
rename tika-pipes/{tika-pipes-core/src/main/java/org/apache/tika/pipes/core =>
tika-pipes-api/src/main/java/org/apache/tika/pipes/api}/emitter/AbstractEmitter.java
(51%)
copy
tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/{fetcher/AbstractFetcher.java
=> emitter/AbstractStreamEmitter.java} (76%)
create mode 100644
tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/emitter/EmitData.java
rename tika-pipes/{tika-pipes-core/src/main/java/org/apache/tika/pipes/core =>
tika-pipes-api/src/main/java/org/apache/tika/pipes/api}/emitter/Emitter.java
(75%)
rename tika-pipes/{tika-pipes-core/src/main/java/org/apache/tika/pipes/core =>
tika-pipes-api/src/main/java/org/apache/tika/pipes/api}/emitter/StreamEmitter.java
(91%)
delete mode 100644
tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/fetcher/FetcherConfig.java
delete mode 100644
tika-pipes/tika-pipes-core-tests/src/test/java/org/apache/tika/pipes/core/async/MockEmitter.java
create mode 100644
tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers-emitters.json
delete mode 100644
tika-pipes/tika-pipes-core-tests/src/test/resources/configs/fetchers.json
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/async/EmitDataPair.java
rename
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/emitter/{EmitData.java
=> EmitDataImpl.java} (83%)
delete mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/fetcher/config/FetcherConfigImpl.java
delete mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/emitter/fs/FileSystemEmitter.java
delete mode 100644
tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/emitter/MockEmitter.java
create mode 100644
tika-serialization/src/main/java/org/apache/tika/serialization/PluginConfigLoader.java
create mode 100644
tika-serialization/src/main/java/org/apache/tika/serialization/PluginsConfigDeserializer.java
create mode 100644
tika-serialization/src/main/java/org/apache/tika/serialization/PluginsConfigSerializer.java
create mode 100644
tika-serialization/src/test/java/org/apache/tika/serialization/PluginsConfigTest.java