This is an automated email from the ASF dual-hosted git repository.
ndipiazza pushed a change to branch TIKA-4606-ignite-3x-upgrade
in repository https://gitbox.apache.org/repos/asf/tika.git
from c8956af6a5 Merge branch 'main' of github.com:apache/tika into
TIKA-4606-ignite-3x-upgrade
add 4ee9e39133 TIKA-4610 -- modernize rat exclusions and add exclusion for
README.md (#2511)
add 20f92e9d78 TIKA-4582 (#2467)
add 31a9344658 TIKA-4327: update aws, jsoup, puppycrawl
add b2e0dd5630 Bump google-http-client.version from 2.0.0 to 2.0.3 (#2512)
add c9a866d7fa Bump com.google.j2objc:j2objc-annotations from 3.0.0 to 3.1
(#2514)
add 1eb469aacc Bump io.grpc:grpc-context from 1.69.0 to 1.78.0 (#2513)
add 248fee17d6 TIKA-4605: move grpc version to parent
add 697d7c047d TIKA-4327: update aws, apache parent
add 64b411d1b1 TIKA-4613 -- look for jsonconfig constructor, fall back to
no-arg (#2516)
add 2f7b46d7db TIKA-4614: add Media Management metadata extraction, avoid
NPE, add test
add 23d2109081 TIKA-4327: update aws, jackrabbit, pf4j
add 3f15e82bb4 TIKA-4615 -- rm junit 4 and update testcontainers (#2517)
add 408c26e1e0 TIKA-4612 -- improve mp3 and aac detection (#2520)
add 1b87c57b92 Bump com.nimbusds:nimbus-jose-jwt from 10.6 to 10.7 (#2525)
add 029fb68092 Bump com.google.errorprone:error_prone_annotations from
2.45.0 to 2.46.0 (#2524)
add 9b26790a60 Bump com.microsoft.graph:microsoft-graph from 6.59.0 to
6.60.0 (#2523)
add 2f301ac261 Bump software.amazon.awssdk:bom from 2.41.4 to 2.41.5
(#2521)
add 786d682616 TIKA-4620: avoid NPE (#2526)
add b8ae8ab8d1 TIKA-4327: update apache parent (#2527)
add 48f5991c41 TIKA-4621: avoid NPE (#2528)
add c43f000033 TIKA-4616 -- mv fetcher/emitter CRUD to tika-grpc (#2519)
add 9751122d68 TIKA-4622: Add test for PDF annotations without page
content streams (#2530)
add c72dbc97c3 improve resource clean up -- directories from PipesClient
(#2532)
add bc93db345a TIKA-4619 (#2531)
add a34d52da20 TIKA-4618 -- improve spooling strategy configuration (#2533)
add 5762c59cc8 fix rat
add caefbbf50d add rat check to the primary workflow
add 5f9a808ac3 TIKA-4623 -- for general updates, don't buffer unless
enableRewind has been set (#2534)
add 066412ea14 WIP: Checkpoint - CachingSource metadata update and cleanup
(#2535)
add 53b0cdb865 Bump org.springframework:spring-context from 7.0.2 to 7.0.3
(#2539)
add 4190d7d497 Bump software.amazon.awssdk:bom from 2.41.5 to 2.41.10
(#2543)
add 4b4ebbd862 Bump org.codehaus.mojo:versions-maven-plugin from 2.20.1 to
2.21.0 (#2541)
add 000e3dfe57 Bump io.projectreactor:reactor-core from 3.8.1 to 3.8.2
(#2540)
add dabb1f1e76 Bump com.fasterxml.jackson:jackson-bom from 2.20.1 to
2.21.0 (#2544)
add f1489ec81a Bump org.netpreserve:jwarc from 0.33.0 to 0.34.0 (#2537)
add 3f8577e534 Bump com.google.cloud:google-cloud-storage from 2.61.0 to
2.62.0 (#2542)
add 1c2b1eb0f8 Bump reactor.netty.version from 1.3.1 to 1.3.2 (#2538)
add 5e21b45b3c TIKA-4626 (#2545)
add 19eb31632f TIKA-4631 -- add a detect/no-parse option to pipes (#2549)
add 231ac690b6 TIKA-4625: Add AsciiDoc documentation module (#2536)
add d7781ecf33 TIKA-4630 -- improve tracking of internal paths (#2548)
add 30b5b26396 TIKA-4628 -- improve pipesClient+pipesServer ipc: critical
socket.setTcpNoDelay(true) and migrate to pure jackson serialization (#2546)
add 1c078b499b TIKA-4632 -- initial antora integration (#2550)
add bd1c7edc5f TIKA-4630-on-main (#2551)
add d0f76be3a5 Merge remote-tracking branch 'origin/main'
add 33e1f6a5c8 rm asciidoc plugin and resources
add 7263844498 TIKA-4327: update aws, google auth, google http
add bb785a220c Bump com.diffplug.spotless:spotless-maven-plugin from 3.1.0
to 3.2.0 (#2552)
add 766cf2cd51 TIKA-4634 -- refactor metadata write filter/limiter (#2554)
add bd1513677e TIKA-4635 -- refactor DigesterFactory to be standalone
(#2555)
add 48ca355225 TIKA-4633 centralize limits (#2556)
add 222e0859e3 TIKA-4638 -- unify sax style configuration (#2557)
add 589d1c25b1 TIKA-4636-simplify-embedded-extractor-handling (#2558)
add f73ea2f16a TIKA-4639 (#2559)
add 00954ff3bc TIKA-4640 -- use ephemeral port for unit tests in
tika-server (#2560)
add cd2654772b TIKA-4641 (#2562)
add dcb1ca031e tika-server-simplify-tests (#2563)
add 8e6949312e TIKA-4642 - improve tls configuration and documentation
(#2564)
add 86857ce8af TIKA-4644 - improve config endpoints (#2566)
add acca2fec6c TIKA-4637 (#2565) add UNPACK option for tika-pipes and
integrate it in tika-app and tika-server
add 778acde282 TIKA-4643 -- add frictionless
add ec02aebb37 TIKA-4641 -- step 2: refactor serialization, further. add
docs (#2567)
add a399aa03af Bump com.azure:azure-sdk-bom from 1.3.3 to 1.3.4 (#2576)
add 26fc000596 Bump commons-codec:commons-codec from 1.20.0 to 1.21.0
(#2574)
add b8f4b1a336 Bump software.amazon.awssdk:bom from 2.41.14 to 2.41.19
(#2571)
add 9edac076c8 Bump com.diffplug.spotless:spotless-maven-plugin from 3.2.0
to 3.2.1 (#2570)
add 16d2c76818 Bump org.apache.maven.plugins:maven-compiler-plugin (#2572)
add f91ec64305 Bump sis.version from 1.5 to 1.6 (#2569)
add f6505831dd Bump org.pf4j:pf4j from 3.14.1 to 3.15.0 (#2573)
add d91105104b Bump com.google.cloud:google-cloud-storage from 2.62.0 to
2.62.1 (#2568)
add 46b002d97e TIKA-4645 - part 2, general updates for alpha release --
update docs
add 1c06d308a9 TIKA-4645-usability-scripts and bug fixes (#2577)
add bef2d336b1 TIKA-4646 -- extract hyperlinks from instrText and other
areas in ooxml (#2578)
add d8ee89b143 TIKA-4647 - use an argfile to launch PipesServer (#2579)
add 2b9dc0e8b7 TIKA-4617 - really, I mean it, don't change the file name
(#2581)
add 59af7f3497 TIKA-4648 -- add standard mvn repo and general ASF repo
items (#2580)
add 9f7e4dc04a TIKA-4630 -- use embedded stored filename as the
"resourcename" in gz (#2582)
add 7d7cdb5ecf TIKA-4651 -- refactor cli to use pipes for parser (#2586)
add 570a3b44aa TIKA-4650-refactor-zip-parser (#2584)
add 1cafe16333 TIKA-4327: update grpc, bind, animal-sniffer-annotations
add e327bf61da TIKA-4327: update aws
add d0c5cf5e87 TIKA-4652 -- add a yolo option to tika-pipes to restore
legacy crashability but with the safety of pipes (#2587)
add 272f779b00 Bump org.ops4j.pax.url:pax-url-aether from 3.0.1 to 3.0.2
(#2597)
add 726d0f4b65 Bump software.amazon.awssdk:bom from 2.41.23 to 2.41.24
(#2596)
add 75b84fd405 Bump com.github.luben:zstd-jni from 1.5.7-6 to 1.5.7-7
(#2595)
add f4640be3fc Bump org.apache.maven.plugins:maven-dependency-plugin
(#2594)
add 50697c86cd Bump org.codehaus.mojo:animal-sniffer-annotations from 1.26
to 1.27 (#2593)
add 84cb28865b Bump com.google.errorprone:error_prone_annotations from
2.46.0 to 2.47.0 (#2592)
add e30ce0c56f Bump org.jetbrains.kotlin:kotlin-stdlib from 2.3.0 to
2.3.10 (#2590)
add 897b6d52fe Bump com.microsoft.graph:microsoft-graph from 6.60.0 to
6.61.0 (#2589)
add 9360e1b890 Bump io.netty:netty-bom from 4.2.9.Final to 4.2.10.Final
(#2588)
add 212a4679bc TIKA-4653-markdown-handler (#2598)
add 02dbed720b Merge branch 'main' of github.com:apache/tika into
TIKA-4606-ignite-3x-upgrade
No new revisions were added by this update.
Summary of changes:
.editorconfig | 52 +
.github/pull_request_template.md | 2 +-
.github/workflows/main-jdk17-build.yml | 6 +-
.../main-jdk17-windows-build-multi-locale.yml | 4 +
.github/workflows/main-jdk17-windows-build.yml | 4 +
.github/workflows/main-jdk21-build.yml | 2 +
.github/workflows/main-jdk25-build.yml | 2 +
.gitignore | 1 -
.java-version | 18 +
.mvn/wrapper/maven-wrapper.properties | 20 +
CONTRIBUTING.md | 50 +
README.md | 146 ++-
SECURITY.md | 62 ++
docs/antora-playbook.yml | 66 ++
docs/antora.yml | 24 +
docs/assets/logos/asf-tika-logos.zip | Bin 0 -> 446228 bytes
.../ROOT/examples/migration-full-example.json | 1 +
docs/modules/ROOT/examples/pdf-parser-basic.json | 1 +
docs/modules/ROOT/examples/pdf-parser-full.json | 1 +
docs/modules/ROOT/examples/tesseract-basic.json | 1 +
docs/modules/ROOT/examples/tesseract-full.json | 1 +
docs/modules/ROOT/nav.adoc | 46 +
.../ROOT/pages/advanced/embedded-documents.adoc | 252 +++++
docs/modules/ROOT/pages/advanced/index.adoc | 33 +
.../advanced/integration-testing/tika-app.adoc | 398 ++++++++
.../advanced/integration-testing/tika-server.adoc | 473 +++++++++
docs/modules/ROOT/pages/advanced/robustness.adoc | 137 +++
.../ROOT/pages/advanced/setting-limits.adoc | 468 +++++++++
docs/modules/ROOT/pages/advanced/spooling.adoc | 226 +++++
.../modules/ROOT/pages/advanced/zip-detection.adoc | 102 ++
.../ROOT/pages/configuration/digesters.adoc | 184 ++++
docs/modules/ROOT/pages/configuration/index.adoc | 44 +
.../pages/configuration/parsers/pdf-parser.adoc | 43 +
.../parsers/tesseract-ocr-parser.adoc | 67 ++
docs/modules/ROOT/pages/developers/index.adoc | 30 +
.../ROOT/pages/developers/serialization.adoc | 340 +++++++
docs/modules/ROOT/pages/faq.adoc | 28 +
docs/modules/ROOT/pages/index.adoc | 43 +
docs/modules/ROOT/pages/maintainers/index.adoc | 32 +
.../pages/maintainers/release-guides/docker.adoc | 133 +++
.../pages/maintainers/release-guides/grpc.adoc | 32 +
.../pages/maintainers/release-guides/helm.adoc | 138 +++
.../pages/maintainers/release-guides/index.adoc | 32 +
.../pages/maintainers/release-guides/tika.adoc | 271 ++++++
docs/modules/ROOT/pages/maintainers/site.adoc | 172 ++++
.../pages/migration-to-4x/design-notes-4x.adoc | 127 +++
docs/modules/ROOT/pages/migration-to-4x/index.adoc | 50 +
.../pages/migration-to-4x/metadata-changes-4x.adoc | 121 +++
.../migration-to-4x/migrating-tika-server-4x.adoc | 171 ++++
.../pages/migration-to-4x/migrating-to-4x.adoc | 154 +++
.../pages/migration-to-4x/serialization-4x.adoc | 101 ++
docs/modules/ROOT/pages/pipes/index.adoc | 43 +
.../ROOT/pages/pipes/shared-server-mode.adoc | 119 +++
docs/modules/ROOT/pages/pipes/unpack-config.adoc | 273 ++++++
docs/modules/ROOT/pages/roadmap.adoc | 96 ++
docs/modules/ROOT/pages/security.adoc | 81 ++
docs/modules/ROOT/pages/using-tika/cli/index.adoc | 134 +++
docs/modules/ROOT/pages/using-tika/grpc/index.adoc | 32 +
docs/modules/ROOT/pages/using-tika/index.adoc | 71 ++
.../pages/using-tika/java-api/getting-started.adoc | 130 +++
.../ROOT/pages/using-tika/java-api/index.adoc | 179 ++++
.../ROOT/pages/using-tika/server/index.adoc | 88 ++
docs/modules/ROOT/pages/using-tika/server/tls.adoc | 651 +++++++++++++
docs/pom.xml | 93 ++
docs/src/assembly/docs.xml | 37 +
docs/supplemental-ui/css/search.css | 82 ++
docs/supplemental-ui/img/ASF_Tika-colour.png | Bin 0 -> 30720 bytes
docs/supplemental-ui/img/ASF_Tika-colour.svg | 109 +++
docs/supplemental-ui/js/search.js | 119 +++
docs/supplemental-ui/partials/footer-content.hbs | 3 +
docs/supplemental-ui/partials/footer-scripts.hbs | 1 +
docs/supplemental-ui/partials/head-scripts.hbs | 1 +
docs/supplemental-ui/partials/header-content.hbs | 29 +
mvnw | 295 ++++++
mvnw.cmd | 189 ++++
pom.xml | 20 +-
.../tika/annotation/TikaComponentProcessor.java | 85 +-
.../java/org/apache/tika/config/TikaComponent.java | 29 +-
tika-app/pom.xml | 12 +-
.../main/java/org/apache/tika/cli/AsyncHelper.java | 40 +
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 350 ++++---
.../src/main/java/org/apache/tika/gui/TikaGUI.java | 9 +-
.../java/org/apache/tika/cli/AsyncHelperTest.java | 43 +
.../test/java/org/apache/tika/cli/TikaCLITest.java | 233 +++--
.../tika/cli/XmlToJsonConfigConverterTest.java | 11 -
.../test/resources/configs/config-template.json | 25 +-
tika-bundles/pom.xml | 6 +-
tika-bundles/tika-bundle-standard/pom.xml | 11 +-
tika-core/pom.xml | 16 +-
tika-core/src/main/java/org/apache/tika/Tika.java | 67 +-
.../org/apache/tika/config/EmbeddedLimits.java | 222 +++++
.../java/org/apache/tika/config/OutputLimits.java | 269 ++++++
.../java/org/apache/tika/config/TimeoutLimits.java | 135 +++
.../org/apache/tika/detect/AutoDetectReader.java | 10 +-
.../org/apache/tika/detect/DefaultDetector.java | 101 +-
.../java/org/apache/tika/detect/DetectHelper.java | 121 +++
.../java/org/apache/tika/digest/DigestDef.java | 2 +-
.../java/org/apache/tika/digest/DigestHelper.java | 39 +-
.../main/java/org/apache/tika/digest/Digester.java | 11 +-
.../org/apache/tika/digest/DigesterFactory.java | 32 +-
.../apache/tika/digest/InputStreamDigester.java | 109 +--
.../exception/EmbeddedLimitReachedException.java | 62 ++
.../tika/extractor/BasicEmbeddedBytesSelector.java | 77 --
.../EmbeddedDocumentByteStoreExtractorFactory.java | 7 +-
.../tika/extractor/EmbeddedDocumentExtractor.java | 9 +
.../tika/extractor/ParserContainerExtractor.java | 4 +-
.../ParsingEmbeddedDocumentExtractor.java | 89 +-
.../tika/extractor/RUnpackExtractorFactory.java | 121 ---
...rFactory.java => StandardExtractorFactory.java} | 18 +-
...ocumentBytesHandler.java => UnpackHandler.java} | 2 +-
...eddedBytesSelector.java => UnpackSelector.java} | 6 +-
.../java/org/apache/tika/io/ByteArraySource.java | 143 +++
.../org/apache/tika/io/CachingInputStream.java | 215 +++++
.../java/org/apache/tika/io/CachingSource.java | 292 ++++++
.../main/java/org/apache/tika/io/FileSource.java | 147 +++
.../java/org/apache/tika/io/FilenameUtils.java | 16 +-
.../src/main/java/org/apache/tika/io/IOUtils.java | 48 -
.../org/apache/tika/io/InputStreamFactory.java | 34 -
.../java/org/apache/tika/io/SpoolingStrategy.java | 140 +++
.../main/java/org/apache/tika/io/StreamCache.java | 282 ++++++
.../java/org/apache/tika/io/TikaInputSource.java | 68 ++
.../java/org/apache/tika/io/TikaInputStream.java | 905 +++++-------------
.../java/org/apache/tika/metadata/Metadata.java | 68 +-
.../main/java/org/apache/tika/metadata/Office.java | 41 +
.../main/java/org/apache/tika/metadata/PST.java | 1 -
.../apache/tika/metadata/TikaCoreProperties.java | 39 +-
.../main/java/org/apache/tika/metadata/Zip.java | 130 +++
...aWriteFilter.java => MetadataWriteLimiter.java} | 21 +-
...ctory.java => MetadataWriteLimiterFactory.java} | 19 +-
...iteFilter.java => StandardMetadataLimiter.java} | 94 +-
.../StandardMetadataLimiterFactory.java | 152 +++
.../writefilter/StandardWriteFilterFactory.java | 127 ---
.../org/apache/tika/parser/AutoDetectParser.java | 89 +-
.../apache/tika/parser/AutoDetectParserConfig.java | 209 +---
.../org/apache/tika/parser/CompositeParser.java | 8 +-
.../java/org/apache/tika/parser/EmptyParser.java | 2 +-
.../java/org/apache/tika/parser/ParseContext.java | 54 ++
.../java/org/apache/tika/parser/ParseRecord.java | 144 +++
.../{StatefulParser.java => ParsingIntent.java} | 29 +-
.../java/org/apache/tika/parser/ParsingReader.java | 21 +-
.../org/apache/tika/parser/PasswordProvider.java | 4 +-
.../apache/tika/parser/RecursiveParserWrapper.java | 10 +-
.../apache/tika/parser/SimplePasswordProvider.java | 2 +
.../tika/parser/external/ExternalParser.java | 6 +-
.../tika/parser/external2/ExternalParser.java | 2 +-
.../parser/multiple/AbstractMultipleParser.java | 13 +-
.../sax/AbstractRecursiveParserWrapperHandler.java | 36 +-
.../tika/sax/BasicContentHandlerFactory.java | 112 ++-
.../org/apache/tika/sax/ContentHandlerFactory.java | 22 +-
.../tika/sax/RecursiveParserWrapperHandler.java | 20 +-
.../java/org/apache/tika/sax/SAXOutputConfig.java | 79 ++
.../org/apache/tika/sax/SecureContentHandler.java | 24 +
...ry.java => StreamingContentHandlerFactory.java} | 25 +-
.../apache/tika/sax/ToMarkdownContentHandler.java | 542 +++++++++++
.../apache/tika/sax/WriteOutContentHandler.java | 18 +
.../org/apache/tika/sax/XHTMLContentHandler.java | 87 +-
.../java/org/apache/tika/utils/ParserUtils.java | 43 -
.../apache/tika/utils/RereadableInputStream.java | 308 ------
.../org/apache/tika/mime/tika-mimetypes.xml | 20 +-
.../org/apache/tika/MultiThreadedTikaTest.java | 3 +-
.../org/apache/tika/TestRereadableInputStream.java | 204 ----
.../src/test/java/org/apache/tika/TikaTest.java | 8 +-
.../org/apache/tika/io/TikaInputStreamTest.java | 898 +++++++++++++++++-
.../org/apache/tika/parser/mock/MockParser.java | 4 +
.../tika/sax/BasicContentHandlerFactoryTest.java | 36 +-
.../apache/tika/sax/BodyContentHandlerTest.java | 3 +-
.../apache/tika/sax/SecureContentHandlerTest.java | 8 +
.../tika/sax/ToMarkdownContentHandlerTest.java | 941 ++++++++++++++++++
.../apache/tika/sax/XHTMLContentHandlerTest.java | 123 +++
tika-detectors/tika-detector-magika/pom.xml | 6 +-
tika-detectors/tika-detector-siegfried/pom.xml | 6 +-
tika-e2e-tests/README.md | 6 +-
tika-e2e-tests/pom.xml | 2 +-
tika-e2e-tests/tika-grpc/README.md | 20 +-
tika-e2e-tests/tika-grpc/pom.xml | 2 +-
tika-eval/pom.xml | 6 +-
tika-eval/tika-eval-app/pom.xml | 12 +-
.../main/resources/pipes-iterator-template.json | 18 +-
tika-eval/tika-eval-core/pom.xml | 12 +-
.../org/apache/tika/example/ParsingExample.java | 2 +-
.../tika/example/PickBestTextEncodingParser.java | 10 +-
.../tika/example/PipesForkParserExample.java | 6 +-
.../org/apache/tika/example/RollbackSoftware.java | 2 +-
tika-grpc/README.md | 33 +-
tika-grpc/pom.xml | 9 +-
.../apache/tika/pipes/grpc/TikaGrpcServerImpl.java | 60 +-
tika-handlers/pom.xml | 6 +-
.../tika-pipes-kafka-integration-tests/pom.xml | 10 +-
.../tika/pipes/kafka/tests/TikaPipesKafkaTest.java | 4 +-
.../src/test/resources/kafka/plugins-template.json | 25 +-
.../pom.xml | 8 +-
.../pipes/opensearch/tests/OpenSearchTest.java | 18 +-
.../resources/opensearch/plugins-template.json | 28 +-
.../opensearch/tika-config-opensearch.json | 25 +-
.../tika-pipes-s3-integration-tests/pom.xml | 10 +-
.../tika/pipes/s3/tests/S3PipeIntegrationTest.java | 3 +-
.../src/test/resources/s3/plugins-template.json | 25 +-
.../src/test/resources/s3/tika-config-s3.json | 5 +-
.../tika-pipes-solr-integration-tests/pom.xml | 8 +-
.../pipes/solr/tests/TikaPipesSolrTestBase.java | 8 +-
.../src/test/resources/solr/plugins-template.json | 25 +-
.../org/apache/custom/parser/MyCustomParser.java | 2 +-
tika-java7/pom.xml | 8 +-
tika-langdetect/pom.xml | 16 +-
tika-langdetect/tika-langdetect-opennlp/pom.xml | 6 +-
tika-langdetect/tika-langdetect-tika/pom.xml | 6 +-
tika-parent/pom.xml | 102 +-
tika-parsers/pom.xml | 32 +-
tika-parsers/tika-parsers-extended/pom.xml | 12 +-
.../tika-parser-scientific-module/pom.xml | 8 +-
.../apache/tika/parser/envi/EnviHeaderParser.java | 2 +-
.../org/apache/tika/parser/gdal/GDALParser.java | 8 +-
.../geoinfo/GeographicInformationParser.java | 2 +-
.../org/apache/tika/parser/grib/GribParser.java | 2 +-
.../java/org/apache/tika/parser/hdf/HDFParser.java | 2 +-
.../apache/tika/parser/isatab/ISArchiveParser.java | 7 +-
.../apache/tika/parser/netcdf/NetCDFParser.java | 2 +-
.../tika/parser/isatab/ISArchiveParserTest.java | 2 +
.../tika-parser-scientific-package/pom.xml | 6 +-
.../src/test/resources/2.4.1-no-tesseract.txt | 6 +-
.../src/test/resources/2.4.1-tesseract.txt | 6 +-
.../ocr/configs/tika-config-restricted-gdal.json | 5 -
tika-parsers/tika-parsers-ml/pom.xml | 14 +-
.../tika-parsers-ml/tika-parser-nlp-module/pom.xml | 8 +-
.../apache/tika/parser/journal/TEIDOMParser.java | 2 +-
.../apache/tika/parser/ner/NamedEntityParser.java | 2 +-
.../tika-parsers-ml/tika-transcribe-aws/pom.xml | 8 +-
.../parser/transcribe/aws/AmazonTranscribe.java | 2 +-
tika-parsers/tika-parsers-standard/pom.xml | 24 +-
.../tika-parsers-standard-modules/pom.xml | 26 +-
.../tika/parser/apple/AppleSingleFileParser.java | 20 +-
.../org/apache/tika/parser/apple/PListParser.java | 19 +-
.../tika/parser/iwork/IWorkPackageParser.java | 2 +-
.../tika/parser/iwork/PagesContentHandler.java | 2 +-
.../parser/iwork/iwana/IWork13PackageParser.java | 8 +-
.../org/apache/tika/parser/audio/AudioParser.java | 2 +-
.../org/apache/tika/parser/audio/MidiParser.java | 2 +-
.../java/org/apache/tika/parser/mp3/Mp3Parser.java | 2 +-
.../java/org/apache/tika/parser/mp4/MP4Parser.java | 2 +-
.../org/apache/tika/parser/ogg/FlacParser.java | 2 +-
.../java/org/apache/tika/parser/ogg/OggParser.java | 2 +-
.../org/apache/tika/parser/ogg/OpusParser.java | 2 +-
.../org/apache/tika/parser/ogg/SpeexParser.java | 2 +-
.../org/apache/tika/parser/ogg/TheoraParser.java | 2 +-
.../org/apache/tika/parser/ogg/VorbisParser.java | 2 +-
.../org/apache/tika/parser/video/FLVParser.java | 2 +-
.../org/apache/tika/parser/dgn/DGN8Parser.java | 2 +-
.../java/org/apache/tika/parser/dwg/DWGParser.java | 2 +-
.../org/apache/tika/parser/dwg/DWGReadParser.java | 2 +-
.../java/org/apache/tika/parser/prt/PRTParser.java | 2 +-
.../tika-parser-code-module/pom.xml | 6 +-
.../org/apache/tika/parser/asm/ClassParser.java | 2 +-
.../apache/tika/parser/asm/XHTMLClassVisitor.java | 5 +-
.../apache/tika/parser/code/SourceCodeParser.java | 2 +-
.../tika/parser/executable/ExecutableParser.java | 2 +-
.../executable/UniversalExecutableParser.java | 4 +-
.../java/org/apache/tika/parser/mat/MatParser.java | 2 +-
.../org/apache/tika/parser/sas/SAS7BDATParser.java | 2 +-
.../org/apache/tika/parser/crypto/Pkcs7Parser.java | 2 +-
.../org/apache/tika/parser/crypto/TSDParser.java | 71 +-
.../parser/digestutils/BouncyCastleDigester.java | 35 +-
.../digestutils/BouncyCastleDigesterFactory.java | 33 +-
.../tika/parser/digestutils/CommonsDigester.java | 33 +-
.../parser/digestutils/CommonsDigesterFactory.java | 33 +-
.../tika-parser-font-module/pom.xml | 6 +-
.../tika/parser/font/AdobeFontMetricParser.java | 2 +-
.../apache/tika/parser/font/TrueTypeParser.java | 2 +-
.../tika-parser-html-module/pom.xml | 6 +-
.../org/apache/tika/parser/html/HtmlHandler.java | 11 +-
.../html/StandardHtmlEncodingDetectorTest.java | 2 +-
.../tika-parser-image-module/pom.xml | 6 +-
.../tika/parser/image/AbstractImageParser.java | 4 +-
.../org/apache/tika/parser/image/ICNSParser.java | 2 +-
.../org/apache/tika/parser/image/PSDParser.java | 2 +-
.../org/apache/tika/parser/image/WebPParser.java | 2 +-
.../apache/tika/parser/jdbc/AbstractDBParser.java | 2 +-
.../apache/tika/parser/jdbc/JDBCTableReader.java | 4 +-
.../tika-parser-mail-module/pom.xml | 6 +-
.../tika/parser/mail/MailContentHandler.java | 4 +-
.../org/apache/tika/parser/mail/RFC822Parser.java | 2 +-
.../org/apache/tika/parser/mbox/MboxParser.java | 4 +-
.../tika-parser-microsoft-module/pom.xml | 6 +-
.../detect/microsoft/POIFSContainerDetector.java | 51 +-
.../detect/microsoft/ooxml/OPCPackageDetector.java | 7 +-
.../microsoft/MSEmbeddedStreamTranslator.java | 1 -
.../parser/microsoft/AbstractPOIFSExtractor.java | 15 +-
.../apache/tika/parser/microsoft/EMFParser.java | 6 +-
.../tika/parser/microsoft/HSLFExtractor.java | 6 +-
.../tika/parser/microsoft/JackcessExtractor.java | 2 +-
.../tika/parser/microsoft/JackcessParser.java | 2 +-
.../tika/parser/microsoft/MSOwnerFileParser.java | 2 +-
.../apache/tika/parser/microsoft/OfficeParser.java | 18 +-
.../tika/parser/microsoft/OldExcelParser.java | 2 +-
.../tika/parser/microsoft/OutlookExtractor.java | 12 +-
.../apache/tika/parser/microsoft/TNEFParser.java | 10 +-
.../apache/tika/parser/microsoft/WMFParser.java | 2 +-
.../microsoft/activemime/ActiveMimeParser.java | 4 +-
.../tika/parser/microsoft/chm/ChmParser.java | 4 +-
.../tika/parser/microsoft/libpst/EmailVisitor.java | 15 +-
.../tika/parser/microsoft/libpst/LibPstParser.java | 2 +-
.../parser/microsoft/onenote/OneNoteParser.java | 2 +-
.../microsoft/onenote/OneNoteTreeWalker.java | 7 +-
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 53 +-
.../microsoft/ooxml/FieldHyperlinkTracker.java | 168 ++++
.../microsoft/ooxml/OOXMLExtractorFactory.java | 25 +-
.../microsoft/ooxml/OOXMLTikaBodyPartHandler.java | 25 +
.../ooxml/OOXMLWordAndPowerPointTextHandler.java | 187 +++-
.../ooxml/SXWPFWordExtractorDecorator.java | 179 +++-
.../ooxml/XSSFExcelExtractorDecorator.java | 390 ++++++++
.../ooxml/XWPFWordExtractorDecorator.java | 100 +-
.../microsoft/ooxml/xps/XPSExtractorDecorator.java | 2 +-
.../microsoft/ooxml/xps/XPSPageContentHandler.java | 8 +-
.../xslf/XSLFEventBasedPowerPointExtractor.java | 5 +
.../ooxml/xwpf/XWPFEventBasedWordExtractor.java | 5 +
.../ooxml/xwpf/ml2006/BinaryDataHandler.java | 2 +-
.../ooxml/xwpf/ml2006/Word2006MLParser.java | 2 +-
.../parser/microsoft/pst/OutlookPSTParser.java | 20 +-
.../parser/microsoft/pst/PSTMailItemParser.java | 9 +-
.../parser/microsoft/rtf/RTFEmbObjHandler.java | 45 +-
.../tika/parser/microsoft/rtf/RTFParser.java | 2 +-
.../microsoft/xml/AbstractXML2003Parser.java | 2 +-
.../tika/parser/microsoft/xml/WordMLParser.java | 2 +-
.../microsoft/POIFSContainerDetectorTest.java | 51 +-
.../tika/parser/microsoft/ExcelParserTest.java | 43 +
.../parser/microsoft/libpst/TestLibPstParser.java | 11 +-
.../ooxml/OOXMLContainerExtractionTest.java | 2 +-
.../parser/microsoft/ooxml/OOXMLParserTest.java | 43 +
.../parser/microsoft/ooxml/SXWPFExtractorTest.java | 109 +++
.../parser/microsoft/pst/OutlookPSTParserTest.java | 7 +-
.../test-documents/testAttachedTemplate.docx | Bin 0 -> 2284 bytes
.../test-documents/testDataConnections.xlsx | Bin 0 -> 2967 bytes
.../test/resources/test-documents/testDdeLink.xlsx | Bin 0 -> 3030 bytes
.../resources/test-documents/testExternalRefs.docx | Bin 0 -> 2125 bytes
.../resources/test-documents/testFrameset.docx | Bin 0 -> 2328 bytes
.../resources/test-documents/testHoverAndVml.docx | Bin 0 -> 2270 bytes
.../resources/test-documents/testInstrLink.docx | Bin 0 -> 14464 bytes
.../resources/test-documents/testMailMerge.docx | Bin 0 -> 2306 bytes
.../resources/test-documents/testSubdocument.docx | Bin 0 -> 1980 bytes
.../tika-parser-miscoffice-module/pom.xml | 6 +-
.../apache/tika/detect/ole/MiscOLEDetector.java | 25 +-
.../java/org/apache/tika/parser/dbf/DBFParser.java | 2 +-
.../java/org/apache/tika/parser/dif/DIFParser.java | 2 +-
.../org/apache/tika/parser/epub/EpubParser.java | 8 +-
.../apache/tika/parser/hwp/HwpStreamReader.java | 8 +-
.../org/apache/tika/parser/hwp/HwpV5Parser.java | 2 +-
.../apache/tika/parser/indesign/IDMLParser.java | 8 +-
.../java/org/apache/tika/parser/mif/MIFParser.java | 2 +-
.../parser/odf/FlatOpenDocumentMacroHandler.java | 7 +-
.../tika/parser/odf/FlatOpenDocumentParser.java | 2 +-
.../tika/parser/odf/OpenDocumentBodyHandler.java | 7 +-
.../tika/parser/odf/OpenDocumentContentParser.java | 2 +-
.../tika/parser/odf/OpenDocumentMacroHandler.java | 3 +-
.../apache/tika/parser/odf/OpenDocumentParser.java | 26 +-
.../tika/parser/wordperfect/QuattroProParser.java | 2 +-
.../tika/parser/wordperfect/WordPerfectParser.java | 2 +-
.../tika/parser/wordperfect/WPInputStreamTest.java | 3 +-
.../tika-parser-news-module/pom.xml | 6 +-
.../org/apache/tika/parser/feed/FeedParser.java | 2 +-
.../apache/tika/parser/iptc/IptcAnpaParser.java | 5 +-
.../apache/tika/parser/ocr/TesseractOCRParser.java | 2 +-
.../tika-parser-pdf-module/pom.xml | 6 +-
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 14 +-
.../java/org/apache/tika/parser/pdf/PDF2XHTML.java | 2 +-
.../java/org/apache/tika/parser/pdf/PDFParser.java | 4 +-
.../tika/parser/pdf/image/ImageGraphicsEngine.java | 6 +-
.../tika/renderer/pdf/mutool/MuPDFRenderer.java | 2 +-
.../tika/renderer/pdf/pdfbox/PDFBoxRenderer.java | 2 +-
.../org/apache/tika/parser/pdf/PDFParserTest.java | 11 +-
.../tika-parser-pkg-module/pom.xml | 6 +-
.../tika/parser/pkg/AbstractArchiveParser.java | 84 ++
.../apache/tika/parser/pkg/CompressorParser.java | 56 +-
.../org/apache/tika/parser/pkg/PackageParser.java | 463 +--------
.../java/org/apache/tika/parser/pkg/RarParser.java | 8 +-
.../org/apache/tika/parser/pkg/SevenZParser.java | 166 ++++
.../org/apache/tika/parser/pkg/UnrarParser.java | 4 +-
.../java/org/apache/tika/parser/pkg/ZipParser.java | 698 ++++++++++++++
.../apache/tika/parser/pkg/ZipParserConfig.java | 105 ++
.../org/apache/tika/parser/pkg/GzipParserTest.java | 39 +-
.../apache/tika/parser/pkg/PackageParserTest.java | 45 +-
.../org/apache/tika/parser/pkg/ZipBenchmark.java | 144 +++
.../org/apache/tika/parser/pkg/ZipParserTest.java | 457 +++++++--
.../org/apache/tika/parser/pkg/tika-config.xml | 31 -
.../src/test/resources/test-documents/bob.gz | Bin 0 -> 41 bytes
.../tika-parser-text-module/pom.xml | 8 +-
.../apache/tika/parser/csv/TextAndCSVParser.java | 8 +-
.../tika/parser/strings/Latin1StringsParser.java | 2 +-
.../apache/tika/parser/strings/StringsParser.java | 2 +-
.../java/org/apache/tika/parser/txt/TXTParser.java | 2 +-
.../tika-parser-webarchive-module/pom.xml | 6 +-
.../org/apache/tika/parser/http/HttpParser.java | 4 +-
.../org/apache/tika/parser/wacz/WACZParser.java | 21 +-
.../org/apache/tika/parser/warc/WARCParser.java | 4 +-
.../apache/tika/parser/warc/WARCParserTest.java | 1 +
.../tika-parser-xml-module/pom.xml | 6 +-
.../java/org/apache/tika/parser/tmx/TMXParser.java | 2 +-
.../apache/tika/parser/xliff/XLIFF12Parser.java | 2 +-
.../org/apache/tika/parser/xliff/XLZParser.java | 2 +-
.../apache/tika/parser/xml/FictionBookParser.java | 2 +-
.../java/org/apache/tika/parser/xml/XMLParser.java | 2 +-
.../tika-parser-xmp-commons/pom.xml | 6 +-
.../tika/parser/xmp/XMPMetadataExtractor.java | 118 ++-
...ExtractorTest.java => XmpboxExtractorTest.java} | 122 +--
.../detect/zip/DefaultZipContainerDetector.java | 209 ++--
.../detect/zip/StreamingZipContainerDetector.java | 83 +-
.../org/apache/tika/zip/utils/ZipSalvager.java | 211 ++++-
.../apache/tika/detect/zip/ZipDetectionTest.java | 65 +-
.../tika-parsers-standard-package/pom.xml | 6 +-
.../config/ComponentRegistryIntegrationTest.java | 12 +-
.../org/apache/tika/config/ConfigExamplesTest.java | 97 ++
.../tika/config/TikaEncodingDetectorTest.java | 6 +-
.../tika/detect/TestContainerAwareDetector.java | 159 +++-
.../apache/tika/detect/TestDetectorLoading.java | 3 +-
.../java/org/apache/tika/mime/TestMimeTypes.java | 9 +
.../tika/parser/AutoDetectParserConfigTest.java | 42 +-
.../apache/tika/parser/AutoDetectParserTest.java | 30 +-
.../tika/parser/AutoDetectReaderParserTest.java | 2 +-
.../tika/parser/RecursiveParserWrapperTest.java | 157 ++-
.../apache/tika/parser/crypto/TSDParserTest.java | 3 +-
.../tika/parser/digest/DigestConfigTest.java | 58 +-
.../digest/SkipContainerDocumentDigestTest.java | 93 +-
.../apache/tika/parser/image/JpegParserTest.java | 2 +-
.../parser/microsoft/ooxml/OOXMLParserTest.java | 7 +-
.../parser/microsoft/ooxml/TruncatedOOXMLTest.java | 5 +-
.../tika/parser/microsoft/rtf/RTFParserTest.java | 5 +-
.../org/apache/tika/parser/pdf/PDFParserTest.java | 2 +-
.../org/apache/tika/parser/pkg/ZipParserTest.java | 20 +-
.../config-examples/migration-full-example.json | 26 +
.../config-examples/pdf-parser-basic.json} | 1 +
.../resources/config-examples/pdf-parser-full.json | 53 ++
.../config-examples/tesseract-basic.json} | 4 +-
.../resources/config-examples/tesseract-full.json | 35 +
.../src/test/resources/configs/tika-4441-120.json | 29 -
.../test/resources/configs/tika-4441-12000000.json | 29 -
.../src/test/resources/configs/tika-4441-neg1.json | 29 -
.../src/test/resources/configs/tika-4533.json | 12 +-
.../configs/tika-config-bc-digests-base32.json | 13 +-
.../configs/tika-config-bc-digests-basic.json | 13 +-
.../configs/tika-config-bc-digests-multiple.json | 13 +-
.../configs/tika-config-commons-digests-basic.json | 13 +-
.../configs/tika-config-digests-pdf-only.json | 13 +-
.../tika-config-digests-skip-container.json | 19 +-
.../resources/configs/tika-config-digests.json | 21 +-
...a-config-doubling-custom-handler-decorator.json | 2 -
.../resources/configs/tika-config-md5-digest.json | 7 +-
.../resources/configs/tika-config-no-names.json | 9 +-
...a-config-upcasing-custom-handler-decorator.json | 45 +-
.../resources/configs/tika-config-with-names.json | 8 +-
.../configs/tika-config-write-filter.json | 36 +-
.../apache/tika/parser/ocr/tesseract-config.json | 5 -
.../test-documents/testMP3_id3_false_aac.mp3 | Bin 0 -> 1024 bytes
tika-pipes/pom.xml | 13 +-
.../org/apache/tika/async/cli/PluginsWriter.java | 52 +-
.../apache/tika/async/cli/SimpleAsyncConfig.java | 52 +-
.../org/apache/tika/async/cli/TikaAsyncCLI.java | 144 ++-
.../src/main/resources/config-template.json | 26 +-
.../apache/tika/async/cli/AsyncCliParserTest.java | 90 ++
.../apache/tika/async/cli/AsyncProcessorTest.java | 35 +-
.../tika/async/cli/TikaConfigAsyncWriterTest.java | 2 +-
.../org/apache/tika/pipes/api/HandlerConfig.java | 149 ---
.../java/org/apache/tika/pipes/api/ParseMode.java | 94 ++
.../tika/pipes/api/emitter/AbstractEmitter.java | 7 +-
.../pipes/api/emitter/AbstractStreamEmitter.java | 7 +-
.../apache/tika/pipes/api/emitter/EmitData.java | 6 +-
.../api/pipesiterator/PipesIteratorBaseConfig.java | 37 -
.../api/pipesiterator/PipesIteratorConfig.java | 21 -
.../main/resources/META-INF/tika/other-configs.idx | 20 -
tika-pipes/tika-pipes-config-store-ignite/pom.xml | 9 +
tika-pipes/tika-pipes-core/pom.xml | 12 +-
.../apache/tika/pipes/core/EmitStrategyConfig.java | 24 +-
.../tika/pipes/core/EmitStrategyOverride.java | 82 --
.../tika/pipes/core/PerClientServerManager.java | 376 ++++++++
.../org/apache/tika/pipes/core/PipesClient.java | 907 ++++--------------
.../org/apache/tika/pipes/core/PipesConfig.java | 123 ++-
.../org/apache/tika/pipes/core/PipesParser.java | 74 +-
.../org/apache/tika/pipes/core/ServerManager.java | 152 +++
.../tika/pipes/core/SharedServerManager.java | 478 ++++++++++
.../tika/pipes/core/async/AsyncProcessor.java | 53 +-
.../tika/pipes/core/config/ConfigMerger.java | 255 +++++
.../tika/pipes/core/config/ConfigOverrides.java | 285 ++++++
.../tika/pipes/core/emitter/EmitDataImpl.java | 32 +-
...ytesHandler.java => AbstractUnpackHandler.java} | 53 +-
.../BasicEmbeddedDocumentBytesHandler.java | 57 --
.../extractor/EmbeddedDocumentBytesConfig.java | 204 ----
...ytesHandler.java => EmittingUnpackHandler.java} | 21 +-
.../core/extractor/FrictionlessUnpackHandler.java | 337 +++++++
.../core/extractor/StandardUnpackSelector.java | 143 +++
.../core/extractor/TempFileUnpackHandler.java | 159 ++++
.../tika/pipes/core/extractor/UnpackConfig.java | 350 +++++++
.../tika/pipes/core/extractor/UnpackExtractor.java | 67 +-
.../core/extractor/UnpackExtractorFactory.java | 20 +-
.../core/extractor/frictionless/DataPackage.java | 226 +++++
.../frictionless/FrictionlessResource.java | 83 ++
.../core/serialization/EmitDataDeserializer.java | 75 ++
.../core/serialization/EmitDataSerializer.java | 45 +
.../core/serialization/JsonFetchEmitTuple.java | 23 +-
.../pipes/core/serialization/JsonPipesIpc.java | 88 ++
.../serialization/PipesResultDeserializer.java | 65 ++
.../core/serialization/PipesResultSerializer.java | 46 +
.../tika/pipes/core/server/ConnectionHandler.java | 404 ++++++++
.../apache/tika/pipes/core/server/EmitHandler.java | 43 +-
.../tika/pipes/core/server/FetchHandler.java | 5 +-
.../core/server/MetadataListAndEmbeddedBytes.java | 40 +-
.../tika/pipes/core/server/ParseHandler.java | 130 ++-
.../apache/tika/pipes/core/server/PipesServer.java | 719 +++++---------
.../apache/tika/pipes/core/server/PipesWorker.java | 609 ++++++++++--
.../pipes/core/server/SharedServerResources.java | 212 +++++
.../tika/pipes/core/config/ConfigMergerTest.java | 324 +++++++
.../core/extractor/StandardUnpackSelectorTest.java | 51 +-
.../core/serialization/JsonFetchEmitTupleTest.java | 77 +-
.../apache/tika/pipes/fork/PipesForkParser.java | 204 ++--
.../tika/pipes/fork/PipesForkParserConfig.java | 101 +-
.../tika/pipes/fork/PipesForkParserTest.java | 213 ++++-
tika-pipes/tika-pipes-integration-tests/pom.xml | 28 +-
.../pipes/core/DigestingOpenContainersTest.java | 66 ++
.../apache/tika/pipes/core/EmbeddedLimitsTest.java | 304 ++++++
.../tika/pipes/core/FrictionlessUnpackTest.java | 713 ++++++++++++++
.../tika/pipes/core/MetadataWriteLimiterTest.java | 124 +++
.../apache/tika/pipes/core/MockPassbackFilter.java | 39 +-
.../apache/tika/pipes/core/PassbackFilterTest.java | 24 +-
.../apache/tika/pipes/core/PipesClientTest.java | 72 +-
.../apache/tika/pipes/core/PipesServerTest.java | 100 +-
.../pipes/core/SharedServerChaosMonkeyTest.java | 328 +++++++
.../tika/pipes/core/SharedServerModeTest.java | 691 ++++++++++++++
.../org/apache/tika/pipes/core/UnpackModeTest.java | 635 +++++++++++++
.../tika/pipes/core/async/MockDigesterFactory.java | 49 -
.../src/test/resources/configs/tika-4533.json | 17 +
.../test/resources/configs/tika-config-basic.json | 40 +-
.../resources/configs/tika-config-passback.json | 40 +-
...g-basic.json => tika-config-shared-server.json} | 43 +-
.../resources/configs/tika-config-truncate.json | 52 +-
...fig-basic.json => tika-config-uppercasing.json} | 36 +-
.../configs/tika-config-write-limiter.json | 59 ++
.../resources/test-documents/testLargeOLEDoc.doc | Bin
.../pipes/pipesiterator/PipesIteratorBase.java | 10 +
.../pipes/pipesiterator/PipesIteratorConfig.java | 61 ++
tika-pipes/tika-pipes-plugins/pom.xml | 8 +-
.../pipes/iterator/azblob/AZBlobPipesIterator.java | 11 +-
.../iterator/azblob/AZBlobPipesIteratorConfig.java | 24 +-
.../iterator/azblob/TestAZBlobPipesIterator.java | 7 +-
.../tika-pipes-plugins/tika-pipes-csv/pom.xml | 6 +-
.../tika/pipes/iterator/csv/CSVPipesIterator.java | 29 +-
.../pipes/iterator/csv/CSVPipesIteratorConfig.java | 24 +-
.../pipes/iterator/csv/TestCSVPipesIterator.java | 8 +-
.../pipes/iterator/fs/FileSystemPipesIterator.java | 12 +-
.../iterator/fs/FileSystemPipesIteratorConfig.java | 23 +-
.../apache/tika/pipes/fs/ConfigExamplesTest.java | 69 ++
.../config-examples/file-system-emitter.json | 13 +
.../config-examples/file-system-fetcher.json | 11 +
.../config-examples/file-system-pipeline.json | 27 +
.../tika/pipes/iterator/gcs/GCSPipesIterator.java | 13 +-
.../pipes/iterator/gcs/GCSPipesIteratorConfig.java | 24 +-
.../pipes/iterator/gcs/TestGCSPipesIterator.java | 8 +-
.../tika-pipes-google-drive/pom.xml | 10 +-
.../tika/pipes/emitter/jdbc/JDBCEmitter.java | 6 +-
.../pipes/iterator/jdbc/JDBCPipesIterator.java | 33 +-
.../iterator/jdbc/JDBCPipesIteratorConfig.java | 24 +-
.../pipes/iterator/jdbc/TestJDBCPipesIterator.java | 9 +-
.../tika-pipes-plugins/tika-pipes-json/pom.xml | 6 +-
.../json/JsonPipesIteratorConfig.java | 24 +-
.../pipesiterator/json/TestJsonPipesIterator.java | 34 +-
...h-embedded-bytes.json => test-with-unpack.json} | 1002 ++++----------------
.../src/test/resources/test-documents/test.json | 600 ------------
.../pipes/iterator/kafka/KafkaPipesIterator.java | 11 +-
.../iterator/kafka/KafkaPipesIteratorConfig.java | 24 +-
.../iterator/kafka/TestKafkaPipesIterator.java | 7 +-
.../tika-pipes-microsoft-graph/pom.xml | 4 +-
.../tika/pipes/iterator/s3/S3PipesIterator.java | 13 +-
.../pipes/iterator/s3/S3PipesIteratorConfig.java | 24 +-
.../pipes/iterator/s3/TestS3PipesIterator.java | 7 +-
.../pipes/iterator/solr/SolrPipesIterator.java | 12 +-
.../iterator/solr/SolrPipesIteratorConfig.java | 24 +-
tika-plugins-core/pom.xml | 6 +-
tika-serialization/pom.xml | 11 +-
.../apache/tika/config/loader/ComponentInfo.java | 15 +-
.../tika/config/loader/ComponentRegistry.java | 55 +-
.../apache/tika/config/loader/ConfigLoader.java | 47 +-
.../apache/tika/config/loader/FrameworkConfig.java | 11 +-
.../apache/tika/config/loader/TikaJsonConfig.java | 16 +-
.../org/apache/tika/config/loader/TikaLoader.java | 149 ++-
.../config/loader/TikaObjectMapperFactory.java | 25 +-
.../tika/serialization/ComponentNameResolver.java | 22 +
.../apache/tika/serialization/JsonMetadata.java | 8 +-
.../tika/serialization/JsonMetadataList.java | 8 +-
.../tika/serialization/ParseContextUtils.java | 140 ++-
.../org/apache/tika/serialization/TikaModule.java | 79 +-
.../serdes/ParseContextDeserializer.java | 126 ++-
.../serdes/ParseContextSerializer.java | 43 +-
.../java/org/apache/tika/config/AllLimitsTest.java | 155 +++
.../org/apache/tika/config/EmbeddedLimitsTest.java | 110 +++
.../org/apache/tika/config/OutputLimitsTest.java | 120 +++
.../org/apache/tika/config/TimeoutLimitsTest.java | 96 ++
.../tika/config/loader/ConfigLoaderTest.java | 184 ++--
.../tika/config/loader/TikaJsonConfigTest.java | 6 +-
.../apache/tika/config/loader/TikaLoaderTest.java | 99 +-
.../apache/tika/digest/MockDigesterFactory.java | 2 +-
.../tika/metadata/filter/JsonConfigOnlyFilter.java | 66 ++
.../tika/metadata/filter/TestMetadataFilter.java | 19 +
...rTest.java => StandardMetadataLimiterTest.java} | 74 +-
.../tika/sax/UppercasingContentHandlerFactory.java | 15 +-
.../serialization/RoundTripSerializationTest.java | 6 +-
.../apache/tika/serialization/SmileFormatTest.java | 110 +++
.../TestParseContextSerialization.java | 184 +++-
.../test/resources/configs/TIKA-3695-exclude.json | 10 +-
.../test/resources/configs/TIKA-3695-fields.json | 20 +-
.../src/test/resources/configs/TIKA-3695.json | 14 +-
.../configs/TIKA-4207-embedded-bytes-config.json | 16 -
.../configs/TIKA-4582-json-config-only.json | 9 +
.../test/resources/configs/all-limits-test.json | 30 +
.../resources/configs/embedded-limits-test.json | 10 +
.../test/resources/configs/output-limits-test.json | 12 +
.../test/resources/configs/test-config-loader.json | 8 +-
.../resources/configs/test-interface-no-type.json | 2 +-
.../test/resources/configs/test-invalid-class.json | 2 +-
.../resources/configs/test-partial-config.json | 6 +-
.../resources/configs/test-unexpected-field.json | 4 +-
.../test/resources/configs/test-wrong-type.json | 2 +-
.../resources/configs/timeout-limits-test.json | 7 +
.../org/apache/tika/server/client/TestBasic.java | 3 -
tika-server/tika-server-core/pom.xml | 10 +-
...lter.java => ConfigEndpointSecurityFilter.java} | 37 +-
.../tika/server/core/FetcherStreamFactory.java | 140 ---
.../tika/server/core/InputStreamFactory.java | 38 -
.../org/apache/tika/server/core/ServerStatus.java | 79 +-
.../tika/server/core/ServerStatusWatcher.java | 84 --
.../org/apache/tika/server/core/TaskStatus.java | 20 +-
.../apache/tika/server/core/TikaServerConfig.java | 71 +-
.../apache/tika/server/core/TikaServerProcess.java | 310 ++++--
.../org/apache/tika/server/core/TlsConfig.java | 229 ++++-
.../tika/server/core/resource/AsyncResource.java | 9 +-
.../server/core/resource/DetectorResource.java | 10 +-
.../server/core/resource/MetadataResource.java | 21 +-
.../server/core/resource/PipesParsingHelper.java | 497 ++++++++++
.../core/resource/RecursiveMetadataResource.java | 169 ++--
...erverResource.java => ServerHandlerConfig.java} | 18 +-
.../tika/server/core/resource/TikaResource.java | 791 +++++++++------
.../server/core/resource/TikaServerStatus.java | 3 +-
.../tika/server/core/resource/TikaVersion.java | 1 -
.../tika/server/core/resource/TikaWelcome.java | 3 -
.../server/core/resource/TranslateResource.java | 13 +-
.../server/core/resource/UnpackerResource.java | 482 ++++------
.../main/resources/tika-server-config-default.xml | 8 -
.../org/apache/tika/server/core/CXFTestBase.java | 235 +++--
.../core/ConfigEndpointSecurityEnabledTest.java | 111 +++
.../server/core/ConfigEndpointSecurityTest.java | 143 +++
.../tika/server/core/ConfigExamplesTest.java | 64 ++
.../tika/server/core/IntegrationTestBase.java | 18 +-
.../server/core/RecursiveMetadataResourceTest.java | 3 -
.../apache/tika/server/core/ServerStatusTest.java | 5 +-
.../apache/tika/server/core/StackTraceOffTest.java | 163 ----
.../apache/tika/server/core/StackTraceTest.java | 78 +-
.../tika/server/core/TestPortAllocator.java} | 33 +-
.../org/apache/tika/server/core/TikaPipesTest.java | 107 ++-
.../tika/server/core/TikaResourceFetcherTest.java | 160 ----
.../core/TikaResourceMetadataFilterTest.java | 79 --
.../tika/server/core/TikaResourceNoStackTest.java | 98 --
.../apache/tika/server/core/TikaResourceTest.java | 76 +-
.../core/TikaServerAsyncIntegrationTest.java | 9 +-
.../tika/server/core/TikaServerConfigTest.java | 2 -
.../server/core/TikaServerIntegrationTest.java | 134 +--
.../core/TikaServerPipesIntegrationTest.java | 13 +-
.../tika/server/core/TranslateResourceTest.java | 2 +-
.../server/core/benchmark/TikaServerBenchmark.java | 716 ++++++++++++++
.../resources/config-examples/server-basic.json | 13 +
.../config-examples/server-with-parsers.json | 24 +
.../resources/configs/cxf-test-base-template.json | 15 +-
.../configs/cxf-unpack-test-template.json | 38 +
.../configs/tika-config-server-basic.json | 20 +-
.../configs/tika-config-server-emitter.json | 19 +-
.../tika-config-server-fetcher-template.json | 19 +-
.../tika-config-server-fetchers-emitters.json | 14 +-
.../configs/tika-config-server-pipes-basic.json | 25 +
.../configs/tika-config-server-timeout-5000.json | 7 -
.../tika-config-server-tls-one-way-template.json | 20 +-
.../tika-config-server-tls-two-way-template.json | 20 +-
.../resources/configs/tika-config-server-tls.json | 15 +-
.../test/resources/configs/tika-config-server.json | 15 +-
tika-server/tika-server-standard/pom.xml | 21 +-
.../src/main/assembly/assembly.xml | 4 +
.../standard/resource/XMPMetadataResource.java | 10 +-
.../apache/tika/server/standard/FetcherTest.java | 107 ---
.../server/standard/JsonMaxFieldLengthTest.java | 8 +-
.../tika/server/standard/MetadataResourceTest.java | 1 -
.../server/standard/OpenNLPMetadataFilterTest.java | 8 +-
.../standard/OptimaizeMetadataFilterTest.java | 10 +-
.../standard/RecursiveMetadataResourceTest.java | 117 ++-
.../tika/server/standard/TikaDetectorsTest.java | 19 +-
.../apache/tika/server/standard/TikaPipesTest.java | 113 ++-
.../tika/server/standard/TikaResourceTest.java | 166 +---
.../tika/server/standard/UnpackerResourceTest.java | 429 +++++++--
.../standard/UnpackerResourceWithConfigTest.java | 114 ++-
.../resources/configs/cxf-test-base-template.json | 13 +-
.../configs/tika-config-for-server-tests.json | 23 +-
.../test/resources/configs/tika-config-json.json | 15 +-
.../tika-config-langdetect-opennlp-filter.json | 23 +-
.../tika-config-langdetect-optimaize-filter.json | 23 +-
tika-translate/pom.xml | 8 +-
696 files changed, 35918 insertions(+), 12201 deletions(-)
create mode 100644 .editorconfig
create mode 100644 .java-version
create mode 100644 .mvn/wrapper/maven-wrapper.properties
create mode 100644 CONTRIBUTING.md
create mode 100644 SECURITY.md
create mode 100644 docs/antora-playbook.yml
create mode 100644 docs/antora.yml
create mode 100644 docs/assets/logos/asf-tika-logos.zip
create mode 120000 docs/modules/ROOT/examples/migration-full-example.json
create mode 120000 docs/modules/ROOT/examples/pdf-parser-basic.json
create mode 120000 docs/modules/ROOT/examples/pdf-parser-full.json
create mode 120000 docs/modules/ROOT/examples/tesseract-basic.json
create mode 120000 docs/modules/ROOT/examples/tesseract-full.json
create mode 100644 docs/modules/ROOT/nav.adoc
create mode 100644 docs/modules/ROOT/pages/advanced/embedded-documents.adoc
create mode 100644 docs/modules/ROOT/pages/advanced/index.adoc
create mode 100644
docs/modules/ROOT/pages/advanced/integration-testing/tika-app.adoc
create mode 100644
docs/modules/ROOT/pages/advanced/integration-testing/tika-server.adoc
create mode 100644 docs/modules/ROOT/pages/advanced/robustness.adoc
create mode 100644 docs/modules/ROOT/pages/advanced/setting-limits.adoc
create mode 100644 docs/modules/ROOT/pages/advanced/spooling.adoc
create mode 100644 docs/modules/ROOT/pages/advanced/zip-detection.adoc
create mode 100644 docs/modules/ROOT/pages/configuration/digesters.adoc
create mode 100644 docs/modules/ROOT/pages/configuration/index.adoc
create mode 100644
docs/modules/ROOT/pages/configuration/parsers/pdf-parser.adoc
create mode 100644
docs/modules/ROOT/pages/configuration/parsers/tesseract-ocr-parser.adoc
create mode 100644 docs/modules/ROOT/pages/developers/index.adoc
create mode 100644 docs/modules/ROOT/pages/developers/serialization.adoc
create mode 100644 docs/modules/ROOT/pages/faq.adoc
create mode 100644 docs/modules/ROOT/pages/index.adoc
create mode 100644 docs/modules/ROOT/pages/maintainers/index.adoc
create mode 100644
docs/modules/ROOT/pages/maintainers/release-guides/docker.adoc
create mode 100644 docs/modules/ROOT/pages/maintainers/release-guides/grpc.adoc
create mode 100644 docs/modules/ROOT/pages/maintainers/release-guides/helm.adoc
create mode 100644
docs/modules/ROOT/pages/maintainers/release-guides/index.adoc
create mode 100644 docs/modules/ROOT/pages/maintainers/release-guides/tika.adoc
create mode 100644 docs/modules/ROOT/pages/maintainers/site.adoc
create mode 100644 docs/modules/ROOT/pages/migration-to-4x/design-notes-4x.adoc
create mode 100644 docs/modules/ROOT/pages/migration-to-4x/index.adoc
create mode 100644
docs/modules/ROOT/pages/migration-to-4x/metadata-changes-4x.adoc
create mode 100644
docs/modules/ROOT/pages/migration-to-4x/migrating-tika-server-4x.adoc
create mode 100644 docs/modules/ROOT/pages/migration-to-4x/migrating-to-4x.adoc
create mode 100644
docs/modules/ROOT/pages/migration-to-4x/serialization-4x.adoc
create mode 100644 docs/modules/ROOT/pages/pipes/index.adoc
create mode 100644 docs/modules/ROOT/pages/pipes/shared-server-mode.adoc
create mode 100644 docs/modules/ROOT/pages/pipes/unpack-config.adoc
create mode 100644 docs/modules/ROOT/pages/roadmap.adoc
create mode 100644 docs/modules/ROOT/pages/security.adoc
create mode 100644 docs/modules/ROOT/pages/using-tika/cli/index.adoc
create mode 100644 docs/modules/ROOT/pages/using-tika/grpc/index.adoc
create mode 100644 docs/modules/ROOT/pages/using-tika/index.adoc
create mode 100644
docs/modules/ROOT/pages/using-tika/java-api/getting-started.adoc
create mode 100644 docs/modules/ROOT/pages/using-tika/java-api/index.adoc
create mode 100644 docs/modules/ROOT/pages/using-tika/server/index.adoc
create mode 100644 docs/modules/ROOT/pages/using-tika/server/tls.adoc
create mode 100644 docs/pom.xml
create mode 100644 docs/src/assembly/docs.xml
create mode 100644 docs/supplemental-ui/css/search.css
create mode 100644 docs/supplemental-ui/img/ASF_Tika-colour.png
create mode 100644 docs/supplemental-ui/img/ASF_Tika-colour.svg
create mode 100644 docs/supplemental-ui/js/search.js
create mode 100644 docs/supplemental-ui/partials/footer-content.hbs
create mode 100644 docs/supplemental-ui/partials/footer-scripts.hbs
create mode 100644 docs/supplemental-ui/partials/head-scripts.hbs
create mode 100644 docs/supplemental-ui/partials/header-content.hbs
create mode 100755 mvnw
create mode 100644 mvnw.cmd
create mode 100644
tika-core/src/main/java/org/apache/tika/config/EmbeddedLimits.java
create mode 100644
tika-core/src/main/java/org/apache/tika/config/OutputLimits.java
create mode 100644
tika-core/src/main/java/org/apache/tika/config/TimeoutLimits.java
create mode 100644
tika-core/src/main/java/org/apache/tika/detect/DetectHelper.java
create mode 100644
tika-core/src/main/java/org/apache/tika/exception/EmbeddedLimitReachedException.java
delete mode 100644
tika-core/src/main/java/org/apache/tika/extractor/BasicEmbeddedBytesSelector.java
delete mode 100644
tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractorFactory.java
copy
tika-core/src/main/java/org/apache/tika/extractor/{ParsingEmbeddedDocumentExtractorFactory.java
=> StandardExtractorFactory.java} (68%)
rename
tika-core/src/main/java/org/apache/tika/extractor/{EmbeddedDocumentBytesHandler.java
=> UnpackHandler.java} (94%)
rename
tika-core/src/main/java/org/apache/tika/extractor/{EmbeddedBytesSelector.java
=> UnpackSelector.java} (87%)
create mode 100644
tika-core/src/main/java/org/apache/tika/io/ByteArraySource.java
create mode 100644
tika-core/src/main/java/org/apache/tika/io/CachingInputStream.java
create mode 100644
tika-core/src/main/java/org/apache/tika/io/CachingSource.java
create mode 100644 tika-core/src/main/java/org/apache/tika/io/FileSource.java
delete mode 100644 tika-core/src/main/java/org/apache/tika/io/IOUtils.java
delete mode 100644
tika-core/src/main/java/org/apache/tika/io/InputStreamFactory.java
create mode 100644
tika-core/src/main/java/org/apache/tika/io/SpoolingStrategy.java
create mode 100644 tika-core/src/main/java/org/apache/tika/io/StreamCache.java
create mode 100644
tika-core/src/main/java/org/apache/tika/io/TikaInputSource.java
create mode 100644 tika-core/src/main/java/org/apache/tika/metadata/Zip.java
rename
tika-core/src/main/java/org/apache/tika/metadata/writefilter/{MetadataWriteFilter.java
=> MetadataWriteLimiter.java} (76%)
rename
tika-core/src/main/java/org/apache/tika/metadata/writefilter/{MetadataWriteFilterFactory.java
=> MetadataWriteLimiterFactory.java} (59%)
rename
tika-core/src/main/java/org/apache/tika/metadata/writefilter/{StandardWriteFilter.java
=> StandardMetadataLimiter.java} (83%)
create mode 100644
tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardMetadataLimiterFactory.java
delete mode 100644
tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilterFactory.java
copy tika-core/src/main/java/org/apache/tika/parser/{StatefulParser.java =>
ParsingIntent.java} (55%)
create mode 100644
tika-core/src/main/java/org/apache/tika/sax/SAXOutputConfig.java
copy tika-core/src/main/java/org/apache/tika/sax/{ContentHandlerFactory.java
=> StreamingContentHandlerFactory.java} (53%)
create mode 100644
tika-core/src/main/java/org/apache/tika/sax/ToMarkdownContentHandler.java
delete mode 100644
tika-core/src/main/java/org/apache/tika/utils/RereadableInputStream.java
delete mode 100644
tika-core/src/test/java/org/apache/tika/TestRereadableInputStream.java
create mode 100644
tika-core/src/test/java/org/apache/tika/sax/ToMarkdownContentHandlerTest.java
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/FieldHyperlinkTracker.java
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testAttachedTemplate.docx
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDataConnections.xlsx
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testDdeLink.xlsx
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testExternalRefs.docx
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testFrameset.docx
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testHoverAndVml.docx
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testInstrLink.docx
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testMailMerge.docx
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/resources/test-documents/testSubdocument.docx
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/AbstractArchiveParser.java
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/SevenZParser.java
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/ZipParser.java
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/main/java/org/apache/tika/parser/pkg/ZipParserConfig.java
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/java/org/apache/tika/parser/pkg/ZipBenchmark.java
delete mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/org/apache/tika/parser/pkg/tika-config.xml
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pkg-module/src/test/resources/test-documents/bob.gz
copy
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-xmp-commons/src/test/java/org/apache/tika/parser/xmp/{JempboxExtractorTest.java
=> XmpboxExtractorTest.java} (59%)
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/ConfigExamplesTest.java
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/migration-full-example.json
copy
tika-parsers/tika-parsers-standard/{tika-parsers-standard-modules/tika-parser-pdf-module/src/test/resources/org/apache/tika/parser/pdf/tika-config.json
=>
tika-parsers-standard-package/src/test/resources/config-examples/pdf-parser-basic.json}
(72%)
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/pdf-parser-full.json
copy
tika-parsers/tika-parsers-standard/{tika-parsers-standard-modules/tika-parser-ocr-module/src/test/resources/configs/TIKA-3582-tesseract.json
=>
tika-parsers-standard-package/src/test/resources/config-examples/tesseract-basic.json}
(56%)
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/config-examples/tesseract-full.json
delete mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4441-120.json
delete mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4441-12000000.json
delete mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-4441-neg1.json
create mode 100644
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testMP3_id3_false_aac.mp3
delete mode 100644
tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/HandlerConfig.java
create mode 100644
tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/ParseMode.java
delete mode 100644
tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/pipesiterator/PipesIteratorBaseConfig.java
delete mode 100644
tika-pipes/tika-pipes-api/src/main/java/org/apache/tika/pipes/api/pipesiterator/PipesIteratorConfig.java
delete mode 100644
tika-pipes/tika-pipes-api/src/main/resources/META-INF/tika/other-configs.idx
delete mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/EmitStrategyOverride.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/PerClientServerManager.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/ServerManager.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/SharedServerManager.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/config/ConfigMerger.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/config/ConfigOverrides.java
rename
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/{AbstractEmbeddedDocumentBytesHandler.java
=> AbstractUnpackHandler.java} (50%)
delete mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/BasicEmbeddedDocumentBytesHandler.java
delete mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/EmbeddedDocumentBytesConfig.java
rename
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/{EmittingEmbeddedDocumentBytesHandler.java
=> EmittingUnpackHandler.java} (77%)
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/FrictionlessUnpackHandler.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/StandardUnpackSelector.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/TempFileUnpackHandler.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/UnpackConfig.java
rename tika-core/src/main/java/org/apache/tika/extractor/RUnpackExtractor.java
=>
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/UnpackExtractor.java
(75%)
rename
tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractorFactory.java
=>
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/UnpackExtractorFactory.java
(65%)
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/frictionless/DataPackage.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/extractor/frictionless/FrictionlessResource.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/EmitDataDeserializer.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/EmitDataSerializer.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/JsonPipesIpc.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/PipesResultDeserializer.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/PipesResultSerializer.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/ConnectionHandler.java
create mode 100644
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/SharedServerResources.java
create mode 100644
tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/config/ConfigMergerTest.java
rename
tika-serialization/src/test/java/org/apache/tika/parser/AutoDetectParserConfigTest.java
=>
tika-pipes/tika-pipes-core/src/test/java/org/apache/tika/pipes/core/extractor/StandardUnpackSelectorTest.java
(57%)
create mode 100644
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/DigestingOpenContainersTest.java
create mode 100644
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/EmbeddedLimitsTest.java
create mode 100644
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/FrictionlessUnpackTest.java
create mode 100644
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/MetadataWriteLimiterTest.java
copy
tika-serialization/src/test/java/org/apache/tika/metadata/filter/AttachmentCountingListFilter.java
=>
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/MockPassbackFilter.java
(51%)
create mode 100644
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/SharedServerChaosMonkeyTest.java
create mode 100644
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/SharedServerModeTest.java
create mode 100644
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/UnpackModeTest.java
delete mode 100644
tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/async/MockDigesterFactory.java
create mode 100644
tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-4533.json
copy
tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/{tika-config-basic.json
=> tika-config-shared-server.json} (51%)
copy
tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/{tika-config-basic.json
=> tika-config-uppercasing.json} (53%)
create mode 100644
tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/tika-config-write-limiter.json
copy {tika-parsers/tika-parsers-standard/tika-parsers-standard-package =>
tika-pipes/tika-pipes-integration-tests}/src/test/resources/test-documents/testLargeOLEDoc.doc
(100%)
create mode 100644
tika-pipes/tika-pipes-iterator-commons/src/main/java/org/apache/tika/pipes/pipesiterator/PipesIteratorConfig.java
create mode 100644
tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/java/org/apache/tika/pipes/fs/ConfigExamplesTest.java
create mode 100644
tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-emitter.json
create mode 100644
tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-fetcher.json
create mode 100644
tika-pipes/tika-pipes-plugins/tika-pipes-file-system/src/test/resources/config-examples/file-system-pipeline.json
rename
tika-pipes/tika-pipes-plugins/tika-pipes-json/src/test/resources/test-documents/{test-with-embedded-bytes.json
=> test-with-unpack.json} (55%)
create mode 100644
tika-serialization/src/test/java/org/apache/tika/config/AllLimitsTest.java
create mode 100644
tika-serialization/src/test/java/org/apache/tika/config/EmbeddedLimitsTest.java
create mode 100644
tika-serialization/src/test/java/org/apache/tika/config/OutputLimitsTest.java
create mode 100644
tika-serialization/src/test/java/org/apache/tika/config/TimeoutLimitsTest.java
create mode 100644
tika-serialization/src/test/java/org/apache/tika/metadata/filter/JsonConfigOnlyFilter.java
rename
tika-serialization/src/test/java/org/apache/tika/metadata/writefilter/{StandardWriteFilterTest.java
=> StandardMetadataLimiterTest.java} (80%)
create mode 100644
tika-serialization/src/test/java/org/apache/tika/serialization/SmileFormatTest.java
delete mode 100644
tika-serialization/src/test/resources/configs/TIKA-4207-embedded-bytes-config.json
create mode 100644
tika-serialization/src/test/resources/configs/TIKA-4582-json-config-only.json
create mode 100644
tika-serialization/src/test/resources/configs/all-limits-test.json
create mode 100644
tika-serialization/src/test/resources/configs/embedded-limits-test.json
create mode 100644
tika-serialization/src/test/resources/configs/output-limits-test.json
create mode 100644
tika-serialization/src/test/resources/configs/timeout-limits-test.json
copy
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/{TikaLoggingFilter.java
=> ConfigEndpointSecurityFilter.java} (51%)
delete mode 100644
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/FetcherStreamFactory.java
delete mode 100644
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/InputStreamFactory.java
delete mode 100644
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/ServerStatusWatcher.java
create mode 100644
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/PipesParsingHelper.java
copy
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/{TikaServerResource.java
=> ServerHandlerConfig.java} (57%)
create mode 100644
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/ConfigEndpointSecurityEnabledTest.java
create mode 100644
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/ConfigEndpointSecurityTest.java
create mode 100644
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/ConfigExamplesTest.java
delete mode 100644
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/StackTraceOffTest.java
rename
tika-server/tika-server-core/src/{main/java/org/apache/tika/server/core/DefaultInputStreamFactory.java
=> test/java/org/apache/tika/server/core/TestPortAllocator.java} (52%)
delete mode 100644
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceFetcherTest.java
delete mode 100644
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceMetadataFilterTest.java
delete mode 100644
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/TikaResourceNoStackTest.java
create mode 100644
tika-server/tika-server-core/src/test/java/org/apache/tika/server/core/benchmark/TikaServerBenchmark.java
create mode 100644
tika-server/tika-server-core/src/test/resources/config-examples/server-basic.json
create mode 100644
tika-server/tika-server-core/src/test/resources/config-examples/server-with-parsers.json
create mode 100644
tika-server/tika-server-core/src/test/resources/configs/cxf-unpack-test-template.json
create mode 100644
tika-server/tika-server-core/src/test/resources/configs/tika-config-server-pipes-basic.json
delete mode 100644
tika-server/tika-server-core/src/test/resources/configs/tika-config-server-timeout-5000.json
delete mode 100644
tika-server/tika-server-standard/src/test/java/org/apache/tika/server/standard/FetcherTest.java