This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-3948
in repository https://gitbox.apache.org/repos/asf/tika.git
from 957ddd79d bump OpenSearch and Solr versions in integration tests
add cb76ac642 Bump org.netpreserve:jwarc from 0.28.2 to 0.28.3
add 5fb4bd105 Merge pull request #1365 from
apache/dependabot/maven/org.netpreserve-jwarc-0.28.3
add 2d407f529 Bump aws.version from 1.12.558 to 1.12.559
add 3c66c8078 Merge pull request #1367 from
apache/dependabot/maven/aws.version-1.12.559
add a2604ae69 Bump org.xerial.snappy:snappy-java from 1.1.10.4 to 1.1.10.5
add 74e1b0f9c Merge pull request #1366 from
apache/dependabot/maven/org.xerial.snappy-snappy-java-1.1.10.5
add 571900ac3 TIKA-4123: update avro
add fea615ef9 Bump aws.version from 1.12.559 to 1.12.560
add e80e46817 Merge pull request #1372 from
apache/dependabot/maven/aws.version-1.12.560
add cc29260d0 Bump io.netty:netty-bom from 4.1.98.Final to 4.1.99.Final
add e6f68c98c Merge pull request #1373 from
apache/dependabot/maven/io.netty-netty-bom-4.1.99.Final
add 41b10977c Bump poi.version from 5.2.3 to 5.2.4
add d0f054729 Merge pull request #1371 from
apache/dependabot/maven/poi.version-5.2.4
add 73e00dde6 Merge branch 'main' of
https://gitbox.apache.org/repos/asf/tika
add 8bf7e977d TIKA-4149 -- PipesServer should include
throwOnWriteLimitReached in the constructor of the BasicContentHandlerFactory
(#1374)
add d1b124ce9 TIKA-4144 -- update netcdf integration (#1364)
add 20e17f297 TIKA-4135 -- can't rely on English-only messages from Java
xml parsers now that we've removed xerces
add 25807484e update checkout version
add 206e1db14 TIKA-4123: remove version defined in parent
add bdd983eb0 TIKA-4123: restore version defined in parent because of
convergence errors
add 8932e3d2f TIKA-4123: create version property for snappy; update
commons-io
add 54e76aa84 TIKA-4129: update lucene, shade plugin
add a59ee0829 TIKA-4129: remove unneeded dependency, move comment to
parent, update asm
add b5add91f3 TIKA-4129: align asm opcode with version according to comment
add f9f4be5da TIKA-4129: remove unneeded dependency, move comment to parent
add d27b84150 Bump com.github.luben:zstd-jni from 1.5.5-5 to 1.5.5-6
add f8e850591 Merge pull request #1378 from
apache/dependabot/maven/com.github.luben-zstd-jni-1.5.5-6
add 2f871fd5f Bump de.thetaphi:forbiddenapis from 3.5.1 to 3.6
add aa77b4df7 Merge pull request #1377 from
apache/dependabot/maven/de.thetaphi-forbiddenapis-3.6
add be5858c42 Bump test.containers.version from 1.19.0 to 1.19.1
add dad37cdf4 Merge pull request #1379 from
apache/dependabot/maven/test.containers.version-1.19.1
add 1a0b35ca3 TIKA-4139 (#1381)
add a394ddae8 TIKA-4147 (#1382)
add 504629391 TIKA-4132 -- remove some deprecated code, remove deprecation
warnings and deprecate AbstractParser, (#1383)
add 21ce7c4c5 Bump aws.version from 1.12.560 to 1.12.562
add 572d00ac2 Merge pull request #1385 from
apache/dependabot/maven/aws.version-1.12.562
add 7058436b4 Bump com.google.protobuf:protobuf-java from 3.24.3 to 3.24.4
add c8e189df9 Merge pull request #1387 from
apache/dependabot/maven/com.google.protobuf-protobuf-java-3.24.4
add 7872a81b7 Bump aws.version from 1.12.562 to 1.12.563
add de73ae41f Merge pull request #1386 from
apache/dependabot/maven/aws.version-1.12.563
add 413b23223 TIKA-4130 -- add xerces and xml-apis to the banned
dependencies list in 3.x/main
add eba103b05 Bump aws.version from 1.12.563 to 1.12.564
add 9e597df2a Merge pull request #1388 from
apache/dependabot/maven/aws.version-1.12.564
add c9b880755 TIKA-4129: update puppycrawl, kafka, commons-net
add 6e68c12dc TIKA-4129: update aws, mockito, azure; add comment
add 97d3e4f0e TIKA-4129: update zookeeper; remove unneeded version entries
add 0c4ea0b36 Bump com.azure:azure-core-http-netty from 1.13.7 to 1.13.8
add 3ef7a4d78 Merge pull request #1390 from
apache/dependabot/maven/com.azure-azure-core-http-netty-1.13.8
add 12c2d8f23 Bump org.eclipse.jetty:jetty-bom
add d466492e0 Merge pull request #1391 from
apache/dependabot/maven/org.eclipse.jetty-jetty-bom-9.4.53.v20231009
add 29f8a80a7 Merge remote-tracking branch 'origin/main' into TIKA-3948
add 847d0f5cd Merge remote-tracking branch 'origin/TIKA-3948' into
TIKA-3948
add 98621d5fe Lots of merge conflict resolutions
No new revisions were added by this update.
Summary of changes:
.github/workflows/main-jdk11-build.yml | 2 +-
.github/workflows/main-jdk17-build.yml | 2 +-
.github/workflows/main-jdk21-build.yml | 2 +-
CHANGES.txt | 5 +
tika-app/pom.xml | 10 +-
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 2 -
.../src/main/java/org/apache/tika/gui/TikaGUI.java | 3 +-
.../org/apache/tika/batch/fs/FSFileResource.java | 12 --
.../org/apache/tika/batch/fs/FSListCrawler.java | 26 ----
.../tika/batch/fs/FSOutputStreamFactory.java | 14 --
.../main/java/org/apache/tika/util/PropsUtil.java | 26 ----
tika-bundles/tika-bundle-standard/pom.xml | 14 +-
.../java/org/apache/tika/config/TikaConfig.java | 6 -
.../tika/extractor/EmbeddedDocumentUtil.java | 16 ---
.../main/java/org/apache/tika/fork/ForkParser.java | 35 +----
.../java/org/apache/tika/io/TikaInputStream.java | 11 --
.../org/apache/tika/mime/MimeTypesFactory.java | 15 ++-
.../parser/AbstractEncodingDetectorParser.java | 2 +-
.../tika/parser/AbstractExternalProcessParser.java | 2 +-
.../org/apache/tika/parser/AbstractParser.java | 2 +
.../apache/tika/parser/AutoDetectParserConfig.java | 5 -
.../org/apache/tika/parser/CompositeParser.java | 2 +-
.../org/apache/tika/parser/DelegatingParser.java | 2 +-
.../java/org/apache/tika/parser/EmptyParser.java | 2 +-
.../java/org/apache/tika/parser/ErrorParser.java | 2 +-
.../java/org/apache/tika/parser/NetworkParser.java | 2 +-
.../org/apache/tika/parser/ParserDecorator.java | 2 +-
.../org/apache/tika/parser/RegexCaptureParser.java | 11 +-
.../tika/parser/external/ExternalParser.java | 4 +-
.../tika/parser/external2/ExternalParser.java | 3 +-
.../parser/multiple/AbstractMultipleParser.java | 3 +-
.../apache/tika/pipes/CompositePipesReporter.java | 12 +-
.../java/org/apache/tika/pipes/PipesServer.java | 14 +-
.../tika/pipes/fetcher/fs/FileSystemFetcher.java | 4 -
.../tika/pipes/pipesiterator/PipesIterator.java | 13 +-
.../tika/sax/BasicContentHandlerFactory.java | 7 +-
.../org/apache/tika/sax/BodyContentHandler.java | 11 --
.../tika/sax/ContentHandlerDecoratorFactory.java | 9 --
.../org/apache/tika/sax/ContentHandlerFactory.java | 8 --
.../org/apache/tika/sax/ToTextContentHandler.java | 12 --
.../apache/tika/sax/WriteOutContentHandler.java | 15 ---
.../org/apache/tika/config/TikaConfigTest.java | 2 +-
.../java/org/apache/tika/fork/ForkParserTest.java | 13 --
.../apache/tika/fork/ForkParserTikaBinTest.java | 4 +-
.../java/org/apache/tika/fork/ForkTestParser.java | 4 +-
.../org/apache/tika/mime/CustomReaderTest.java | 4 +-
.../tika/parser/DummyInitializableParser.java | 2 +-
.../tika/parser/DummyParameterizedParser.java | 2 +-
.../java/org/apache/tika/parser/DummyParser.java | 2 +-
.../org/apache/tika/parser/mock/MockParser.java | 3 +-
.../tika/sax/BasicContentHandlerFactoryTest.java | 21 ++-
.../apache/tika/sax/BodyContentHandlerTest.java | 5 +-
.../apache/tika/mime => }/custom-mimetypes.xml | 0
.../apache/tika/mime => }/custom-mimetypes2.xml | 0
tika-detectors/tika-detector-siegfried/pom.xml | 11 ++
.../java/org/apache/tika/eval/app/db/JDBCUtil.java | 28 ----
.../eval/app/tools/SlowCompositeReaderWrapper.java | 8 +-
.../eval/core/tokens/CommonTokenCountManager.java | 31 -----
.../tika/example/EncryptedPrescriptionParser.java | 4 +-
.../tika/example/PickBestTextEncodingParser.java | 8 --
.../org/apache/tika/example/SpringExample.java | 5 +-
.../org/apache/tika/example/TIAParsingExample.java | 11 +-
tika-fuzzing/pom.xml | 11 ++
tika-handlers/tika-handler-boilerpipe/pom.xml | 16 +++
.../org/apache/custom/parser/MyCustomParser.java | 4 +-
tika-java7/pom.xml | 11 ++
tika-langdetect/tika-langdetect-lingo24/pom.xml | 16 +++
tika-langdetect/tika-langdetect-mitll-text/pom.xml | 15 +++
tika-langdetect/tika-langdetect-opennlp/pom.xml | 14 +-
tika-langdetect/tika-langdetect-optimaize/pom.xml | 16 ++-
tika-langdetect/tika-langdetect-tika/pom.xml | 14 +-
tika-parent/pom.xml | 143 ++++++++++++++-------
.../tika-parser-scientific-module/pom.xml | 3 -
.../org/apache/tika/parser/gdal/GDALParser.java | 4 +-
.../geoinfo/GeographicInformationParser.java | 4 +-
.../org/apache/tika/parser/grib/GribParser.java | 4 +-
.../java/org/apache/tika/parser/hdf/HDFParser.java | 4 +-
.../apache/tika/parser/netcdf/NetCDFParser.java | 3 +-
.../tika-parser-scientific-package/pom.xml | 2 +
.../apache/tika/parser/sqlite3/SQLite3Parser.java | 4 +-
.../tika-parsers-ml/tika-age-recogniser/pom.xml | 10 +-
.../tika/parser/recognition/AgeRecogniser.java | 4 +-
.../tika/parser/pot/PooledTimeSeriesParser.java | 3 +-
.../recognition/ObjectRecognitionParser.java | 4 +-
.../apache/tika/parser/geo/topic/GeoParser.java | 4 +-
.../apache/tika/parser/journal/JournalParser.java | 4 +-
.../apache/tika/parser/ner/NamedEntityParser.java | 4 +-
.../parser/sentiment/SentimentAnalysisParser.java | 4 +-
.../parser/transcribe/aws/AmazonTranscribe.java | 4 +-
.../tika/parser/apple/AppleSingleFileParser.java | 4 +-
.../org/apache/tika/parser/apple/PListParser.java | 4 +-
.../tika/parser/iwork/IWorkPackageParser.java | 4 +-
.../parser/iwork/iwana/IWork13PackageParser.java | 4 +-
.../parser/iwork/iwana/IWork18PackageParser.java | 4 +-
.../apache/tika/parser/iwork/IWorkParserTest.java | 3 +-
.../org/apache/tika/parser/audio/AudioParser.java | 4 +-
.../org/apache/tika/parser/audio/MidiParser.java | 4 +-
.../java/org/apache/tika/parser/mp3/Mp3Parser.java | 4 +-
.../java/org/apache/tika/parser/mp4/MP4Parser.java | 4 +-
.../org/apache/tika/parser/video/FLVParser.java | 4 +-
.../org/apache/tika/parser/dgn/DGN8Parser.java | 4 +-
.../apache/tika/parser/dwg/AbstractDWGParser.java | 6 +-
.../java/org/apache/tika/parser/prt/PRTParser.java | 4 +-
.../org/apache/tika/parser/prt/PRTParserTest.java | 5 +-
.../tika-parser-code-module/pom.xml | 6 -
.../org/apache/tika/parser/asm/ClassParser.java | 4 +-
.../apache/tika/parser/asm/XHTMLClassVisitor.java | 2 +-
.../tika/parser/executable/ExecutableParser.java | 4 +-
.../java/org/apache/tika/parser/mat/MatParser.java | 4 +-
.../org/apache/tika/parser/sas/SAS7BDATParser.java | 4 +-
.../org/apache/tika/parser/crypto/Pkcs7Parser.java | 3 +-
.../org/apache/tika/parser/crypto/TSDParser.java | 4 +-
.../tika-parser-digest-commons/pom.xml | 15 +++
.../tika/parser/digestutils/CommonsDigester.java | 1 -
.../tika/parser/font/AdobeFontMetricParser.java | 4 +-
.../apache/tika/parser/font/TrueTypeParser.java | 4 +-
.../org/apache/tika/parser/html/HtmlHandler.java | 14 --
.../tika/parser/image/AbstractImageParser.java | 3 +-
.../org/apache/tika/parser/image/ICNSParser.java | 4 +-
.../org/apache/tika/parser/image/JXLParser.java | 4 +-
.../org/apache/tika/parser/image/PSDParser.java | 4 +-
.../org/apache/tika/parser/image/WebPParser.java | 4 +-
.../tika-parser-jdbc-commons/pom.xml | 15 +++
.../apache/tika/parser/jdbc/AbstractDBParser.java | 4 +-
.../tika-parser-mail-commons/pom.xml | 16 ++-
.../org/apache/tika/parser/mail/RFC822Parser.java | 4 +-
.../org/apache/tika/parser/mbox/MboxParser.java | 4 +-
.../detect/microsoft/POIFSContainerDetector.java | 12 --
.../parser/microsoft/AbstractOfficeParser.java | 4 +-
.../parser/microsoft/AbstractPOIFSExtractor.java | 12 +-
.../apache/tika/parser/microsoft/EMFParser.java | 4 +-
.../tika/parser/microsoft/JackcessParser.java | 4 +-
.../tika/parser/microsoft/MSOwnerFileParser.java | 4 +-
.../tika/parser/microsoft/OldExcelParser.java | 4 +-
.../tika/parser/microsoft/OutlookExtractor.java | 73 ++---------
.../apache/tika/parser/microsoft/TNEFParser.java | 4 +-
.../apache/tika/parser/microsoft/WMFParser.java | 4 +-
.../microsoft/activemime/ActiveMimeParser.java | 4 +-
.../tika/parser/microsoft/chm/ChmParser.java | 3 +-
.../parser/microsoft/onenote/OneNoteParser.java | 4 +-
.../ooxml/XWPFWordExtractorDecorator.java | 11 --
.../parser/microsoft/pst/OutlookPSTParser.java | 4 +-
.../tika/parser/microsoft/rtf/RTFParser.java | 30 +----
.../microsoft/xml/AbstractXML2003Parser.java | 4 +-
.../java/org/apache/tika/parser/dbf/DBFParser.java | 4 +-
.../java/org/apache/tika/parser/dif/DIFParser.java | 4 +-
.../apache/tika/parser/epub/EpubContentParser.java | 4 +-
.../org/apache/tika/parser/epub/EpubParser.java | 3 +-
.../org/apache/tika/parser/hwp/HwpV5Parser.java | 4 +-
.../apache/tika/parser/indesign/IDMLParser.java | 4 +-
.../tika/parser/odf/FlatOpenDocumentParser.java | 4 +-
.../tika/parser/odf/OpenDocumentContentParser.java | 4 +-
.../apache/tika/parser/odf/OpenDocumentParser.java | 3 +-
.../tika/parser/wordperfect/QuattroProParser.java | 4 +-
.../tika/parser/wordperfect/WordPerfectParser.java | 4 +-
.../org/apache/tika/parser/odf/ODFParserTest.java | 4 +-
.../org/apache/tika/parser/feed/FeedParser.java | 4 +-
.../java/org/apache/tika/parser/pdf/PDFParser.java | 21 +--
.../apache/tika/parser/pkg/CompressorParser.java | 4 +-
.../org/apache/tika/parser/pkg/PackageParser.java | 1 +
.../java/org/apache/tika/parser/pkg/RarParser.java | 4 +-
.../org/apache/tika/parser/pkg/UnrarParser.java | 4 +-
.../apache/tika/parser/pkg/AbstractPkgTest.java | 3 +-
.../tika/parser/strings/Latin1StringsParser.java | 4 +-
.../apache/tika/parser/strings/StringsParser.java | 4 +-
.../org/apache/tika/parser/http/HttpParser.java | 4 +-
.../org/apache/tika/parser/wacz/WACZParser.java | 4 +-
.../org/apache/tika/parser/warc/WARCParser.java | 4 +-
.../java/org/apache/tika/parser/tmx/TMXParser.java | 4 +-
.../apache/tika/parser/xliff/XLIFF12Parser.java | 4 +-
.../org/apache/tika/parser/xliff/XLZParser.java | 3 +-
.../java/org/apache/tika/parser/xml/XMLParser.java | 4 +-
.../org/apache/tika/parser/xml/XMLProfiler.java | 4 +-
.../apache/tika/parser/xml/DcXMLParserTest.java | 5 +-
.../parser/xml/TextAndAttributeXMLParserTest.java | 8 +-
.../tika-parsers-standard-package/pom.xml | 11 ++
.../apache/tika/parser/AutoDetectParserTest.java | 2 +-
.../apache/tika/parser/TestXMLEntityExpansion.java | 8 --
.../java/org/apache/tika/parser/XMLTestBase.java | 4 +-
.../DoublingContentHandlerDecoratorFactory.java | 5 -
.../UpcasingContentHandlerDecoratorFactory.java | 4 -
tika-pipes/tika-emitters/tika-emitter-jdbc/pom.xml | 18 ++-
.../apache/tika/pipes/fetcher/s3/S3Fetcher.java | 9 --
tika-pipes/tika-httpclient-commons/pom.xml | 16 +++
.../server/core/DefaultInputStreamFactory.java | 6 -
.../tika/server/core/FetcherStreamFactory.java | 43 +++----
.../tika/server/core/InputStreamFactory.java | 16 +--
187 files changed, 638 insertions(+), 868 deletions(-)
rename tika-core/src/test/resources/{org/apache/tika/mime => }/custom-mimetypes.xml (100%)
rename tika-core/src/test/resources/{org/apache/tika/mime => }/custom-mimetypes2.xml (100%)