This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-4545-add-translators
in repository https://gitbox.apache.org/repos/asf/tika.git
from e9a403546 git add test file
add add0146d8 TIKA-4251 -- imports and licenses only (#2419)
add 9298dc3f8 TIKA-4545 -- fix rat locally
add b7f728dc4 spotless - attempt to fix spotless
add 624f5a60f Merge remote-tracking branch 'origin/main' into TIKA-4545-add-translators
add c19f0208e translator fixes
No new revisions were added by this update.
Summary of changes:
.github/workflows/main-jdk17-build.yml | 2 +-
.../main-jdk17-windows-build-multi-locale.yml | 2 +-
.github/workflows/main-jdk17-windows-build.yml | 2 +-
.github/workflows/main-jdk21-build.yml | 2 +-
.github/workflows/main-jdk25-build.yml | 2 +-
.../test/java/org/apache/tika/bundle/BundleIT.java | 2 +-
.../tika/detect/DefaultEncodingDetector.java | 1 -
.../apache/tika/detect/NNExampleModelDetector.java | 1 -
.../org/apache/tika/detect/NNTrainedModel.java | 1 -
.../apache/tika/detect/NNTrainedModelBuilder.java | 2 -
.../tika/detect/OverrideEncodingDetector.java | 1 -
.../apache/tika/detect/TrainedModelDetector.java | 1 -
.../tika/exception/FileTooLongException.java | 1 -
.../tika/exception/TikaMemoryLimitException.java | 1 -
.../tika/exception/UnsupportedFormatException.java | 1 -
.../tika/exception/ZeroByteFileException.java | 3 +-
.../tika/extractor/EmbeddedDocumentExtractor.java | 1 -
.../EmbeddedDocumentExtractorFactory.java | 1 -
.../org/apache/tika/fork/ParserFactoryFactory.java | 1 -
.../org/apache/tika/io/BoundedInputStream.java | 2 +-
.../main/java/org/apache/tika/io/EndianUtils.java | 2 +-
.../java/org/apache/tika/io/FilenameUtils.java | 2 +-
.../src/main/java/org/apache/tika/io/IOUtils.java | 2 +-
.../org/apache/tika/io/LookaheadInputStream.java | 2 +-
.../tika/language/translate/DefaultTranslator.java | 1 -
.../apache/tika/metadata/AccessPermissions.java | 1 -
.../main/java/org/apache/tika/metadata/Font.java | 3 +-
.../main/java/org/apache/tika/metadata/HTML.java | 3 +-
.../main/java/org/apache/tika/metadata/MAPI.java | 1 -
.../main/java/org/apache/tika/metadata/PST.java | 1 -
.../java/org/apache/tika/metadata/RTFMetadata.java | 3 +-
.../java/org/apache/tika/metadata/Rendering.java | 2 -
.../tika/parser/AbstractExternalProcessParser.java | 1 -
.../org/apache/tika/parser/AbstractParser.java | 1 -
.../apache/tika/parser/AutoDetectParserConfig.java | 1 -
.../tika/parser/AutoDetectParserFactory.java | 1 -
.../org/apache/tika/parser/DigestingParser.java | 1 -
.../java/org/apache/tika/parser/NetworkParser.java | 2 +-
.../java/org/apache/tika/parser/ParserFactory.java | 1 -
.../java/org/apache/tika/parser/ParsingReader.java | 2 +-
.../org/apache/tika/parser/RenderingParser.java | 3 +-
.../tika/parser/digest/CompositeDigester.java | 1 -
.../tika/parser/digest/InputStreamDigester.java | 1 -
.../tika/parser/multiple/FallbackParser.java | 1 -
.../tika/parser/multiple/SupplementingParser.java | 1 -
.../apache/tika/renderer/CompositeRenderer.java | 3 +-
.../java/org/apache/tika/sax/CleanPhoneText.java | 1 -
.../tika/sax/PhoneExtractingContentHandler.java | 1 -
.../apache/tika/sax/RichTextContentHandler.java | 1 -
.../org/apache/tika/sax/SafeContentHandler.java | 5 --
.../org/apache/tika/sax/StandardOrganizations.java | 1 -
.../org/apache/tika/sax/StandardReference.java | 1 -
.../sax/StandardsExtractingContentHandler.java | 1 -
.../java/org/apache/tika/sax/StandardsText.java | 1 -
.../apache/tika/sax/StoppingEarlyException.java | 1 -
.../org/apache/tika/sax/TaggedContentHandler.java | 2 +-
.../org/apache/tika/sax/TaggedSAXException.java | 2 +-
.../java/org/apache/tika/utils/StreamGobbler.java | 1 -
.../java/org/apache/tika/utils/XMLReaderUtils.java | 1 -
.../org/apache/tika/MultiThreadedTikaTest.java | 3 +-
.../org/apache/tika/config/MockConfigTest.java | 6 +--
.../java/org/apache/tika/config/ParamTest.java | 6 +--
.../tika/config/TikaConfigSerializerTest.java | 1 -
.../java/org/apache/tika/io/EndianUtilsTest.java | 3 +-
.../java/org/apache/tika/io/FilenameUtilsTest.java | 3 +-
.../apache/tika/io/LookaheadInputStreamTest.java | 2 +-
.../tika/metadata/filter/TestMetadataFilter.java | 1 -
.../tika/parser/DummyInitializableParser.java | 1 -
.../org/apache/tika/parser/mock/VowelParser.java | 1 -
.../org/apache/tika/eval/app/EvalFilePaths.java | 1 -
.../org/apache/tika/eval/app/ProfilerBase.java | 2 -
.../java/org/apache/tika/eval/app/db/Cols.java | 1 -
.../java/org/apache/tika/eval/app/db/JDBCUtil.java | 1 -
.../org/apache/tika/eval/app/db/TableInfo.java | 1 -
.../org/apache/tika/eval/app/io/IDBWriter.java | 1 -
.../tika/eval/app/reports/XLSXNumFormatter.java | 1 -
.../apache/tika/eval/app/AnalyzerManagerTest.java | 1 -
.../org/apache/tika/eval/app/EvalConfigTest.java | 1 -
.../apache/tika/eval/app/ProfilerBatchTest.java | 1 -
.../org/apache/tika/eval/app/TikaEvalCLITest.java | 1 -
.../tika/eval/app/db/AbstractBufferTest.java | 1 -
.../tika/eval/app/reports/ResultsReporterTest.java | 1 -
.../core/textstats/TokenCountPriorityQueue.java | 1 -
.../eval/core/tokens/CommonTokenCountManager.java | 1 -
.../tika/eval/core/tokens/ContrastStatistics.java | 1 -
.../tika/eval/core/tokens/TokenContraster.java | 1 -
.../eval/core/tokens/TokenCountPriorityQueue.java | 1 -
.../tika/eval/core/util/ContentTagParser.java | 1 -
.../apache/tika/eval/core/langid/LangIdTest.java | 1 -
.../tika/eval/core/tokens/LuceneTokenCounter.java | 1 -
.../tika/eval/core/tokens/TokenCounterTest.java | 1 -
.../apache/tika/eval/core/util/MimeUtilTest.java | 1 -
.../apache/tika/example/AdvancedTypeDetector.java | 1 -
.../apache/tika/example/ContentHandlerExample.java | 1 -
.../org/apache/tika/example/CustomMimeInfo.java | 1 -
.../org/apache/tika/example/DescribeMetadata.java | 1 -
.../org/apache/tika/example/DirListParser.java | 1 -
.../apache/tika/example/DisplayMetInstance.java | 1 -
.../apache/tika/example/DumpTikaConfigExample.java | 1 -
.../example/EncryptedPrescriptionDetector.java | 1 -
.../tika/example/EncryptedPrescriptionParser.java | 1 -
.../apache/tika/example/ExtractEmbeddedFiles.java | 1 -
.../tika/example/GrabPhoneNumbersExample.java | 1 -
.../org/apache/tika/example/ImportContextImpl.java | 1 -
.../tika/example/InterruptableParsingExample.java | 1 -
.../java/org/apache/tika/example/Language.java | 1 -
.../tika/example/LanguageDetectingParser.java | 1 -
.../tika/example/LanguageDetectorExample.java | 1 -
.../org/apache/tika/example/LuceneIndexer.java | 1 -
.../apache/tika/example/LuceneIndexerExtended.java | 1 -
.../org/apache/tika/example/MediaTypeExample.java | 1 -
.../tika/example/MetadataAwareLuceneIndexer.java | 1 -
.../java/org/apache/tika/example/MyFirstTika.java | 1 -
.../org/apache/tika/example/ParsingExample.java | 1 -
.../java/org/apache/tika/example/Pharmacy.java | 1 -
.../apache/tika/example/PrescriptionParser.java | 1 -
.../java/org/apache/tika/example/RecentFiles.java | 1 -
.../org/apache/tika/example/RollbackSoftware.java | 1 -
.../apache/tika/example/SimpleTextExtractor.java | 1 -
.../apache/tika/example/SimpleTypeDetector.java | 1 -
.../org/apache/tika/example/SpringExample.java | 1 -
.../tika/example/StandardsExtractionExample.java | 1 -
.../org/apache/tika/example/TIAParsingExample.java | 1 -
.../tika/example/TranscribeTranslateExample.java | 1 -
.../org/apache/tika/example/TranslatorExample.java | 1 -
.../apache/tika/example/TrecDocumentGenerator.java | 1 -
.../java/org/apache/tika/example/ZipListFiles.java | 1 -
.../tika/example/AdvancedTypeDetectorTest.java | 1 -
.../tika/example/ContentHandlerExampleTest.java | 1 -
.../tika/example/ExtractEmbeddedFilesTest.java | 1 -
.../tika/example/LanguageDetectorExampleTest.java | 1 -
.../tika/example/SimpleTextExtractorTest.java | 1 -
.../tika/example/SimpleTypeDetectorTest.java | 1 -
.../apache/tika/example/TestParsingExample.java | 1 -
.../apache/tika/example/TranslatorExampleTest.java | 1 -
.../tika/pipes/s3/tests/PipeIntegrationTests.java | 1 -
.../filetypedetector/TikaFileTypeDetector.java | 2 -
.../opennlp/ProbingLanguageDetector.java | 2 +-
.../langdetect/opennlp/OpenNLPDetectorTest.java | 2 +-
.../tika/LanguageProfilerBuilderTest.java | 1 -
tika-parent/checkstyle.xml | 28 +----------
HEADER.txt => tika-parent/license-header.txt | 2 +-
tika-parent/pom.xml | 57 +++++++++++++++++++++-
.../apache/tika/parser/envi/EnviHeaderParser.java | 1 -
.../org/apache/tika/parser/gdal/GDALParser.java | 1 -
.../geoinfo/GeographicInformationParser.java | 1 -
.../org/apache/tika/parser/grib/GribParser.java | 1 -
.../java/org/apache/tika/parser/hdf/HDFParser.java | 1 -
.../tika/parser/envi/EnviHeaderParserTest.java | 1 -
.../apache/tika/parser/gdal/TestGDALParser.java | 1 -
.../geoinfo/GeographicInformationParserTest.java | 1 -
.../apache/tika/parser/grib/GribParserTest.java | 2 -
.../tika/parser/isatab/ISArchiveParserTest.java | 1 -
.../parser/scientific/integration/TestParsers.java | 1 -
.../apache/tika/parser/geo/topic/GeoParser.java | 3 +-
.../tika/parser/geo/topic/GeoParserConfig.java | 1 -
.../org/apache/tika/parser/geo/topic/GeoTag.java | 1 -
.../tika/parser/geo/topic/NameEntityExtractor.java | 1 -
.../geo/topic/gazetteer/GeoGazetteerClient.java | 1 -
.../tika/parser/geo/topic/gazetteer/Location.java | 1 -
.../tika/parser/journal/GrobidRESTParser.java | 1 -
.../apache/tika/parser/journal/JournalParser.java | 1 -
.../apache/tika/parser/journal/TEIDOMParser.java | 1 -
.../org/apache/tika/parser/ner/NERecogniser.java | 1 -
.../apache/tika/parser/ner/NamedEntityParser.java | 1 -
.../parser/ner/opennlp/OpenNLPNERecogniser.java | 1 -
.../tika/parser/ner/opennlp/OpenNLPNameFinder.java | 1 -
.../tika/parser/ner/regex/RegexNERecogniser.java | 1 -
.../parser/sentiment/SentimentAnalysisParser.java | 1 -
.../tika/parser/geo/topic/GeoParserTest.java | 1 -
.../tika/parser/journal/JournalParserTest.java | 1 -
.../org/apache/tika/parser/journal/TEITest.java | 1 -
.../tika/parser/ner/nltk/NLTKNERecogniserTest.java | 2 +-
.../parser/ner/regex/RegexNERecogniserTest.java | 2 +-
.../parser/transcribe/aws/AmazonTranscribe.java | 1 -
.../parser/iwork/iwana/IWork13PackageParser.java | 1 -
.../parser/iwork/iwana/IWork18PackageParser.java | 1 -
.../org/apache/tika/detect/MatroskaDetector.java | 11 ++---
.../tika/parser/mp4/boxes/TikaUserDataBox.java | 2 -
.../apache/tika/detect/MatroskaDetectorTest.java | 10 ++--
.../apache/tika/parser/dwg/DWGParserConfig.java | 1 -
.../tika/parser/dwg/DWGReadFormatRemover.java | 1 -
.../org/apache/tika/parser/dwg/DWGReadParser.java | 1 -
.../org/apache/tika/parser/dwg/JulianDateUtil.java | 1 -
.../apache/tika/parser/crypto/Pkcs7ParserTest.java | 1 -
.../org/apache/tika/parser/html/DataURIScheme.java | 1 -
.../parser/html/DataURISchemeParseException.java | 1 -
.../apache/tika/parser/html/DataURISchemeUtil.java | 1 -
.../tika/parser/html/DataURISchemeParserTest.java | 1 -
.../tika/parser/html/HtmlEncodingDetectorTest.java | 1 -
.../org/apache/tika/parser/html/SrcDocTest.java | 1 -
.../html/StandardHtmlEncodingDetectorTest.java | 1 -
.../org/apache/tika/parser/image/ICNSType.java | 1 -
.../apache/tika/parser/image/HeifParserTest.java | 1 -
.../apache/tika/parser/image/WebPParserTest.java | 1 -
.../apache/tika/parser/mailcommons/MailUtil.java | 1 -
.../tika/parser/mailcommons/MailUtilTest.java | 1 -
.../apache/tika/parser/microsoft/EMFParser.java | 1 -
.../tika/parser/microsoft/JackcessExtractor.java | 2 -
.../tika/parser/microsoft/JackcessParser.java | 1 -
.../tika/parser/microsoft/OfficeParserConfig.java | 2 -
.../apache/tika/parser/microsoft/WMFParser.java | 1 -
.../tika/parser/microsoft/chm/ChmWrapper.java | 1 -
.../parser/microsoft/msg/TikaNameIdChunks.java | 7 ++-
.../tika/parser/microsoft/onenote/CompactID.java | 1 -
.../tika/parser/microsoft/onenote/Error.java | 1 -
.../parser/microsoft/onenote/ExtendedGUID.java | 1 -
.../microsoft/onenote/FileChunkReference.java | 1 -
.../microsoft/onenote/FileDataStoreObject.java | 1 -
.../tika/parser/microsoft/onenote/FileNode.java | 1 -
.../parser/microsoft/onenote/FileNodeList.java | 1 -
.../microsoft/onenote/FileNodeListHeader.java | 1 -
.../tika/parser/microsoft/onenote/FileNodePtr.java | 1 -
.../microsoft/onenote/FileNodePtrBackPush.java | 1 -
.../parser/microsoft/onenote/FileNodeUnion.java | 1 -
.../apache/tika/parser/microsoft/onenote/GUID.java | 1 -
.../tika/parser/microsoft/onenote/IndentUtil.java | 1 -
.../tika/parser/microsoft/onenote/Int24.java | 1 -
.../apache/tika/parser/microsoft/onenote/JCID.java | 1 -
.../microsoft/onenote/JCIDPropertySetTypeEnum.java | 1 -
.../onenote/ObjectDeclarationWithRefCount.java | 1 -
.../onenote/ObjectDeclarationWithRefCountBody.java | 1 -
.../onenote/OneNoteDirectFileResource.java | 1 -
.../parser/microsoft/onenote/OneNoteDocument.java | 1 -
.../parser/microsoft/onenote/OneNoteHeader.java | 1 -
.../parser/microsoft/onenote/OneNoteParser.java | 1 -
.../microsoft/onenote/OneNotePropertyEnum.java | 1 -
.../microsoft/onenote/OneNotePropertyId.java | 1 -
.../tika/parser/microsoft/onenote/OneNotePtr.java | 1 -
.../microsoft/onenote/OneNoteTreeWalker.java | 1 -
.../parser/microsoft/onenote/PropertyIDType.java | 1 -
.../tika/parser/microsoft/onenote/PropertySet.java | 1 -
.../parser/microsoft/onenote/PropertyValue.java | 1 -
.../tika/parser/microsoft/onenote/Revision.java | 1 -
.../parser/microsoft/onenote/RevisionManifest.java | 1 -
.../onenote/fsshttpb/IFSSHTTPBSerializable.java | 1 -
.../onenote/fsshttpb/MSOneStorePackage.java | 1 -
.../onenote/fsshttpb/MSOneStoreParser.java | 1 -
.../exception/DataElementParseErrorException.java | 1 -
.../onenote/fsshttpb/property/ArrayNumber.java | 1 -
.../fsshttpb/property/EightBytesOfData.java | 1 -
.../onenote/fsshttpb/property/FourBytesOfData.java | 1 -
.../onenote/fsshttpb/property/IProperty.java | 1 -
.../onenote/fsshttpb/property/NoData.java | 1 -
.../onenote/fsshttpb/property/OneByteOfData.java | 1 -
.../property/PrtArrayOfPropertyValues.java | 1 -
.../PrtFourBytesOfLengthFollowedByData.java | 1 -
.../onenote/fsshttpb/property/TwoBytesOfData.java | 1 -
.../streamobj/CellManifestCurrentRevision.java | 1 -
.../streamobj/CellManifestDataElementData.java | 1 -
.../onenote/fsshttpb/streamobj/DataElement.java | 1 -
.../fsshttpb/streamobj/DataElementData.java | 1 -
.../fsshttpb/streamobj/DataElementHash.java | 1 -
.../fsshttpb/streamobj/DataElementPackage.java | 1 -
.../onenote/fsshttpb/streamobj/DataHashObject.java | 1 -
.../onenote/fsshttpb/streamobj/DataSizeObject.java | 1 -
.../fsshttpb/streamobj/EncryptionObject.java | 1 -
.../onenote/fsshttpb/streamobj/FileDataObject.java | 1 -
.../fsshttpb/streamobj/IntermediateNodeObject.java | 1 -
.../onenote/fsshttpb/streamobj/JCIDObject.java | 1 -
.../onenote/fsshttpb/streamobj/LeafNodeObject.java | 1 -
.../onenote/fsshttpb/streamobj/NodeObject.java | 1 -
.../fsshttpb/streamobj/ObjectGroupData.java | 1 -
.../streamobj/ObjectGroupDataElementData.java | 1 -
.../streamobj/ObjectGroupDeclarations.java | 1 -
.../fsshttpb/streamobj/ObjectGroupMetadata.java | 1 -
.../streamobj/ObjectGroupMetadataDeclarations.java | 1 -
.../ObjectGroupObjectBLOBDataDeclaration.java | 1 -
.../fsshttpb/streamobj/ObjectGroupObjectData.java | 1 -
.../ObjectGroupObjectDataBLOBReference.java | 1 -
.../streamobj/ObjectGroupObjectDeclare.java | 1 -
.../onenote/fsshttpb/streamobj/PropertySet.java | 1 -
.../fsshttpb/streamobj/PropertySetObject.java | 1 -
.../fsshttpb/streamobj/RevisionManifest.java | 1 -
.../streamobj/RevisionManifestDataElementData.java | 1 -
.../RevisionManifestObjectGroupReferences.java | 1 -
.../streamobj/RevisionManifestRootDeclare.java | 1 -
.../fsshttpb/streamobj/RevisionStoreObject.java | 1 -
.../streamobj/RevisionStoreObjectGroup.java | 1 -
.../fsshttpb/streamobj/SignatureObject.java | 1 -
.../streamobj/StorageIndexCellMapping.java | 1 -
.../streamobj/StorageIndexDataElementData.java | 1 -
.../streamobj/StorageIndexManifestMapping.java | 1 -
.../streamobj/StorageIndexRevisionMapping.java | 1 -
.../streamobj/StorageManifestDataElementData.java | 1 -
.../streamobj/StorageManifestRootDeclare.java | 1 -
.../streamobj/StorageManifestSchemaGUID.java | 1 -
.../onenote/fsshttpb/streamobj/StreamObject.java | 1 -
.../fsshttpb/streamobj/StreamObjectHeaderEnd.java | 1 -
.../streamobj/StreamObjectHeaderEnd16bit.java | 1 -
.../streamobj/StreamObjectHeaderEnd8bit.java | 1 -
.../streamobj/StreamObjectHeaderStart.java | 1 -
.../streamobj/StreamObjectHeaderStart16bit.java | 1 -
.../streamobj/StreamObjectHeaderStart32bit.java | 1 -
.../streamobj/StreamObjectParseErrorException.java | 1 -
.../streamobj/StreamObjectTypeHeaderEnd.java | 1 -
.../streamobj/StreamObjectTypeHeaderStart.java | 1 -
.../fsshttpb/streamobj/basic/AdapterHelper.java | 1 -
.../streamobj/basic/AlternativePackaging.java | 1 -
.../fsshttpb/streamobj/basic/BasicObject.java | 1 -
.../fsshttpb/streamobj/basic/BinaryItem.java | 1 -
.../onenote/fsshttpb/streamobj/basic/CellID.java | 1 -
.../fsshttpb/streamobj/basic/CellIDArray.java | 1 -
.../fsshttpb/streamobj/basic/Compact64bitInt.java | 1 -
.../fsshttpb/streamobj/basic/CompactID.java | 1 -
.../fsshttpb/streamobj/basic/DataElementType.java | 1 -
.../streamobj/basic/DataNodeObjectData.java | 1 -
.../fsshttpb/streamobj/basic/ExGUIDArray.java | 1 -
.../onenote/fsshttpb/streamobj/basic/ExGuid.java | 1 -
.../fsshttpb/streamobj/basic/HeaderCell.java | 1 -
.../onenote/fsshttpb/streamobj/basic/JCID.java | 1 -
.../fsshttpb/streamobj/basic/PropertyID.java | 1 -
.../fsshttpb/streamobj/basic/PropertyType.java | 1 -
.../fsshttpb/streamobj/basic/RequestTypes.java | 1 -
.../fsshttpb/streamobj/basic/SerialNumber.java | 1 -
.../fsshttpb/streamobj/basic/ZipHeader.java | 1 -
.../streamobj/chunking/AbstractChunking.java | 1 -
.../streamobj/chunking/ChunkingFactory.java | 1 -
.../streamobj/chunking/ChunkingMethod.java | 1 -
.../streamobj/chunking/RDCAnalysisChunking.java | 1 -
.../streamobj/chunking/SimpleChunking.java | 1 -
.../streamobj/chunking/ZipFilesChunking.java | 1 -
.../streamobj/space/ObjectSpaceObjectPropSet.java | 1 -
.../space/ObjectSpaceObjectStreamHeader.java | 1 -
.../space/ObjectSpaceObjectStreamOfContextIDs.java | 1 -
.../space/ObjectSpaceObjectStreamOfOIDs.java | 1 -
.../space/ObjectSpaceObjectStreamOfOSIDs.java | 1 -
.../microsoft/onenote/fsshttpb/util/Bit.java | 1 -
.../onenote/fsshttpb/util/BitConverter.java | 1 -
.../microsoft/onenote/fsshttpb/util/BitReader.java | 1 -
.../microsoft/onenote/fsshttpb/util/BitWriter.java | 1 -
.../microsoft/onenote/fsshttpb/util/ByteUtil.java | 1 -
.../onenote/fsshttpb/util/DataElementUtils.java | 1 -
.../microsoft/onenote/fsshttpb/util/GuidUtil.java | 1 -
.../fsshttpb/util/LittleEndianBitConverter.java | 1 -
.../fsshttpb/util/SequenceNumberGenerator.java | 1 -
.../microsoft/onenote/fsshttpb/util/UuidUtils.java | 1 -
.../microsoft/ooxml/OOXMLTikaBodyPartHandler.java | 1 -
.../ooxml/OOXMLWordAndPowerPointTextHandler.java | 1 -
.../microsoft/ooxml/ParagraphProperties.java | 1 -
.../tika/parser/microsoft/ooxml/RunProperties.java | 1 -
.../microsoft/ooxml/xps/XPSExtractorDecorator.java | 1 -
.../microsoft/ooxml/xps/XPSTextExtractor.java | 1 -
.../xslf/XSLFEventBasedPowerPointExtractor.java | 1 -
.../ooxml/xwpf/XWPFEventBasedWordExtractor.java | 2 -
.../microsoft/ooxml/xwpf/XWPFNumberingShim.java | 1 -
.../microsoft/ooxml/xwpf/XWPFStylesShim.java | 1 -
.../ooxml/xwpf/ml2006/BinaryDataHandler.java | 1 -
.../ooxml/xwpf/ml2006/CorePropertiesHandler.java | 1 -
.../xwpf/ml2006/ExtendedPropertiesHandler.java | 1 -
.../microsoft/ooxml/xwpf/ml2006/Relationship.java | 1 -
.../ooxml/xwpf/ml2006/RelationshipsHandler.java | 1 -
.../ooxml/xwpf/ml2006/RelationshipsManager.java | 1 -
.../ooxml/xwpf/ml2006/Word2006MLDocHandler.java | 1 -
.../ooxml/xwpf/ml2006/Word2006MLParser.java | 1 -
.../parser/microsoft/rtf/RTFEmbObjHandler.java | 2 +-
.../parser/microsoft/rtf/RTFObjDataParser.java | 5 +-
.../tika/parser/microsoft/rtf/TextExtractor.java | 1 -
.../parser/microsoft/xml/HyperlinkHandler.java | 1 -
.../tika/parser/microsoft/EMFParserTest.java | 1 -
.../tika/parser/microsoft/JackcessParserTest.java | 1 -
.../tika/parser/microsoft/WMFParserTest.java | 1 -
.../microsoft/chm/TestChmLzxcResetTable.java | 1 -
.../parser/microsoft/ooxml/SXSLFExtractorTest.java | 1 -
.../parser/microsoft/ooxml/SXWPFExtractorTest.java | 1 -
.../ooxml/xwpf/ml2006/Word2006MLParserTest.java | 1 -
.../tika/parser/wordperfect/WPInputStream.java | 1 -
.../tika/parser/indesign/IDMLParserTest.java | 1 -
.../org/apache/tika/parser/mif/MIFParserTest.java | 1 -
.../tika/parser/wordperfect/WPInputStreamTest.java | 1 -
.../apache/tika/parser/ocr/TesseractOCRParser.java | 1 -
.../apache/tika/parser/ocr/tess4j/ImageDeskew.java | 7 ++-
.../apache/tika/parser/ocr/tess4j/ImageUtil.java | 4 --
.../org/apache/tika/parser/pdf/AccessChecker.java | 1 -
.../java/org/apache/tika/parser/pdf/OCR2XHTML.java | 1 -
.../java/org/apache/tika/parser/pdf/PDF2XHTML.java | 1 -
.../tika/parser/pdf/PDFEncodedStringDecoder.java | 1 -
.../tika/parser/pdf/updates/StartXRefScanner.java | 3 +-
.../renderer/pdf/pdfbox/NoTextPDFRenderer.java | 2 +-
.../renderer/pdf/pdfbox/TextOnlyPDFRenderer.java | 2 +-
.../pdf/pdfbox/VectorGraphicsOnlyPDFRenderer.java | 2 +-
.../apache/tika/parser/pdf/PDFRenderingTest.java | 1 -
.../org/apache/tika/parser/pkg/ArParserTest.java | 1 -
.../tika/parser/pkg/CompressorParserTest.java | 1 -
.../apache/tika/parser/pkg/PackageParserTest.java | 1 -
.../java/org/apache/tika/parser/csv/CSVParams.java | 2 +-
.../java/org/apache/tika/parser/csv/CSVResult.java | 2 +-
.../org/apache/tika/parser/csv/CSVSniffer.java | 3 +-
.../apache/tika/parser/csv/TextAndCSVConfig.java | 2 +-
.../apache/tika/parser/csv/TextAndCSVParser.java | 2 +-
.../tika/parser/strings/Latin1StringsParser.java | 2 +-
.../apache/tika/parser/strings/StringsConfig.java | 2 +-
.../tika/parser/strings/StringsEncoding.java | 2 +-
.../apache/tika/parser/strings/StringsParser.java | 2 +-
.../org/apache/tika/parser/csv/CSVSnifferTest.java | 2 +-
.../tika/parser/csv/TextAndCSVParserTest.java | 2 +-
.../parser/strings/Latin1StringsParserTest.java | 2 +-
.../tika/parser/strings/StringsConfigTest.java | 2 +-
.../tika/parser/strings/StringsParserTest.java | 2 +-
.../org/apache/tika/parser/http/HttpParser.java | 2 +-
.../org/apache/tika/parser/wacz/WACZParser.java | 2 +-
.../org/apache/tika/parser/warc/WARCParser.java | 2 +-
.../apache/tika/parser/http/HttpParserTest.java | 2 +-
.../apache/tika/parser/wacz/WACZParserTest.java | 2 +-
.../apache/tika/parser/warc/WARCParserTest.java | 2 +-
.../apache/tika/parser/tmx/TMXContentHandler.java | 1 -
.../apache/tika/parser/xmp/XMPPacketScanner.java | 8 ++-
.../java/org/apache/tika/TikaLoaderHelper.java | 2 +-
.../tika/config/TikaEncodingDetectorTest.java | 1 -
.../org/apache/tika/detect/TestZipDetector.java | 1 -
.../tika/extractor/EmbeddedDocumentUtilTest.java | 1 -
.../org/apache/tika/parser/ParsingReaderTest.java | 2 +-
.../org/apache/tika/parser/TabularFormatsTest.java | 2 +-
.../apache/tika/parser/TestXMLEntityExpansion.java | 2 +-
.../java/org/apache/tika/parser/TestXXEInXML.java | 2 +-
.../java/org/apache/tika/parser/XMLTestBase.java | 2 +-
.../parser/apple/AppleSingleFileParserTest.java | 1 -
.../tika/parser/microsoft/XML2003ParserTest.java | 1 -
.../org/apache/tika/parser/pkg/ArParserTest.java | 1 -
.../pkg/CompositeZipContainerDetectorTest.java | 1 -
.../tika/parser/pkg/CompressorParserTest.java | 1 -
.../sax/PhoneExtractingContentHandlerTest.java | 1 -
.../sax/StandardsExtractingContentHandlerTest.java | 1 -
.../apache/tika/utils/ServiceLoaderUtilsTest.java | 1 -
.../api/pipesiterator/PipesIteratorBaseConfig.java | 2 -
.../org/apache/tika/pipes/core/PipesConfig.java | 1 -
.../tika/pipes/core/emitter/EmitterManager.java | 3 +-
.../BasicEmbeddedDocumentBytesHandler.java | 1 -
.../tika/pipes/core/fetcher/FetcherManager.java | 3 +-
.../core/pipesiterator/PipesIteratorManager.java | 3 +-
.../tika/pipes/core/reporter/NoOpReporter.java | 1 -
.../tika/pipes/core/reporter/ReporterManager.java | 3 +-
.../tika/pipes/core/TikaPipesConfigTest.java | 6 +--
.../tika/pipes/core/TikaPipesConfigTest.java | 8 +--
.../reporter/fs/TestFileSystemStatusReporter.java | 1 -
.../pipesiterator/json/TestJsonPipesIterator.java | 1 -
.../apache/tika/plugins/PluginComponentLoader.java | 2 -
.../org/apache/tika/plugins/TikaPluginManager.java | 13 ++++-
.../loader/PolymorphicObjectMapperFactory.java | 4 --
.../serialization/PrettyMetadataKeyComparator.java | 1 -
.../META-INF/tika-serialization-allowlist.txt | 16 +++---
.../server/core/DefaultInputStreamFactory.java | 1 -
.../org/apache/tika/server/core/HTMLHelper.java | 1 -
.../tika/server/core/InputStreamFactory.java | 1 -
.../tika/server/core/ParseContextConfig.java | 16 ------
.../server/core/ProduceTypeResourceComparator.java | 1 -
.../tika/server/core/ServerStatusWatcher.java | 1 -
.../apache/tika/server/core/TikaLoggingFilter.java | 1 -
.../org/apache/tika/server/core/TikaServerCli.java | 1 -
.../apache/tika/server/core/TikaServerProcess.java | 1 -
.../tika/server/core/resource/AsyncResource.java | 1 -
.../server/core/resource/DetectorResource.java | 1 -
.../server/core/resource/LanguageResource.java | 1 -
.../server/core/resource/MetadataResource.java | 1 -
.../tika/server/core/resource/PipesResource.java | 1 -
.../core/resource/RecursiveMetadataResource.java | 1 -
.../tika/server/core/resource/TikaResource.java | 1 -
.../server/core/resource/TranslateResource.java | 1 -
.../server/core/resource/UnpackerResource.java | 1 -
.../server/core/writer/CSVMessageBodyWriter.java | 1 -
.../server/core/writer/JSONMessageBodyWriter.java | 1 -
.../tika/server/core/writer/JSONObjWriter.java | 1 -
.../core/writer/MetadataListMessageBodyWriter.java | 1 -
.../apache/tika/server/core/writer/TarWriter.java | 1 -
.../server/core/writer/TextMessageBodyWriter.java | 2 -
.../apache/tika/server/core/writer/ZipWriter.java | 1 -
.../org/apache/tika/server/core/CXFTestBase.java | 1 -
.../tika/server/core/LanguageResourceTest.java | 1 -
.../server/core/RecursiveMetadataResourceTest.java | 1 -
.../apache/tika/server/core/StackTraceOffTest.java | 6 +--
.../apache/tika/server/core/StackTraceTest.java | 6 +--
.../apache/tika/server/core/TikaMimeTypesTest.java | 1 -
.../org/apache/tika/server/core/TikaPipesTest.java | 6 +--
.../tika/server/core/TikaResourceFetcherTest.java | 1 +
.../core/TikaResourceMetadataFilterTest.java | 1 -
.../tika/server/core/TikaResourceNoStackTest.java | 1 -
.../apache/tika/server/core/TikaResourceTest.java | 1 -
.../apache/tika/server/core/TikaVersionTest.java | 1 -
.../apache/tika/server/core/TikaWelcomeTest.java | 1 -
.../tika/server/core/TranslateResourceTest.java | 2 -
.../tika/server/eval/TikaEvalResourceTest.java | 1 -
.../standard/writer/XMPMessageBodyWriter.java | 1 -
.../tika/server/standard/DetectorResourceTest.java | 1 -
.../apache/tika/server/standard/FetcherTest.java | 1 -
.../server/standard/JsonMaxFieldLengthTest.java | 3 +-
.../tika/server/standard/MetadataResourceTest.java | 2 -
.../server/standard/OpenNLPMetadataFilterTest.java | 1 -
.../standard/OptimaizeMetadataFilterTest.java | 1 -
.../standard/RecursiveMetadataFilterTest.java | 1 -
.../standard/RecursiveMetadataResourceTest.java | 1 -
.../tika/server/standard/TikaDetectorsTest.java | 1 -
.../tika/server/standard/TikaParsersTest.java | 1 -
.../apache/tika/server/standard/TikaPipesTest.java | 6 +--
.../tika/server/standard/TikaResourceTest.java | 1 -
.../tika/server/standard/UnpackerResourceTest.java | 1 -
.../standard/UnpackerResourceWithConfigTest.java | 1 -
tika-translate/pom.xml | 14 ++++++
.../language/translate/impl/CachedTranslator.java | 1 -
.../translate/impl/ExternalTranslator.java | 1 -
.../language/translate/impl/GoogleTranslator.java | 1 -
.../language/translate/impl/Lingo24Translator.java | 1 -
.../language/translate/impl/MarianTranslator.java | 1 -
.../translate/impl/MicrosoftTranslator.java | 1 -
.../language/translate/impl/MosesTranslator.java | 1 -
.../language/translate/impl/RTGTranslator.java | 8 ++-
.../language/translate/impl/YandexTranslator.java | 1 -
.../translate/impl/CachedTranslatorTest.java | 1 -
.../translate/impl/GoogleTranslatorTest.java | 1 -
.../translate/impl/Lingo24TranslatorTest.java | 1 -
.../language/translate/impl/RTGTranslatorTest.java | 7 ++-
510 files changed, 214 insertions(+), 639 deletions(-)
copy HEADER.txt => tika-parent/license-header.txt (99%)
copy tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/log4j.properties => tika-serialization/src/test/resources/META-INF/tika-serialization-allowlist.txt (68%)