This is an automated email from the git hooks/post-receive script. apo-guest pushed a commit to branch master in repository tika.
commit 5555afe598590622a0b2aa5e4275ad9dc785e36a Merge: 9bd7743 c3b0fed Author: Markus Koschany <[email protected]> Date: Mon Nov 30 15:20:22 2015 +0000 Merge tag 'upstream/1.11_rc1' Upstream version 1.11~rc1 # gpg: Signature made Mon Nov 30 15:20:18 2015 UTC using RSA key ID 513B51E4 # gpg: Good signature from "Markus Koschany <[email protected]>" [ultimate] # gpg: aka "Markus Koschany <[email protected]>" [ultimate] # gpg: aka "Markus Koschany <[email protected]>" [ultimate] .gitignore | 3 + CHANGES.txt | 456 ++ KEYS | 111 +- LICENSE.txt | 48 + NOTICE.txt | 6 +- README.md | 85 + README.txt | 102 - pom.xml | 112 +- src/site/apt/detection.apt | 152 - src/site/apt/formats.apt | 145 - src/site/apt/gettingstarted.apt | 208 - src/site/apt/index.apt | 31 - src/site/apt/parser.apt | 245 - src/site/apt/parser_guide.apt | 135 - src/site/resources/css/site.css | 324 -- src/site/resources/tika.png | Bin 10203 -> 0 bytes src/site/resources/tika.svg | 5318 -------------------- src/site/resources/tikaNoText.svg | 5305 ------------------- src/site/resources/tikaNoText16.png | Bin 641 -> 0 bytes src/site/resources/tikaNoText32.png | Bin 1768 -> 0 bytes src/site/resources/tikaNoText64.png | Bin 5552 -> 0 bytes src/site/site.vm | 283 -- src/site/site.xml | 47 - tika-app/pom.xml | 90 +- .../src/main/appended-resources/META-INF/LICENSE | 5 + .../batch/DigestingAutoDetectParserFactory.java | 36 +- .../batch/builders/AppParserFactoryBuilder.java | 76 + .../apache/tika/cli/BatchCommandLineBuilder.java | 209 + .../src/main/java/org/apache/tika/cli/TikaCLI.java | 526 +- .../src/main/java/org/apache/tika/gui/TikaGUI.java | 140 +- .../src/main/resources/log4j.properties | 12 +- .../main/resources/log4j_batch_process.properties | 12 +- .../src/main/resources/tika-app-batch-config.xml | 136 + .../tika/cli/TikaCLIBatchCommandLineTest.java | 207 + .../tika/cli/TikaCLIBatchIntegrationTest.java | 174 + .../test/java/org/apache/tika/cli/TikaCLITest.java | 213 +- {tika-core => tika-batch}/pom.xml | 129 +- .../apache/tika/batch/AutoDetectParserFactory.java | 18 +- .../org/apache/tika/batch/BatchNoRestartError.java | 20 +- .../java/org/apache/tika/batch/BatchProcess.java | 597 +++ .../apache/tika/batch/BatchProcessDriverCLI.java | 403 ++ .../org/apache/tika/batch/ConsumersManager.java | 80 + .../tika/batch/FileConsumerFutureResult.java | 28 +- .../java/org/apache/tika/batch/FileResource.java | 68 + .../apache/tika/batch/FileResourceConsumer.java | 429 ++ .../org/apache/tika/batch/FileResourceCrawler.java | 270 + .../batch/FileResourceCrawlerFutureResult.java | 28 +- .../java/org/apache/tika/batch/FileStarted.java | 113 + .../tika/batch/IFileProcessorFutureResult.java | 13 +- .../java/org/apache/tika/batch/Interrupter.java | 59 + .../apache/tika/batch/InterrupterFutureResult.java | 13 +- .../org/apache/tika/batch/OutputStreamFactory.java | 16 +- .../tika/batch/ParallelFileProcessingResult.java | 109 + .../java/org/apache/tika/batch/ParserFactory.java | 25 +- .../org/apache/tika/batch/PoisonFileResource.java | 37 +- .../java/org/apache/tika/batch/StatusReporter.java | 227 + .../tika/batch/StatusReporterFutureResult.java | 12 +- .../batch/builders/AbstractConsumersBuilder.java | 28 +- .../tika/batch/builders/BatchProcessBuilder.java | 295 ++ .../batch/builders/CommandLineParserBuilder.java | 143 + .../DefaultContentHandlerFactoryBuilder.java | 58 + .../builders/IContentHandlerFactoryBuilder.java | 18 +- .../tika/batch/builders/ICrawlerBuilder.java | 20 +- .../tika/batch/builders/IParserFactoryBuilder.java | 15 +- .../tika/batch/builders/InterrupterBuilder.java | 19 +- .../builders/ObjectFromDOMAndQueueBuilder.java | 21 +- .../tika/batch/builders/ObjectFromDOMBuilder.java | 18 +- .../tika/batch/builders/ParserFactoryBuilder.java | 49 + .../tika/batch/builders/ReporterBuilder.java | 19 +- .../batch/builders/SimpleLogReporterBuilder.java | 43 + .../tika/batch/builders/StatusReporterBuilder.java | 23 +- .../apache/tika/batch/fs/AbstractFSConsumer.java | 78 + .../apache/tika/batch/fs/BasicTikaFSConsumer.java | 126 + .../apache/tika/batch/fs/FSBatchProcessCLI.java | 160 + .../apache/tika/batch/fs/FSConsumersManager.java | 30 +- .../apache/tika/batch/fs/FSDirectoryCrawler.java | 165 + .../apache/tika/batch/fs/FSDocumentSelector.java | 83 + .../org/apache/tika/batch/fs/FSFileResource.java | 130 + .../org/apache/tika/batch/fs/FSListCrawler.java | 118 + .../tika/batch/fs/FSOutputStreamFactory.java | 114 + .../org/apache/tika/batch/fs/FSProperties.java | 22 +- .../main/java/org/apache/tika/batch/fs/FSUtil.java | 211 + .../batch/fs/RecursiveParserWrapperFSConsumer.java | 159 + .../fs/builders/BasicTikaFSConsumersBuilder.java | 207 + .../tika/batch/fs/builders/FSCrawlerBuilder.java | 141 + .../batch/fs/strawman/StrawManTikaAppDriver.java | 254 + .../java/org/apache/tika/util/ClassLoaderUtil.java | 41 +- .../org/apache/tika/util/DurationFormatUtils.java | 66 + .../main/java/org/apache/tika/util/PropsUtil.java | 149 + .../main/java/org/apache/tika/util/XMLDOMUtil.java | 109 + tika-batch/src/main/java/overview.html | 41 + .../tika/batch/fs/default-tika-batch-config.xml | 127 + .../tika/batch/CommandLineParserBuilderTest.java | 34 +- .../RecursiveParserWrapperFSConsumerTest.java | 149 + .../org/apache/tika/batch/fs/BatchDriverTest.java | 210 + .../org/apache/tika/batch/fs/BatchProcessTest.java | 369 ++ .../org/apache/tika/batch/fs/FSBatchTestBase.java | 301 ++ .../apache/tika/batch/fs/FSFileResourceTest.java | 49 + .../java/org/apache/tika/batch/fs/FSUtilTest.java | 41 +- .../apache/tika/batch/fs/HandlerBuilderTest.java | 120 + .../tika/batch/fs/OutputStreamFactoryTest.java | 101 + .../apache/tika/batch/fs/StringStreamGobbler.java | 64 + .../tika/batch/fs/strawman/StrawmanTest.java | 17 +- .../tika/batch/mock/MockConsumersBuilder.java | 38 + .../tika/batch/mock/MockConsumersManager.java | 77 + .../apache/tika/parser/mock/MockParserFactory.java | 21 +- tika-bundle/pom.xml | 289 +- .../test/java/org/apache/tika/bundle/BundleIT.java | 202 +- tika-core/pom.xml | 39 +- tika-core/src/main/java/org/apache/tika/Tika.java | 201 +- .../ConfigurableThreadPoolExecutor.java} | 61 +- .../tika/concurrent/SimpleThreadPoolExecutor.java | 76 +- .../org/apache/tika/config/LoadErrorHandler.java | 13 +- .../java/org/apache/tika/config/ServiceLoader.java | 72 +- .../java/org/apache/tika/config/TikaConfig.java | 722 ++- .../org/apache/tika/detect/CompositeDetector.java | 32 +- .../org/apache/tika/detect/DefaultDetector.java | 33 +- .../apache/tika/detect/DefaultProbDetector.java | 80 + .../java/org/apache/tika/detect/MagicDetector.java | 14 +- .../apache/tika/detect/NNExampleModelDetector.java | 160 + .../org/apache/tika/detect/NNTrainedModel.java | 103 + .../apache/tika/detect/NNTrainedModelBuilder.java | 76 + .../java/org/apache/tika/detect/NameDetector.java | 4 +- .../TrainedModel.java} | 15 +- .../apache/tika/detect/TrainedModelDetector.java | 176 + .../org/apache/tika/detect/XmlRootExtractor.java | 10 +- .../org/apache/tika/embedder/ExternalEmbedder.java | 4 +- .../tika/exception/AccessPermissionException.java | 31 +- .../tika/extractor/ParserContainerExtractor.java | 5 +- .../ParsingEmbeddedDocumentExtractor.java | 9 +- .../main/java/org/apache/tika/fork/ForkClient.java | 25 +- .../main/java/org/apache/tika/fork/ForkParser.java | 48 +- .../main/java/org/apache/tika/io/EndianUtils.java | 21 + .../java/org/apache/tika/io/FilenameUtils.java | 42 +- .../src/main/java/org/apache/tika/io/IOUtils.java | 28 +- .../org/apache/tika/io/LookaheadInputStream.java | 5 +- .../java/org/apache/tika/io/TaggedInputStream.java | 2 +- .../org/apache/tika/io/TemporaryResources.java | 84 +- .../java/org/apache/tika/io/TikaInputStream.java | 149 +- .../apache/tika/language/LanguageIdentifier.java | 13 +- .../org/apache/tika/language/LanguageProfile.java | 159 + .../tika/language/LanguageProfilerBuilder.java | 9 +- .../tika/language/translate/DefaultTranslator.java | 119 + .../tika/language/translate/EmptyTranslator.java | 26 +- .../apache/tika/language/translate/Translator.java | 71 + .../apache/tika/metadata/AccessPermissions.java | 71 + .../{TikaMetadataKeys.java => Database.java} | 20 +- .../main/java/org/apache/tika/metadata/IPTC.java | 2 +- .../java/org/apache/tika/metadata/Metadata.java | 25 +- .../tika/metadata/OfficeOpenXMLExtended.java | 5 +- .../java/org/apache/tika/metadata/PagedText.java | 2 +- .../java/org/apache/tika/metadata/Photoshop.java | 8 + .../java/org/apache/tika/metadata/RTFMetadata.java | 46 + .../main/java/org/apache/tika/metadata/TIFF.java | 2 +- .../apache/tika/metadata/TikaCoreProperties.java | 48 + .../org/apache/tika/metadata/TikaMetadataKeys.java | 3 + .../main/java/org/apache/tika/metadata/XMPDM.java | 17 +- .../org/apache/tika/mime/MediaTypeRegistry.java | 33 +- .../main/java/org/apache/tika/mime/MimeType.java | 3 +- .../main/java/org/apache/tika/mime/MimeTypes.java | 153 +- .../java/org/apache/tika/mime/MimeTypesReader.java | 2 +- .../mime/ProbabilisticMimeDetectionSelector.java | 539 ++ .../org/apache/tika/parser/AutoDetectParser.java | 4 +- .../org/apache/tika/parser/CompositeParser.java | 70 +- .../java/org/apache/tika/parser/DefaultParser.java | 41 +- .../org/apache/tika/parser/DigestingParser.java | 76 + .../java/org/apache/tika/parser/EmptyParser.java | 2 - .../java/org/apache/tika/parser/ErrorParser.java | 4 +- .../java/org/apache/tika/parser/NetworkParser.java | 10 +- .../org/apache/tika/parser/ParserDecorator.java | 101 +- .../java/org/apache/tika/parser/ParsingReader.java | 14 + .../apache/tika/parser/RecursiveParserWrapper.java | 357 ++ .../tika/parser/external/ExternalParser.java | 83 +- .../external/ExternalParsersConfigReader.java | 5 +- .../parser/external/ExternalParsersFactory.java | 5 +- .../tika/sax/BasicContentHandlerFactory.java | 156 + .../java/org/apache/tika/sax/CleanPhoneText.java | 286 ++ .../ContentHandlerFactory.java} | 19 +- .../org/apache/tika/sax/DIFContentHandler.java | 152 + .../tika/sax/PhoneExtractingContentHandler.java | 111 + .../org/apache/tika/sax/ToTextContentHandler.java | 3 +- .../apache/tika/sax/WriteOutContentHandler.java | 3 +- .../org/apache/tika/sax/XHTMLContentHandler.java | 2 +- .../org/apache/tika/utils/ConcurrentUtils.java | 57 + .../main/java/org/apache/tika/utils/DateUtils.java | 36 +- .../java/org/apache/tika/utils/ExceptionUtils.java | 90 + .../org/apache/tika/utils/ServiceLoaderUtils.java | 48 + .../org/apache/tika/detect/tika-example.nnmodel | 2 + .../main/resources/org/apache/tika/language/fa.ngp | 1001 ++++ .../apache/tika/language/tika.language.properties | 3 +- .../org/apache/tika/mime/tika-mimetypes.xml | 1004 +++- .../java/org/apache/tika/TikaDetectionTest.java | 18 +- .../src/test/java/org/apache/tika/TikaTest.java | 86 +- .../org/apache/tika/TypeDetectionBenchmark.java | 11 +- .../apache/tika/config/AbstractTikaConfigTest.java | 50 + .../java/org/apache/tika/config/DummyExecutor.java | 64 +- .../java/org/apache/tika/config/DummyParser.java | 28 +- .../org/apache/tika/config/TikaConfigTest.java | 180 +- .../org/apache/tika/detect/MagicDetectorTest.java | 35 +- .../tika/detect/MimeDetectionWithNNTest.java | 140 + .../org/apache/tika/detect/TextDetectorTest.java | 5 +- .../java/org/apache/tika/io/EndianUtilsTest.java | 36 +- .../java/org/apache/tika/io/FilenameUtilsTest.java | 27 +- .../java/org/apache/tika/io/TailStreamTest.java | 15 +- .../org/apache/tika/io/TemporaryResourcesTest.java | 34 +- .../org/apache/tika/io/TikaInputStreamTest.java | 65 +- .../tika/language/LanguageIdentifierTest.java | 61 +- .../tika/language/LanguageProfilerBuilderTest.java | 24 +- .../org/apache/tika/metadata/TestMetadata.java | 9 +- .../org/apache/tika/mime/MimeDetectionTest.java | 42 +- .../org/apache/tika/mime/MimeTypesReaderTest.java | 88 +- ...st.java => ProbabilisticMimeDetectionTest.java} | 135 +- ...=> ProbabilisticMimeDetectionTestWithTika.java} | 165 +- .../apache/tika/parser/CompositeParserTest.java | 1 + .../java/org/apache/tika/parser/DummyParser.java | 10 +- .../apache/tika/parser/ParserDecoratorTest.java | 120 + .../org/apache/tika/parser/mock/MockParser.java | 359 ++ .../tika/sax/BasicContentHandlerFactoryTest.java | 341 ++ .../apache/tika/sax/BodyContentHandlerTest.java | 3 +- .../apache/tika/sax/XHTMLContentHandlerTest.java | 20 + .../org/apache/tika/utils/ConcurrentUtilsTest.java | 63 + tika-dotnet/pom.xml | 55 +- tika-example/pom.xml | 134 + .../apache/tika/example/AdvancedTypeDetector.java | 56 + .../apache/tika/example/ContentHandlerExample.java | 137 + .../org/apache/tika/example/CustomMimeInfo.java | 49 + .../org/apache/tika/example/DescribeMetadata.java | 18 +- .../org/apache/tika/example/DirListParser.java | 143 + .../apache/tika/example/DisplayMetInstance.java | 38 +- .../apache/tika/example/DumpTikaConfigExample.java | 314 ++ .../example/EncryptedPrescriptionDetector.java | 59 + .../tika/example/EncryptedPrescriptionParser.java | 51 +- .../apache/tika/example/ExtractEmbeddedFiles.java | 106 + .../tika/example/GrabPhoneNumbersExample.java | 103 + .../org/apache/tika/example/ImportContextImpl.java | 235 + .../tika/example/InterruptableParsingExample.java | 92 + .../java/org/apache/tika/example/Language.java | 58 + .../tika/example/LanguageDetectingParser.java | 48 +- .../tika/example/LanguageIdentifierExample.java | 19 +- .../tika/example/LazyTextExtractorField.java | 210 + .../org/apache/tika/example/LuceneIndexer.java | 37 +- .../apache/tika/example/LuceneIndexerExtended.java | 65 + .../org/apache/tika/example/MediaTypeExample.java | 58 + .../tika/example/MetadataAwareLuceneIndexer.java | 88 + .../java/org/apache/tika/example/MyFirstTika.java | 116 + .../org/apache/tika/example/ParsingExample.java | 217 + .../java/org/apache/tika/example/Pharmacy.java | 19 +- .../apache/tika/example/PrescriptionParser.java | 49 +- .../java/org/apache/tika/example/RecentFiles.java | 145 + .../org/apache/tika/example/RollbackSoftware.java | 137 + .../apache/tika/example/SimpleTextExtractor.java | 24 +- .../apache/tika/example/SimpleTypeDetector.java | 21 +- .../org/apache/tika/example/SpringExample.java | 34 +- .../org/apache/tika/example/TIAParsingExample.java | 201 + .../org/apache/tika/example/TranslatorExample.java | 26 +- .../apache/tika/example/TrecDocumentGenerator.java | 107 + .../java/org/apache/tika/example/ZipListFiles.java | 40 +- .../resources/org/apache/tika/example/spring.xml | 36 + .../resources/org/apache/tika/example/test.doc | Bin 0 -> 9216 bytes .../resources/org/apache/tika/example/test2.doc | Bin 0 -> 10752 bytes .../tika/example/test_recursive_embedded.docx | Bin 0 -> 27082 bytes .../tika/example/AdvancedTypeDetectorTest.java | 20 +- .../tika/example/ContentHandlerExampleTest.java | 105 + .../tika/example/DumpTikaConfigExampleTest.java | 90 + .../tika/example/ExtractEmbeddedFilesTest.java | 62 + .../example/LanguageIdentifierExampleTest.java | 25 +- .../tika/example/SimpleTextExtractorTest.java | 48 + .../tika/example/SimpleTypeDetectorTest.java | 43 + .../apache/tika/example/TestParsingExample.java | 102 + .../apache/tika/example/TranslatorExampleTest.java | 36 +- tika-java7/pom.xml | 43 +- .../filetypedetector/TikaFileTypeDetector.java | 3 +- tika-parent/pom.xml | 141 +- tika-parsers/pom.xml | 246 +- .../src/main/appended-resources/META-INF/LICENSE | 57 + .../apache/tika/parser/asm/XHTMLClassVisitor.java | 2 +- .../org/apache/tika/parser/audio/MidiParser.java | 4 +- .../java/org/apache/tika/parser/chm/ChmParser.java | 39 +- .../chm/accessor/ChmDirectoryListingSet.java | 283 +- .../tika/parser/chm/accessor/ChmItsfHeader.java | 21 +- .../tika/parser/chm/accessor/ChmItspHeader.java | 44 +- .../parser/chm/accessor/ChmLzxcControlData.java | 20 +- .../parser/chm/accessor/ChmLzxcResetTable.java | 15 +- .../tika/parser/chm/accessor/ChmPmgiHeader.java | 36 +- .../tika/parser/chm/accessor/ChmPmglHeader.java | 67 +- .../parser/chm/accessor/DirectoryListingEntry.java | 5 +- .../apache/tika/parser/chm/core/ChmCommons.java | 23 +- .../apache/tika/parser/chm/core/ChmConstants.java | 4 +- .../apache/tika/parser/chm/core/ChmExtractor.java | 54 +- .../apache/tika/parser/chm/lzx/ChmBlockInfo.java | 10 +- .../apache/tika/parser/chm/lzx/ChmLzxBlock.java | 223 +- .../apache/tika/parser/chm/lzx/ChmLzxState.java | 30 +- .../org/apache/tika/parser/chm/lzx/ChmSection.java | 31 +- .../apache/tika/parser/code/SourceCodeParser.java | 37 +- .../org/apache/tika/parser/crypto/Pkcs7Parser.java | 20 +- .../CTAKESAnnotationProperty.java} | 39 +- .../apache/tika/parser/ctakes/CTAKESConfig.java | 336 ++ .../tika/parser/ctakes/CTAKESContentHandler.java | 176 + .../apache/tika/parser/ctakes/CTAKESParser.java | 92 + .../CTAKESSerializer.java} | 31 +- .../org/apache/tika/parser/ctakes/CTAKESUtils.java | 265 + .../apache/tika/parser/dif/DIFContentHandler.java | 152 + .../java/org/apache/tika/parser/dif/DIFParser.java | 86 + .../apache/tika/parser/envi/EnviHeaderParser.java | 84 + .../apache/tika/parser/epub/EpubContentParser.java | 2 +- .../org/apache/tika/parser/epub/EpubParser.java | 6 +- .../tika/parser/executable/MachineMetadata.java | 36 +- .../org/apache/tika/parser/feed/FeedParser.java | 4 +- .../tika/parser/font/AdobeFontMetricParser.java | 33 +- .../apache/tika/parser/font/TrueTypeParser.java | 58 +- .../org/apache/tika/parser/gdal/GDALParser.java | 415 ++ .../apache/tika/parser/geo/topic/GeoParser.java | 155 + .../tika/parser/geo/topic/GeoParserConfig.java | 54 + .../org/apache/tika/parser/geo/topic/GeoTag.java | 65 + .../tika/parser/geo/topic/NameEntityExtractor.java | 127 + .../geoinfo/GeographicInformationParser.java | 391 ++ .../NetCDFParser.java => grib/GribParser.java} | 104 +- .../java/org/apache/tika/parser/hdf/HDFParser.java | 8 +- .../tika/parser/html/BoilerpipeContentHandler.java | 189 +- .../apache/tika/parser/html/DefaultHtmlMapper.java | 30 +- .../tika/parser/html/HtmlEncodingDetector.java | 45 +- .../org/apache/tika/parser/html/HtmlHandler.java | 31 +- .../org/apache/tika/parser/html/HtmlMapper.java | 14 +- .../org/apache/tika/parser/html/HtmlParser.java | 80 +- .../tika/parser/html/IdentityHtmlMapper.java | 2 +- .../tika/parser/html/XHTMLDowngradeHandler.java | 3 +- .../org/apache/tika/parser/image/BPGParser.java | 177 + .../tika/parser/image/ImageMetadataExtractor.java | 367 +- .../org/apache/tika/parser/image/ImageParser.java | 179 +- .../apache/tika/parser/image/MetadataFields.java | 22 +- .../org/apache/tika/parser/image/PSDParser.java | 194 +- .../org/apache/tika/parser/image/TiffParser.java | 6 +- .../image/{TiffParser.java => WebPParser.java} | 14 +- .../tika/parser/image/xmp/JempboxExtractor.java | 14 +- .../tika/parser/image/xmp/XMPPacketScanner.java | 79 +- .../org/apache/tika/parser/internal/Activator.java | 5 +- .../apache/tika/parser/iptc/IptcAnpaParser.java | 41 +- .../org/apache/tika/parser/isatab/ISATabUtils.java | 209 + .../apache/tika/parser/isatab/ISArchiveParser.java | 136 + .../tika/parser/iwork/AutoPageNumberUtils.java | 6 +- .../tika/parser/iwork/IWorkPackageParser.java | 13 +- .../tika/parser/iwork/PagesContentHandler.java | 1 - .../apache/tika/parser/jdbc/AbstractDBParser.java | 189 + .../apache/tika/parser/jdbc/JDBCTableReader.java | 302 ++ .../apache/tika/parser/jdbc/SQLite3DBParser.java | 110 + .../org/apache/tika/parser/jdbc/SQLite3Parser.java | 80 + .../tika/parser/jdbc/SQLite3TableReader.java | 109 + .../tika/parser/journal/GrobidRESTParser.java | 112 + .../TiffParser.java => journal/JournalParser.java} | 57 +- .../org/apache/tika/parser/journal/TEIParser.java | 893 ++++ .../org/apache/tika/parser/jpeg/JpegParser.java | 6 +- .../tika/parser/mail/MailContentHandler.java | 84 +- .../org/apache/tika/parser/mail/RFC822Parser.java | 18 +- .../java/org/apache/tika/parser/mat/MatParser.java | 133 + .../org/apache/tika/parser/mbox/MboxParser.java | 213 +- .../apache/tika/parser/mbox/OutlookPSTParser.java | 203 + .../tika/parser/microsoft/AbstractListManager.java | 269 + .../parser/microsoft/AbstractPOIFSExtractor.java | 170 +- .../tika/parser/microsoft/ExcelExtractor.java | 403 +- .../tika/parser/microsoft/HSLFExtractor.java | 511 +- .../tika/parser/microsoft/JackcessExtractor.java | 345 ++ .../tika/parser/microsoft/JackcessParser.java | 129 + .../apache/tika/parser/microsoft/ListManager.java | 190 + .../apache/tika/parser/microsoft/OfficeParser.java | 272 +- .../tika/parser/microsoft/OldExcelParser.java | 97 + .../tika/parser/microsoft/OutlookExtractor.java | 480 +- .../parser/microsoft/POIFSContainerDetector.java | 354 +- .../tika/parser/microsoft/SummaryExtractor.java | 63 +- .../apache/tika/parser/microsoft/TNEFParser.java | 142 +- .../tika/parser/microsoft/WordExtractor.java | 979 ++-- .../microsoft/ooxml/AbstractOOXMLExtractor.java | 122 +- .../parser/microsoft/ooxml/MetadataExtractor.java | 225 +- .../parser/microsoft/ooxml/OOXMLExtractor.java | 4 +- .../microsoft/ooxml/OOXMLExtractorFactory.java | 39 +- .../tika/parser/microsoft/ooxml/OOXMLParser.java | 61 +- .../ooxml/POIXMLTextExtractorDecorator.java | 2 +- .../ooxml/XSLFPowerPointExtractorDecorator.java | 176 +- .../ooxml/XSSFExcelExtractorDecorator.java | 538 +- .../parser/microsoft/ooxml/XWPFListManager.java | 165 + .../ooxml/XWPFWordExtractorDecorator.java | 611 +-- .../tika/parser/mp3/CompositeTagHandler.java | 26 + .../java/org/apache/tika/parser/mp3/ID3Tags.java | 19 +- .../org/apache/tika/parser/mp3/ID3v1Handler.java | 35 +- .../org/apache/tika/parser/mp3/ID3v22Handler.java | 23 +- .../org/apache/tika/parser/mp3/ID3v23Handler.java | 22 +- .../org/apache/tika/parser/mp3/ID3v24Handler.java | 22 +- .../org/apache/tika/parser/mp3/ID3v2Frame.java | 11 +- .../org/apache/tika/parser/mp3/LyricsHandler.java | 7 +- .../java/org/apache/tika/parser/mp3/Mp3Parser.java | 19 +- .../tika/parser/mp4/DirectFileReadDataSource.java | 100 + .../java/org/apache/tika/parser/mp4/MP4Parser.java | 389 +- .../apache/tika/parser/netcdf/NetCDFParser.java | 75 +- .../apache/tika/parser/ocr/TesseractOCRConfig.java | 256 + .../apache/tika/parser/ocr/TesseractOCRParser.java | 336 ++ .../parser/odf/NSNormalizerContentHandler.java | 9 +- .../tika/parser/odf/OpenDocumentContentParser.java | 504 +- .../tika/parser/odf/OpenDocumentMetaParser.java | 94 +- .../apache/tika/parser/odf/OpenDocumentParser.java | 188 +- .../org/apache/tika/parser/pdf/AccessChecker.java | 81 + .../java/org/apache/tika/parser/pdf/PDF2XHTML.java | 491 +- .../tika/parser/pdf/PDFEncodedStringDecoder.java | 117 + .../java/org/apache/tika/parser/pdf/PDFParser.java | 500 +- .../apache/tika/parser/pdf/PDFParserConfig.java | 316 +- .../apache/tika/parser/pkg/CompressorParser.java | 33 +- .../org/apache/tika/parser/pkg/PackageParser.java | 160 +- .../java/org/apache/tika/parser/pkg/RarParser.java | 110 + .../tika/parser/pkg/ZipContainerDetector.java | 87 +- .../java/org/apache/tika/parser/prt/PRTParser.java | 4 +- .../org/apache/tika/parser/rtf/GroupState.java | 17 +- .../org/apache/tika/parser/rtf/ListDescriptor.java | 3 +- .../apache/tika/parser/rtf/RTFEmbObjHandler.java | 287 ++ .../apache/tika/parser/rtf/RTFObjDataParser.java | 315 ++ .../java/org/apache/tika/parser/rtf/RTFParser.java | 38 +- .../org/apache/tika/parser/rtf/TextExtractor.java | 422 +- .../org/apache/tika/parser/strings/FileConfig.java | 77 + .../tika/parser/strings/Latin1StringsParser.java | 322 ++ .../apache/tika/parser/strings/StringsConfig.java | 187 + .../tika/parser/strings/StringsEncoding.java | 45 + .../apache/tika/parser/strings/StringsParser.java | 335 ++ .../apache/tika/parser/txt/CharsetDetector.java | 452 +- .../org/apache/tika/parser/txt/CharsetMatch.java | 181 +- .../apache/tika/parser/txt/CharsetRecog_2022.java | 147 +- .../apache/tika/parser/txt/CharsetRecog_UTF8.java | 56 +- .../tika/parser/txt/CharsetRecog_Unicode.java | 127 +- .../apache/tika/parser/txt/CharsetRecog_mbcs.java | 916 ++-- .../apache/tika/parser/txt/CharsetRecog_sbcs.java | 1749 +++---- .../apache/tika/parser/txt/CharsetRecognizer.java | 31 +- .../tika/parser/txt/Icu4jEncodingDetector.java | 6 +- .../java/org/apache/tika/parser/txt/TXTParser.java | 21 +- .../tika/parser/txt/UniversalEncodingDetector.java | 2 - .../apache/tika/parser/utils/CommonsDigester.java | 299 ++ .../org/apache/tika/parser/video/FLVParser.java | 4 +- .../java/org/apache/tika/parser/xml/XMLParser.java | 8 +- .../services/org.apache.tika.parser.Parser | 17 + .../tika/parser/ctakes/CTAKESConfig.properties | 10 +- .../tika/parser/external/tika-external-parsers.xml | 29 +- .../GrobidExtractor.properties} | 7 +- .../TesseractOCRConfig.properties} | 12 +- .../apache/tika/parser/pdf/PDFParser.properties | 4 + .../src/test/java/org/apache/tika/TestParsers.java | 19 +- .../apache/tika/config/TikaDetectorConfigTest.java | 144 + .../apache/tika/config/TikaParserConfigTest.java | 157 + .../tika/config/TikaTranslatorConfigTest.java | 72 + .../tika/detect/TestContainerAwareDetector.java | 104 +- .../apache/tika/embedder/ExternalEmbedderTest.java | 8 +- .../java/org/apache/tika/mime/TestMimeTypes.java | 295 +- .../apache/tika/parser/AutoDetectParserTest.java | 165 +- .../apache/tika/parser/DigestingParserTest.java | 136 + .../org/apache/tika/parser/ParsingReaderTest.java | 16 +- .../tika/parser/RecursiveParserWrapperTest.java | 312 ++ .../apache/tika/parser/audio/MidiParserTest.java | 4 +- .../apache/tika/parser/chm/TestChmBlockInfo.java | 9 +- .../apache/tika/parser/chm/TestChmExtraction.java | 122 +- .../apache/tika/parser/chm/TestChmExtractor.java | 13 +- .../apache/tika/parser/chm/TestChmItspHeader.java | 3 +- .../apache/tika/parser/chm/TestChmLzxState.java | 3 +- .../tika/parser/chm/TestChmLzxcControlData.java | 7 +- .../tika/parser/chm/TestChmLzxcResetTable.java | 3 +- .../org/apache/tika/parser/chm/TestParameters.java | 9 +- .../org/apache/tika/parser/chm/TestPmglHeader.java | 3 +- .../tika/parser/code/SourceCodeParserTest.java | 101 + .../apache/tika/parser/crypto/Pkcs7ParserTest.java | 9 +- .../DIFParserTest.java} | 40 +- .../org/apache/tika/parser/dwg/DWGParserTest.java | 27 +- .../tika/parser/envi/EnviHeaderParserTest.java | 60 + .../apache/tika/parser/epub/EpubParserTest.java | 17 +- .../parser/executable/ExecutableParserTest.java | 56 +- .../apache/tika/parser/feed/FeedParserTest.java | 31 +- .../parser/font/AdobeFontMetricParserTest.java | 71 - .../apache/tika/parser/font/FontParsersTest.java | 113 + .../parser/fork/ForkParserIntegrationTest.java | 26 +- .../apache/tika/parser/gdal/TestGDALParser.java | 181 + .../tika/parser/geo/topic/GeoParserTest.java | 91 + .../geoinfo/GeographicInformationParserTest.java | 62 + .../GribParserTest.java} | 33 +- .../org/apache/tika/parser/hdf/HDFParserTest.java | 18 +- .../apache/tika/parser/html/HtmlParserTest.java | 495 +- .../tika/parser/ibooks/iBooksParserTest.java | 17 +- .../apache/tika/parser/image/BPGParserTest.java | 133 + .../parser/image/ImageMetadataExtractorTest.java | 64 +- .../apache/tika/parser/image/ImageParserTest.java | 16 +- .../apache/tika/parser/image/PSDParserTest.java | 12 +- .../apache/tika/parser/image/TiffParserTest.java | 27 +- .../apache/tika/parser/image/WebPParserTest.java | 72 + .../parser/image/xmp/JempboxExtractorTest.java | 25 +- .../tika/parser/isatab/ISArchiveParserTest.java | 60 + .../apache/tika/parser/iwork/IWorkParserTest.java | 177 +- .../apache/tika/parser/jdbc/SQLite3ParserTest.java | 356 ++ .../JournalParserTest.java} | 42 +- .../apache/tika/parser/jpeg/JpegParserTest.java | 106 +- .../apache/tika/parser/mail/RFC822ParserTest.java | 228 +- .../org/apache/tika/parser/mat/MatParserTest.java | 80 + .../apache/tika/parser/mbox/MboxParserTest.java | 203 +- .../tika/parser/mbox/OutlookPSTParserTest.java | 110 + .../AbstractPOIContainerExtractionTest.java | 45 +- .../tika/parser/microsoft/ExcelParserTest.java | 453 +- .../tika/parser/microsoft/JackcessParserTest.java | 194 + ...tectedParserTest.java => OfficeParserTest.java} | 32 +- .../tika/parser/microsoft/OldExcelParserTest.java | 114 + .../tika/parser/microsoft/OutlookParserTest.java | 137 +- .../microsoft/POIContainerExtractionTest.java | 502 +- .../parser/microsoft/PowerPointParserTest.java | 207 +- .../tika/parser/microsoft/ProjectParserTest.java | 97 +- .../tika/parser/microsoft/PublisherParserTest.java | 19 +- .../tika/parser/microsoft/TNEFParserTest.java | 112 +- .../tika/parser/microsoft/VisioParserTest.java | 17 +- .../tika/parser/microsoft/WordParserTest.java | 328 +- .../parser/microsoft/WriteProtectedParserTest.java | 10 +- .../ooxml/OOXMLContainerExtractionTest.java | 396 +- .../parser/microsoft/ooxml/OOXMLParserTest.java | 1116 ++-- .../apache/tika/parser/mock/MockParserTest.java | 247 + .../org/apache/tika/parser/mp3/Mp3ParserTest.java | 201 +- .../org/apache/tika/parser/mp3/MpegStreamTest.java | 3 +- .../org/apache/tika/parser/mp4/MP4ParserTest.java | 31 +- .../tika/parser/netcdf/NetCDFParserTest.java | 30 +- .../tika/parser/ocr/TesseractOCRConfigTest.java | 93 + .../tika/parser/ocr/TesseractOCRParserTest.java | 206 + .../org/apache/tika/parser/odf/ODFParserTest.java | 495 +- .../apache/tika/parser/pdf/AccessCheckerTest.java | 137 + .../org/apache/tika/parser/pdf/PDFParserTest.java | 1108 +++- .../apache/tika/parser/pkg/AbstractPkgTest.java | 7 + .../org/apache/tika/parser/pkg/ArParserTest.java | 158 +- .../apache/tika/parser/pkg/Bzip2ParserTest.java | 61 +- ...zip2ParserTest.java => CompressParserTest.java} | 60 +- .../org/apache/tika/parser/pkg/GzipParserTest.java | 73 +- .../pkg/{TarParserTest.java => RarParserTest.java} | 87 +- .../apache/tika/parser/pkg/Seven7ParserTest.java | 219 + .../org/apache/tika/parser/pkg/TarParserTest.java | 63 +- .../org/apache/tika/parser/pkg/ZipParserTest.java | 109 +- .../org/apache/tika/parser/pkg/ZlibParserTest.java | 77 + .../org/apache/tika/parser/prt/PRTParserTest.java | 28 +- .../org/apache/tika/parser/rtf/RTFParserTest.java | 286 +- .../parser/solidworks/SolidworksParserTest.java | 47 +- .../apache/tika/parser/strings/FileConfigTest.java | 28 + .../parser/strings/Latin1StringsParserTest.java | 69 + .../tika/parser/strings/StringsConfigTest.java | 61 + .../tika/parser/strings/StringsParserTest.java | 74 + .../tika/parser/txt/CharsetDetectorTest.java | 53 +- .../org/apache/tika/parser/txt/TXTParserTest.java | 74 +- .../apache/tika/parser/xml/DcXMLParserTest.java | 30 +- .../EmptyAndDuplicateElementsXMLParserTest.java | 42 +- .../tika/parser/xml/FictionBookParserTest.java | 25 +- .../sax/PhoneExtractingContentHandlerTest.java | 58 + .../apache/tika/utils/ServiceLoaderUtilsTest.java | 57 + tika-serialization/pom.xml | 100 + .../tika/metadata/serialization/JsonMetadata.java | 87 + .../metadata/serialization/JsonMetadataBase.java | 52 + .../serialization/JsonMetadataDeserializer.java | 75 + .../metadata/serialization/JsonMetadataList.java | 96 + .../serialization/JsonMetadataSerializer.java | 97 + .../serialization/PrettyMetadataKeyComparator.java | 44 + .../serialization/JsonMetadataListTest.java | 123 + .../metadata/serialization/JsonMetadataTest.java | 132 + tika-server/Dockerfile | 37 + tika-server/README | 35 - tika-server/README.md | 45 + tika-server/pom.xml | 196 +- .../java/org/apache/tika/server/HTMLHelper.java | 64 + .../java/org/apache/tika/server/MetadataEP.java | 164 - .../server/{TikaVersion.java => MetadataList.java} | 25 +- .../org/apache/tika/server/MetadataResource.java | 93 - .../apache/tika/server/RichTextContentHandler.java | 34 +- .../org/apache/tika/server/TikaLoggingFilter.java | 51 + .../java/org/apache/tika/server/TikaResource.java | 343 -- .../java/org/apache/tika/server/TikaServerCli.java | 256 +- ...onMapper.java => TikaServerParseException.java} | 27 +- .../server/TikaServerParseExceptionMapper.java | 90 + .../org/apache/tika/server/UnpackerResource.java | 258 - .../java/org/apache/tika/server/ZipWriter.java | 85 - .../tika/server/resource/DetectorResource.java | 64 + .../tika/server/resource/LanguageResource.java | 75 + .../tika/server/resource/MetadataResource.java | 134 + .../server/resource/RecursiveMetadataResource.java | 146 + .../apache/tika/server/resource/TikaDetectors.java | 123 + .../apache/tika/server/resource/TikaMimeTypes.java | 173 + .../apache/tika/server/resource/TikaParsers.java | 242 + .../apache/tika/server/resource/TikaResource.java | 426 ++ .../tika/server/{ => resource}/TikaVersion.java | 10 +- .../apache/tika/server/resource/TikaWelcome.java | 232 + .../tika/server/resource/TranslateResource.java | 111 + .../tika/server/resource/UnpackerResource.java | 261 + .../server/{ => writer}/CSVMessageBodyWriter.java | 49 +- .../server/{ => writer}/JSONMessageBodyWriter.java | 62 +- .../writer/MetadataListMessageBodyWriter.java | 68 + .../apache/tika/server/{ => writer}/TarWriter.java | 49 +- .../tika/server/writer/TextMessageBodyWriter.java | 76 + .../tika/server/writer/XMPMessageBodyWriter.java | 68 + .../org/apache/tika/server/writer/ZipWriter.java | 86 + .../src/main/resources/tikaserver-template.html | 32 + .../main/resources/tikaserver-version.properties | 18 - .../java/org/apache/tika/server/CXFTestBase.java | 197 +- .../apache/tika/server/DetectorResourceTest.java | 107 + .../apache/tika/server/LanguageResourceTest.java | 109 + .../org/apache/tika/server/MetadataEPTest.java | 187 - .../apache/tika/server/MetadataResourceTest.java | 259 +- .../tika/server/RecursiveMetadataResourceTest.java | 277 + .../org/apache/tika/server/StackTraceOffTest.java | 150 + .../org/apache/tika/server/StackTraceTest.java | 146 + .../org/apache/tika/server/TikaDetectorsTest.java | 142 + .../org/apache/tika/server/TikaMimeTypesTest.java | 121 + .../org/apache/tika/server/TikaParsersTest.java | 186 + .../org/apache/tika/server/TikaResourceTest.java | 282 +- .../org/apache/tika/server/TikaVersionTest.java | 85 +- .../org/apache/tika/server/TikaWelcomeTest.java | 112 + .../apache/tika/server/TranslateResourceTest.java | 86 + .../apache/tika/server/UnpackerResourceTest.java | 371 +- tika-translate/pom.xml | 160 + .../tika/language/translate/CachedTranslator.java | 179 + .../language/translate/ExternalTranslator.java | 101 + .../tika/language/translate/GoogleTranslator.java | 118 + .../tika/language/translate/Lingo24Translator.java | 114 + .../language/translate/MicrosoftTranslator.java | 149 + .../tika/language/translate/MosesTranslator.java | 140 + .../org.apache.tika.language.translate.Translator | 10 +- .../translate/translator.google.properties | 11 +- .../translate/translator.lingo24.properties | 11 +- .../translate/translator.microsoft.properties | 12 +- .../language/translate/translator.moses.properties | 13 +- .../language/translate/CachedTranslatorTest.java | 85 + .../language/translate/GoogleTranslatorTest.java | 83 + .../language/translate/Lingo24TranslatorTest.java | 78 + .../translate/MicrosoftTranslatorTest.java | 76 + .../language/translate/MosesTranslatorTest.java | 38 +- tika-xmp/pom.xml | 38 +- .../java/org/apache/tika/xmp/XMPMetadataTest.java | 8 +- 625 files changed, 57037 insertions(+), 26907 deletions(-) -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/tika.git _______________________________________________ pkg-java-commits mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-java-commits

