This is an automated email from the ASF dual-hosted git repository. lfcnassif pushed a commit to branch branch_1x in repository https://gitbox.apache.org/repos/asf/tika.git
commit d6f6c4bad5b0654065f2fbcc684151400ba33cef Merge: e11d428 013556a Author: Nassif <[email protected]> AuthorDate: Tue Nov 19 00:57:17 2019 -0200 Merge branch 'branch_1x' of https://github.com/apache/tika.git into branch_1x .gitignore | 1 + CHANGES.txt | 51 +- LICENSE.txt | 26 +- README.md | 53 +- assembly.xml | 3 +- pom.xml | 2 +- tika-app/pom.xml | 2 +- .../src/main/java/org/apache/tika/cli/TikaCLI.java | 2 +- .../tika/cli/TikaCLIBatchCommandLineTest.java | 5 +- .../test/java/org/apache/tika/cli/TikaCLITest.java | 10 +- tika-batch/pom.xml | 2 +- .../RecursiveParserWrapperFSConsumerTest.java | 3 +- .../org/apache/tika/batch/fs/FSBatchTestBase.java | 2 + tika-bundle/pom.xml | 20 +- tika-core/pom.xml | 2 +- .../org/apache/tika/detect/AutoDetectReader.java | 11 +- .../main/java/org/apache/tika/metadata/Office.java | 3 + .../main/java/org/apache/tika/metadata/PDF.java | 15 + .../apache/tika/metadata/TikaCoreProperties.java | 1 + .../java/org/apache/tika/mime/MimeTypesReader.java | 4 +- .../apache/tika/parser/RecursiveParserWrapper.java | 62 +- .../sax/AbstractRecursiveParserWrapperHandler.java | 14 +- .../org/apache/tika/sax/XHTMLContentHandler.java | 10 + .../java/org/apache/tika/utils/ProcessUtils.java | 8 + .../apache/tika/utils/RereadableInputStream.java | 6 + .../java/org/apache/tika/utils/XMLReaderUtils.java | 23 +- .../org/apache/tika/mime/tika-mimetypes.xml | 34 +- .../src/test/java/org/apache/tika/TikaTest.java | 66 +- .../apache/tika/sax/XHTMLContentHandlerTest.java | 43 + tika-dl/pom.xml | 10 +- .../tika/dl/imagerec/DL4JInceptionV3NetTest.java | 32 +- .../apache/tika/dl/imagerec/DL4JVGG16NetTest.java | 35 +- tika-eval/.gitignore | 1 + tika-eval/pom.xml | 25 +- .../org/apache/tika/eval/AbstractProfiler.java | 290 +- .../java/org/apache/tika/eval/ExtractComparer.java | 31 +- .../java/org/apache/tika/eval/ExtractProfiler.java | 3 +- .../tika/eval/batch/EvalConsumersBuilder.java | 6 +- .../java/org/apache/tika/eval/db/MimeBuffer.java | 4 +- .../java/org/apache/tika/eval/langid/Language.java | 24 +- .../apache/tika/eval/langid/LanguageIDWrapper.java | 121 + .../tika/eval/langid/ProbingLanguageDetector.java | 431 + .../apache/tika/eval/reports/ResultsReporter.java | 2 + .../textstats/BasicTokenCountStatsCalculator.java | 16 +- .../apache/tika/eval/textstats/CommonTokens.java | 68 + .../eval/textstats/CommonTokensBhattacharyya.java | 56 + .../tika/eval/textstats/CommonTokensCosine.java | 69 + .../tika/eval/textstats/CommonTokensHellinger.java | 56 + .../tika/eval/textstats/CommonTokensKLDNormed.java | 62 + .../eval/textstats/CommonTokensKLDivergence.java | 56 + .../textstats/CompositeTextStatsCalculator.java | 125 + .../eval/textstats/ContentLengthCalculator.java | 17 +- .../textstats/LanguageAwareTokenCountStats.java | 19 +- .../tika/eval/textstats/StringStatsCalculator.java | 18 +- .../tika/eval/textstats/TextStatsCalculator.java | 16 +- .../eval/textstats/TokenCountPriorityQueue.java | 50 + .../eval/textstats/TokenCountStatsCalculator.java | 18 +- .../apache/tika/eval/textstats/TokenEntropy.java | 28 +- .../apache/tika/eval/textstats/TokenLengths.java | 26 +- .../org/apache/tika/eval/textstats/TopNTokens.java | 49 + .../tika/eval/textstats/UnicodeBlockCounter.java | 73 + .../eval/tokens/AlphaIdeographFilterFactory.java | 13 +- .../apache/tika/eval/tokens/AnalyzerManager.java | 2 +- .../tika/eval/tokens/CommonTokenCountManager.java | 91 +- .../apache/tika/eval/tokens/CommonTokenResult.java | 8 + .../org/apache/tika/eval/tokens/LangModel.java | 78 + .../apache/tika/eval/tokens/TokenContraster.java | 25 +- .../org/apache/tika/eval/tokens/TokenCounter.java | 7 + .../org/apache/tika/eval/tokens/TokenCounts.java} | 50 +- .../tokens/URLEmailNormalizingFilterFactory.java | 82 + .../eval/tools/BatchTopCommonTokenCounter.java | 34 +- .../tika/eval/tools/CommonTokenOverlapCounter.java | 69 + .../org/apache/tika/eval/tools/LeipzigHelper.java | 57 + .../org/apache/tika/eval/tools/LeipzigSampler.java | 76 + .../eval/tools/SlowCompositeReaderWrapper.java | 34 +- .../tika/eval/tools/TopCommonTokenCounter.java | 120 +- .../org/apache/tika/eval/tools/TrainTestSplit.java | 114 + .../apache/tika/eval/util/EvalExceptionUtils.java} | 35 +- .../apache/tika/eval/util/LanguageIDWrapper.java | 105 - ....apache.lucene.analysis.util.TokenFilterFactory | 3 +- tika-eval/src/main/resources/common_tokens/afr | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/amh | 8901 ++++++ tika-eval/src/main/resources/common_tokens/ar | 20002 ------------ tika-eval/src/main/resources/common_tokens/ara | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/asm | 5084 ++++ tika-eval/src/main/resources/common_tokens/ast | 14031 +++++++++ tika-eval/src/main/resources/common_tokens/aze | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/azj | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/bak | 23078 ++++++++++++++ tika-eval/src/main/resources/common_tokens/ban | 4210 +++ tika-eval/src/main/resources/common_tokens/bel | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/ben | 13922 +++++++++ tika-eval/src/main/resources/common_tokens/bn | 20002 ------------ tika-eval/src/main/resources/common_tokens/bos | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/bre | 8909 ++++++ tika-eval/src/main/resources/common_tokens/bul | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/cat | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/ceb | 25584 ++++++++++++++++ tika-eval/src/main/resources/common_tokens/ces | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/che | 3941 +++ tika-eval/src/main/resources/common_tokens/ckb | 4689 +++ tika-eval/src/main/resources/common_tokens/cmn | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/cym | 10814 +++++++ tika-eval/src/main/resources/common_tokens/dan | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/de | 20002 ------------ tika-eval/src/main/resources/common_tokens/deu | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/div | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/ekk | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/el | 20002 ------------ tika-eval/src/main/resources/common_tokens/ell | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/en | 20002 ------------ tika-eval/src/main/resources/common_tokens/eng | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/epo | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/es | 20002 ------------ tika-eval/src/main/resources/common_tokens/est | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/eus | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/fa | 20002 ------------ tika-eval/src/main/resources/common_tokens/fao | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/fas | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/fin | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/fr | 20002 ------------ tika-eval/src/main/resources/common_tokens/fra | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/fry | 16964 +++++++++++ tika-eval/src/main/resources/common_tokens/gle | 20946 +++++++++++++ tika-eval/src/main/resources/common_tokens/glg | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/gsw | 12576 ++++++++ tika-eval/src/main/resources/common_tokens/guj | 27714 +++++++++++++++++ tika-eval/src/main/resources/common_tokens/hat | 3688 +++ tika-eval/src/main/resources/common_tokens/he | 20002 ------------ tika-eval/src/main/resources/common_tokens/heb | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/hi | 20002 ------------ tika-eval/src/main/resources/common_tokens/hin | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/hrv | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/hun | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/hye | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/id | 20002 ------------ tika-eval/src/main/resources/common_tokens/ind | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/isl | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/it | 20002 ------------ tika-eval/src/main/resources/common_tokens/ita | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/ja | 20002 ------------ tika-eval/src/main/resources/common_tokens/jav | 12859 ++++++++ tika-eval/src/main/resources/common_tokens/jpn | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/kan | 13550 +++++++++ tika-eval/src/main/resources/common_tokens/kat | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/kaz | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/kin | 5265 ++++ tika-eval/src/main/resources/common_tokens/kir | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/ko | 20002 ------------ tika-eval/src/main/resources/common_tokens/kor | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/kur | 1377 + tika-eval/src/main/resources/common_tokens/lat | 12572 ++++++++ tika-eval/src/main/resources/common_tokens/lav | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/lim | 11981 ++++++++ tika-eval/src/main/resources/common_tokens/lit | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/ltz | 10389 +++++++ tika-eval/src/main/resources/common_tokens/lug | 27083 +++++++++++++++++ tika-eval/src/main/resources/common_tokens/lvs | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/mal | 1798 ++ tika-eval/src/main/resources/common_tokens/mar | 26728 +++++++++++++++++ tika-eval/src/main/resources/common_tokens/mhr | 3610 +++ tika-eval/src/main/resources/common_tokens/min | 3773 +++ tika-eval/src/main/resources/common_tokens/mkd | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/mlg | 4107 +++ tika-eval/src/main/resources/common_tokens/mlt | 26806 +++++++++++++++++ tika-eval/src/main/resources/common_tokens/mon | 14640 +++++++++ tika-eval/src/main/resources/common_tokens/mri | 5584 ++++ tika-eval/src/main/resources/common_tokens/msa | 28380 ++++++++++++++++++ tika-eval/src/main/resources/common_tokens/nan | 3579 +++ tika-eval/src/main/resources/common_tokens/nds | 10300 +++++++ tika-eval/src/main/resources/common_tokens/nep | 20104 +++++++++++++ tika-eval/src/main/resources/common_tokens/nl | 20002 ------------ tika-eval/src/main/resources/common_tokens/nld | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/nno | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/nob | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/oci | 12539 ++++++++ tika-eval/src/main/resources/common_tokens/ori | 3841 +++ tika-eval/src/main/resources/common_tokens/pan | 9331 ++++++ tika-eval/src/main/resources/common_tokens/pes | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/plt | 2532 ++ tika-eval/src/main/resources/common_tokens/pnb | 9817 ++++++ tika-eval/src/main/resources/common_tokens/pol | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/por | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/pt | 20002 ------------ tika-eval/src/main/resources/common_tokens/pus | 13290 ++++++++ tika-eval/src/main/resources/common_tokens/ron | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/ru | 20002 ------------ tika-eval/src/main/resources/common_tokens/rus | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/san | 2370 ++ tika-eval/src/main/resources/common_tokens/sin | 7010 +++++ tika-eval/src/main/resources/common_tokens/slk | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/slv | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/snd | 4838 +++ tika-eval/src/main/resources/common_tokens/som | 13867 +++++++++ tika-eval/src/main/resources/common_tokens/spa | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/sqi | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/srp | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/sun | 20671 +++++++++++++ tika-eval/src/main/resources/common_tokens/swa | 9604 ++++++ tika-eval/src/main/resources/common_tokens/swe | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/tam | 12885 ++++++++ tika-eval/src/main/resources/common_tokens/tat | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/tel | 24371 +++++++++++++++ tika-eval/src/main/resources/common_tokens/tgk | 19793 ++++++++++++ tika-eval/src/main/resources/common_tokens/tgl | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/tha | 3088 ++ tika-eval/src/main/resources/common_tokens/tuk | 16112 ++++++++++ tika-eval/src/main/resources/common_tokens/tur | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/uig | 10518 +++++++ tika-eval/src/main/resources/common_tokens/ukr | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/ur | 20002 ------------ tika-eval/src/main/resources/common_tokens/urd | 30022 +++++++++++++++++++ tika-eval/src/main/resources/common_tokens/uzb | 26447 ++++++++++++++++ tika-eval/src/main/resources/common_tokens/vi | 20002 ------------ tika-eval/src/main/resources/common_tokens/vie | 26194 ++++++++++++++++ tika-eval/src/main/resources/common_tokens/vol | 1401 + tika-eval/src/main/resources/common_tokens/war | 17050 +++++++++++ tika-eval/src/main/resources/common_tokens/xho | 10688 +++++++ tika-eval/src/main/resources/common_tokens/yid | 4497 +++ tika-eval/src/main/resources/common_tokens/zh-cn | 20002 ------------ tika-eval/src/main/resources/common_tokens/zh-tw | 20002 ------------ tika-eval/src/main/resources/common_tokens/zul | 15643 ++++++++++ tika-eval/src/main/resources/lucene-analyzers.json | 19 +- .../src/main/resources/opennlp/model_20190626.bin | Bin 0 -> 11579707 bytes .../org/apache/tika/eval/AnalyzerManagerTest.java | 2 +- .../org/apache/tika/eval/SimpleComparerTest.java | 22 +- .../java/org/apache/tika/eval/TikaEvalCLITest.java | 57 +- .../org/apache/tika/eval/io/ExtractReaderTest.java | 2 +- .../org/apache/tika/eval/langid/LangIdTest.java | 55 + .../apache/tika/eval/tokens/TokenCounterTest.java | 2 +- .../org/apache/tika/eval/util/LanguageIdTest.java | 15 +- .../tika/tools/TopCommonTokenCounterTest.java | 21 +- tika-eval/src/test/resources/common_tokens/en | 20 + tika-eval/src/test/resources/common_tokens/es | 20 + tika-eval/src/test/resources/common_tokens/zh-cn | 20 + tika-eval/src/test/resources/common_tokens/zh-tw | 20 + tika-example/pom.xml | 13 +- .../apache/tika/example/TextStatsFromTikaEval.java | 62 + .../resources/org/apache/tika/example/spring.xml | 2 +- .../tika/example/TextStatsFromTikaEvalTest.java | 20 +- tika-java7/pom.xml | 2 +- tika-langdetect/pom.xml | 2 +- tika-nlp/pom.xml | 4 +- tika-parent/pom.xml | 40 +- tika-parsers/pom.xml | 57 +- .../apache/tika/parser/hwp/HwpStreamReader.java | 134 + .../apache/tika/parser/hwp/HwpTextExtractorV5.java | 514 + .../Word2006MLParser.java => hwp/HwpV5Parser.java} | 45 +- .../tika/parser/image/ImageMetadataExtractor.java | 7 +- .../tika/parser/image/xmp/JempboxExtractor.java | 4 +- .../apache/tika/parser/mbox/OutlookPSTParser.java | 1 + .../apache/tika/parser/microsoft/EMFParser.java | 61 +- .../apache/tika/parser/microsoft/OfficeParser.java | 2 +- .../parser/microsoft/POIFSContainerDetector.java | 6 +- .../apache/tika/parser/microsoft/WMFParser.java | 16 +- .../microsoft/ooxml/OOXMLExtractorFactory.java | 9 + .../tika/parser/microsoft/ooxml/OOXMLParser.java | 3 + .../ooxml/XSSFExcelExtractorDecorator.java | 2 +- .../ooxml/xwpf/ml2006/Word2006MLParser.java | 10 +- .../microsoft/xml/AbstractXML2003Parser.java | 8 +- .../tika/parser/microsoft/xml/WordMLParser.java | 23 +- .../apache/tika/parser/ocr/TesseractOCRParser.java | 24 +- .../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 17 +- .../java/org/apache/tika/parser/pdf/PDF2XHTML.java | 137 +- .../java/org/apache/tika/parser/pdf/PDFParser.java | 322 +- .../apache/tika/parser/pdf/PDFParserConfig.java | 21 +- .../tika/parser/pdf/PDMetadataExtractor.java | 275 + .../org/apache/tika/parser/pkg/PackageParser.java | 1 + .../parser/pkg/StreamingZipContainerDetector.java | 6 +- .../tika/parser/pkg/ZipContainerDetectorBase.java | 4 +- .../java/org/apache/tika/parser/rtf/RTFParser.java | 4 + .../org/apache/tika/parser/rtf/TextExtractor.java | 118 +- .../tika/parser/xliff/XLIFF12ContentHandler.java | 116 + .../XLIFF12Parser.java} | 65 +- .../org/apache/tika/parser/xliff/XLZParser.java | 146 + .../services/org.apache.tika.parser.Parser | 3 + .../apache/tika/parser/pdf/PDFParser.properties | 1 + .../java/org/apache/tika/TestCorruptedFiles.java | 9 +- .../src/test/java/org/apache/tika/TestParsers.java | 4 +- .../org/apache/tika/TestXMLEntityExpansion.java | 9 +- .../test/java/org/apache/tika/TestXXEInXML.java | 73 +- .../tika/config/TikaEncodingDetectorTest.java | 2 +- .../apache/tika/parser/AutoDetectParserTest.java | 2 +- .../tika/parser/AutoDetectReaderParserTest.java | 102 + .../parser/BouncyCastleDigestingParserTest.java | 11 +- .../apache/tika/parser/DigestingParserTest.java | 9 +- .../org/apache/tika/parser/ParsingReaderTest.java | 5 +- .../tika/parser/RecursiveParserWrapperTest.java | 64 +- .../org/apache/tika/parser/TabularFormatsTest.java | 54 +- .../apache/tika/parser/chm/TestChmExtraction.java | 5 +- .../tika/parser/code/SourceCodeParserTest.java | 9 +- .../org/apache/tika/parser/dbf/DBFParserTest.java | 7 +- .../apache/tika/parser/font/FontParsersTest.java | 18 +- .../apache/tika/parser/html/HtmlParserTest.java | 6 +- .../apache/tika/parser/hwp/HwpV5ParserTest.java | 68 + .../apache/tika/parser/image/TiffParserTest.java | 7 + .../apache/tika/parser/image/WebPParserTest.java | 25 +- .../apache/tika/parser/iwork/IWorkParserTest.java | 244 +- .../tika/parser/iwork/iwana/IWork13ParserTest.java | 6 +- .../apache/tika/parser/jdbc/SQLite3ParserTest.java | 37 +- .../apache/tika/parser/mail/RFC822ParserTest.java | 33 +- .../org/apache/tika/parser/mat/MatParserTest.java | 16 +- .../apache/tika/parser/mbox/MboxParserTest.java | 2 - .../tika/parser/mbox/OutlookPSTParserTest.java | 5 +- .../tika/parser/microsoft/ExcelParserTest.java | 5 +- .../tika/parser/microsoft/JackcessParserTest.java | 17 +- .../tika/parser/microsoft/OutlookParserTest.java | 19 +- .../parser/microsoft/ooxml/OOXMLParserTest.java | 482 +- .../parser/microsoft/ooxml/SXSLFExtractorTest.java | 16 +- .../parser/microsoft/ooxml/SXWPFExtractorTest.java | 25 +- .../ooxml/xwpf/ml2006/Word2006MLParserTest.java | 35 +- .../parser/microsoft/xml/XML2003ParserTest.java | 48 +- .../apache/tika/parser/mock/MockParserTest.java | 5 +- .../org/apache/tika/parser/mp3/Mp3ParserTest.java | 169 +- .../org/apache/tika/parser/mp4/MP4ParserTest.java | 16 +- .../tika/parser/ocr/TesseractOCRParserTest.java | 44 +- .../org/apache/tika/parser/odf/ODFParserTest.java | 7 +- .../org/apache/tika/parser/pdf/PDFParserTest.java | 126 +- .../apache/tika/parser/pkg/AbstractPkgTest.java | 7 +- .../org/apache/tika/parser/pkg/ArParserTest.java | 12 +- .../apache/tika/parser/pkg/Bzip2ParserTest.java | 8 +- .../apache/tika/parser/pkg/CompressParserTest.java | 22 +- .../tika/parser/pkg/CompressorParserTest.java | 13 + .../org/apache/tika/parser/pkg/GzipParserTest.java | 11 +- .../org/apache/tika/parser/pkg/RarParserTest.java | 40 +- .../apache/tika/parser/pkg/Seven7ParserTest.java | 26 +- .../org/apache/tika/parser/pkg/TarParserTest.java | 8 +- .../org/apache/tika/parser/pkg/ZipParserTest.java | 31 +- .../org/apache/tika/parser/pkg/ZlibParserTest.java | 8 +- .../org/apache/tika/parser/rtf/RTFParserTest.java | 114 +- .../apache/tika/parser/sas/SAS7BDATParserTest.java | 4 +- .../tika/parser/xliff/XLIFF12ParserTest.java | 48 + .../apache/tika/parser/xliff/XLZParserTest.java | 47 + .../sax/PhoneExtractingContentHandlerTest.java | 9 +- .../sax/StandardsExtractingContentHandlerTest.java | 14 +- .../tika/parser/ner/opennlp/ModelGetter.groovy | 2 +- .../apache/tika/parser/pdf/tika-inline-config.xml | 2 +- ...{tika-inline-config.xml => tika-ocr-config.xml} | 20 +- .../parser/rtf/ignoreListMarkup-tika-config.xml | 26 + .../src/test/resources/test-documents/droste.zip | Bin 0 -> 28809 bytes .../src/test/resources/test-documents/quine.gz | Bin 0 -> 204 bytes .../test-documents/test-documents-enc.rar | Bin 0 -> 68636 bytes .../resources/test-documents/testEXCEL_signed.xlsx | Bin 0 -> 15221 bytes .../resources/test-documents/testHWP-v5-dist.hwp | Bin 0 -> 19968 bytes .../test/resources/test-documents/testHWP-v5b.hwp | Bin 0 -> 70144 bytes .../resources/test-documents/testPPT_signed.pptx | Bin 0 -> 39761 bytes .../resources/test-documents/testRTFTIKA_2899.rtf | 836 + .../resources/test-documents/testWORD_signed.docx | Bin 0 -> 18245 bytes .../test/resources/test-documents/testXLIFF12.xlf | 35 + .../test/resources/test-documents/testXLIFF12.xlz | Bin 0 -> 1004 bytes tika-serialization/pom.xml | 2 +- tika-server/pom.xml | 6 +- .../tika/server/DefaultInputStreamFactory.java | 7 + .../org/apache/tika/server/InputStreamFactory.java | 5 +- .../apache/tika/server/ServerStatusWatcher.java | 8 +- .../tika/server/URLEnabledInputStreamFactory.java | 18 + .../tika/server/resource/DetectorResource.java | 2 +- .../tika/server/resource/MetadataResource.java | 18 +- .../server/resource/RecursiveMetadataResource.java | 10 +- .../apache/tika/server/resource/TikaResource.java | 37 +- .../tika/server/resource/UnpackerResource.java | 4 +- .../org/apache/tika/server/TikaResourceTest.java | 5 +- .../tika/server/TikaServerIntegrationTest.java | 4 +- .../apache/tika/server/UnpackerResourceTest.java | 7 +- .../test/resources/mock/testStaticStdOutErr.xml | 26 +- .../src/test/resources/mock/testStdOutErr.xml | 20 +- tika-translate/pom.xml | 17 +- tika-xmp/pom.xml | 2 +- 368 files changed, 2556115 insertions(+), 422473 deletions(-)
