This is an automated email from the ASF dual-hosted git repository. mattmann pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/tika.git
commit e7b481dd763ff4f586a94b981281923e4604010a Merge: 43b84c2 790c124 Author: Chris Mattmann <chris.a.mattm...@jpl.nasa.gov> AuthorDate: Sun Jul 8 19:35:49 2018 -0700 Merge branch 'master' of github.com:/apache/tika CHANGES.txt | 13 + .../src/main/resources/tika-app-batch-config.xml | 1 + .../tika/cli/TikaCLIBatchIntegrationTest.java | 38 +- .../batch/fs/RecursiveParserWrapperFSConsumer.java | 46 +- ...FSConsumer.java => StreamOutRPWFSConsumer.java} | 141 +++--- .../fs/builders/BasicTikaFSConsumersBuilder.java | 24 +- .../tika/batch/fs/default-tika-batch-config.xml | 4 + .../RecursiveParserWrapperFSConsumerTest.java | 5 +- .../org/apache/tika/batch/fs/BatchDriverTest.java | 41 ++ .../org/apache/tika/batch/fs/FSBatchTestBase.java | 37 +- .../test-input/system_exit/test0_system_exit.xml | 25 ++ .../test-input/system_exit/test1_system_exit.xml | 25 ++ .../test-input/system_exit/test2_system_exit.xml | 25 ++ .../resources/test-input/system_exit/test3_ok.xml | 23 + .../resources/test-input/system_exit/test4_ok.xml | 23 + .../resources/test-input/system_exit/test5_ok.xml | 23 + .../thread_interrupt/test0_thread_interrupt.xml | 24 + .../thread_interrupt/test1_thread_interrupt.xml | 24 + .../thread_interrupt/test2_thread_interrupt.xml | 24 + .../test-input/thread_interrupt/test3_ok.xml | 23 + .../test-input/thread_interrupt/test4_ok.xml | 23 + .../test-input/thread_interrupt/test5_ok.xml | 23 + tika-bundle/pom.xml | 11 +- .../test/java/org/apache/tika/bundle/BundleIT.java | 5 +- tika-core/pom.xml | 1 + .../apache/tika/detect/TrainedModelDetector.java | 3 +- .../main/java/org/apache/tika/fork/ForkClient.java | 146 ++++-- .../main/java/org/apache/tika/fork/ForkParser.java | 162 ++++++- .../main/java/org/apache/tika/fork/ForkServer.java | 202 ++++++--- .../org/apache/tika/fork/ParserFactoryFactory.java | 56 +++ .../fork/RecursiveMetadataContentHandlerProxy.java | 147 ++++++ .../RecursiveMetadataContentHandlerResource.java | 87 ++++ .../TimeoutLimits.java} | 39 +- .../java/org/apache/tika/io/TikaInputStream.java | 11 +- .../org/apache/tika/mime/MediaTypeRegistry.java | 5 +- .../main/java/org/apache/tika/mime/MimeTypes.java | 5 +- .../tika/parser/AutoDetectParserFactory.java | 63 +++ .../ParserFactory.java} | 33 +- .../apache/tika/parser/RecursiveParserWrapper.java | 20 +- .../sax/AbstractRecursiveParserWrapperHandler.java | 8 +- .../org/apache/tika/sax/ContentHandlerFactory.java | 3 +- .../tika/sax/RecursiveParserWrapperHandler.java | 4 +- .../org/apache/tika/sax/TaggedSAXException.java | 1 - .../org/apache/tika/sax/XHTMLContentHandler.java | 7 +- .../java/org/apache/tika/utils/ParserUtils.java | 2 +- .../java/org/apache/tika/utils/ProcessUtils.java | 25 ++ .../java/org/apache/tika/utils/SystemUtils.java | 61 +++ .../org/apache/tika/MultiThreadedTikaTest.java | 81 ++-- .../java/org/apache/tika/fork/ForkParserTest.java | 387 +++++++++++++++- .../apache/tika/fork/ForkParserTikaBinTest.java | 228 ++++++++++ .../tika/fork/UpperCasingContentHandler.java | 23 + .../org/apache/tika/mime/MimeTypesReaderTest.java | 22 + .../org/apache/tika/parser/mock/MockParser.java | 26 +- .../apache/tika/parser/mock/MockParserFactory.java | 66 +++ .../org/apache/tika/parser/mock/VowelParser.java | 60 +++ .../apache/tika/sax/BodyContentHandlerTest.java | 34 +- .../tika/config/TIKA-2653-vowel-parser-ae.xml | 26 ++ .../tika/config/TIKA-2653-vowel-parser-iou.xml | 26 ++ .../resources/test-documents/basic_embedded.xml | 35 ++ .../resources/test-documents/embedded_then_npe.xml | 36 ++ .../resources/test-documents/embedded_with_npe.xml | 37 ++ .../src/test/resources/test-documents/example.xml | 47 ++ tika-dl/pom.xml | 2 +- .../tika/eval/tokens/CommonTokenCountManager.java | 6 +- tika-langdetect/pom.xml | 23 + tika-parent/pom.xml | 3 +- tika-parsers/pom.xml | 30 +- .../org/apache/tika/parser/audio/AudioParser.java | 16 + .../tika/parser/html/HtmlEncodingDetector.java | 8 +- .../org/apache/tika/parser/html/HtmlHandler.java | 3 + .../org/apache/tika/parser/html/HtmlParser.java | 25 ++ .../parser/html/StrictHtmlEncodingDetector.java | 491 +++++++++++++++++++++ .../java/org/apache/tika/parser/mat/MatParser.java | 1 + .../apache/tika/parser/odf/OpenDocumentParser.java | 7 +- .../tika/parser/pkg/ZipContainerDetector.java | 100 +++-- .../org/apache/tika/parser/rtf/TextExtractor.java | 19 +- .../tika/parser/html/whatwg-encoding-labels.tsv | 234 ++++++++++ .../tika/detect/TestContainerAwareDetector.java | 15 + .../tika/parser/RecursiveParserWrapperTest.java | 7 +- .../apache/tika/parser/chm/TestChmExtraction.java | 6 +- .../parser/fork/ForkParserIntegrationTest.java | 46 +- .../tika/parser/html/HtmlEncodingDetectorTest.java | 142 ++++++ .../apache/tika/parser/html/HtmlParserTest.java | 21 +- .../html/StrictHtmlEncodingDetectorTest.java | 300 +++++++++++++ .../org/apache/tika/parser/mat/MatParserTest.java | 7 +- .../apache/tika/parser/mbox/MboxParserTest.java | 16 +- .../tika/parser/microsoft/JackcessParserTest.java | 3 +- .../tika/parser/microsoft/OutlookParserTest.java | 4 +- .../parser/microsoft/ooxml/OOXMLParserTest.java | 6 + .../org/apache/tika/parser/odf/ODFParserTest.java | 23 + .../tika/parser/ner/opennlp/ModelGetter.groovy | 24 +- .../resources/test-documents/mock/system_exit.xml | 25 ++ .../test-documents/mock/thread_interrupt.xml | 25 ++ .../test-documents/testODTnotaZipFile.odt | 1 + .../src/test/resources/test-documents/testPST.pst | Bin 271360 -> 271360 bytes .../test-documents/testPST_variousBodyTypes.pst | Bin 271360 -> 271360 bytes .../test-documents/testZIP_corrupted_oom.zip | Bin 0 -> 181 bytes .../metadata/serialization/JsonMetadataList.java | 15 + .../serialization/JsonStreamingSerializer.java | 65 +++ .../serialization/JsonMetadataListTest.java | 46 ++ tika-server/pom.xml | 12 + .../apache/tika/server/resource/TikaResource.java | 17 +- .../java/org/apache/tika/server/CXFTestBase.java | 4 +- .../org/apache/tika/server/TikaParsersTest.java | 69 +-- .../org/apache/tika/server/TikaResourceTest.java | 38 ++ .../tika/server/tika-config-for-server-tests.xml | 29 ++ .../src/test/resources/testPDFTwoTextBoxes.pdf | Bin 0 -> 57100 bytes 107 files changed, 4317 insertions(+), 493 deletions(-)