This is an automated email from the ASF dual-hosted git repository.

mattmann pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit e7b481dd763ff4f586a94b981281923e4604010a
Merge: 43b84c2 790c124
Author: Chris Mattmann <chris.a.mattm...@jpl.nasa.gov>
AuthorDate: Sun Jul 8 19:35:49 2018 -0700

    Merge branch 'master' of github.com:/apache/tika

 CHANGES.txt                                        |  13 +
 .../src/main/resources/tika-app-batch-config.xml   |   1 +
 .../tika/cli/TikaCLIBatchIntegrationTest.java      |  38 +-
 .../batch/fs/RecursiveParserWrapperFSConsumer.java |  46 +-
 ...FSConsumer.java => StreamOutRPWFSConsumer.java} | 141 +++---
 .../fs/builders/BasicTikaFSConsumersBuilder.java   |  24 +-
 .../tika/batch/fs/default-tika-batch-config.xml    |   4 +
 .../RecursiveParserWrapperFSConsumerTest.java      |   5 +-
 .../org/apache/tika/batch/fs/BatchDriverTest.java  |  41 ++
 .../org/apache/tika/batch/fs/FSBatchTestBase.java  |  37 +-
 .../test-input/system_exit/test0_system_exit.xml   |  25 ++
 .../test-input/system_exit/test1_system_exit.xml   |  25 ++
 .../test-input/system_exit/test2_system_exit.xml   |  25 ++
 .../resources/test-input/system_exit/test3_ok.xml  |  23 +
 .../resources/test-input/system_exit/test4_ok.xml  |  23 +
 .../resources/test-input/system_exit/test5_ok.xml  |  23 +
 .../thread_interrupt/test0_thread_interrupt.xml    |  24 +
 .../thread_interrupt/test1_thread_interrupt.xml    |  24 +
 .../thread_interrupt/test2_thread_interrupt.xml    |  24 +
 .../test-input/thread_interrupt/test3_ok.xml       |  23 +
 .../test-input/thread_interrupt/test4_ok.xml       |  23 +
 .../test-input/thread_interrupt/test5_ok.xml       |  23 +
 tika-bundle/pom.xml                                |  11 +-
 .../test/java/org/apache/tika/bundle/BundleIT.java |   5 +-
 tika-core/pom.xml                                  |   1 +
 .../apache/tika/detect/TrainedModelDetector.java   |   3 +-
 .../main/java/org/apache/tika/fork/ForkClient.java | 146 ++++--
 .../main/java/org/apache/tika/fork/ForkParser.java | 162 ++++++-
 .../main/java/org/apache/tika/fork/ForkServer.java | 202 ++++++---
 .../org/apache/tika/fork/ParserFactoryFactory.java |  56 +++
 .../fork/RecursiveMetadataContentHandlerProxy.java | 147 ++++++
 .../RecursiveMetadataContentHandlerResource.java   |  87 ++++
 .../TimeoutLimits.java}                            |  39 +-
 .../java/org/apache/tika/io/TikaInputStream.java   |  11 +-
 .../org/apache/tika/mime/MediaTypeRegistry.java    |   5 +-
 .../main/java/org/apache/tika/mime/MimeTypes.java  |   5 +-
 .../tika/parser/AutoDetectParserFactory.java       |  63 +++
 .../ParserFactory.java}                            |  33 +-
 .../apache/tika/parser/RecursiveParserWrapper.java |  20 +-
 .../sax/AbstractRecursiveParserWrapperHandler.java |   8 +-
 .../org/apache/tika/sax/ContentHandlerFactory.java |   3 +-
 .../tika/sax/RecursiveParserWrapperHandler.java    |   4 +-
 .../org/apache/tika/sax/TaggedSAXException.java    |   1 -
 .../org/apache/tika/sax/XHTMLContentHandler.java   |   7 +-
 .../java/org/apache/tika/utils/ParserUtils.java    |   2 +-
 .../java/org/apache/tika/utils/ProcessUtils.java   |  25 ++
 .../java/org/apache/tika/utils/SystemUtils.java    |  61 +++
 .../org/apache/tika/MultiThreadedTikaTest.java     |  81 ++--
 .../java/org/apache/tika/fork/ForkParserTest.java  | 387 +++++++++++++++-
 .../apache/tika/fork/ForkParserTikaBinTest.java    | 228 ++++++++++
 .../tika/fork/UpperCasingContentHandler.java       |  23 +
 .../org/apache/tika/mime/MimeTypesReaderTest.java  |  22 +
 .../org/apache/tika/parser/mock/MockParser.java    |  26 +-
 .../apache/tika/parser/mock/MockParserFactory.java |  66 +++
 .../org/apache/tika/parser/mock/VowelParser.java   |  60 +++
 .../apache/tika/sax/BodyContentHandlerTest.java    |  34 +-
 .../tika/config/TIKA-2653-vowel-parser-ae.xml      |  26 ++
 .../tika/config/TIKA-2653-vowel-parser-iou.xml     |  26 ++
 .../resources/test-documents/basic_embedded.xml    |  35 ++
 .../resources/test-documents/embedded_then_npe.xml |  36 ++
 .../resources/test-documents/embedded_with_npe.xml |  37 ++
 .../src/test/resources/test-documents/example.xml  |  47 ++
 tika-dl/pom.xml                                    |   2 +-
 .../tika/eval/tokens/CommonTokenCountManager.java  |   6 +-
 tika-langdetect/pom.xml                            |  23 +
 tika-parent/pom.xml                                |   3 +-
 tika-parsers/pom.xml                               |  30 +-
 .../org/apache/tika/parser/audio/AudioParser.java  |  16 +
 .../tika/parser/html/HtmlEncodingDetector.java     |   8 +-
 .../org/apache/tika/parser/html/HtmlHandler.java   |   3 +
 .../org/apache/tika/parser/html/HtmlParser.java    |  25 ++
 .../parser/html/StrictHtmlEncodingDetector.java    | 491 +++++++++++++++++++++
 .../java/org/apache/tika/parser/mat/MatParser.java |   1 +
 .../apache/tika/parser/odf/OpenDocumentParser.java |   7 +-
 .../tika/parser/pkg/ZipContainerDetector.java      | 100 +++--
 .../org/apache/tika/parser/rtf/TextExtractor.java  |  19 +-
 .../tika/parser/html/whatwg-encoding-labels.tsv    | 234 ++++++++++
 .../tika/detect/TestContainerAwareDetector.java    |  15 +
 .../tika/parser/RecursiveParserWrapperTest.java    |   7 +-
 .../apache/tika/parser/chm/TestChmExtraction.java  |   6 +-
 .../parser/fork/ForkParserIntegrationTest.java     |  46 +-
 .../tika/parser/html/HtmlEncodingDetectorTest.java | 142 ++++++
 .../apache/tika/parser/html/HtmlParserTest.java    |  21 +-
 .../html/StrictHtmlEncodingDetectorTest.java       | 300 +++++++++++++
 .../org/apache/tika/parser/mat/MatParserTest.java  |   7 +-
 .../apache/tika/parser/mbox/MboxParserTest.java    |  16 +-
 .../tika/parser/microsoft/JackcessParserTest.java  |   3 +-
 .../tika/parser/microsoft/OutlookParserTest.java   |   4 +-
 .../parser/microsoft/ooxml/OOXMLParserTest.java    |   6 +
 .../org/apache/tika/parser/odf/ODFParserTest.java  |  23 +
 .../tika/parser/ner/opennlp/ModelGetter.groovy     |  24 +-
 .../resources/test-documents/mock/system_exit.xml  |  25 ++
 .../test-documents/mock/thread_interrupt.xml       |  25 ++
 .../test-documents/testODTnotaZipFile.odt          |   1 +
 .../src/test/resources/test-documents/testPST.pst  | Bin 271360 -> 271360 bytes
 .../test-documents/testPST_variousBodyTypes.pst    | Bin 271360 -> 271360 bytes
 .../test-documents/testZIP_corrupted_oom.zip       | Bin 0 -> 181 bytes
 .../metadata/serialization/JsonMetadataList.java   |  15 +
 .../serialization/JsonStreamingSerializer.java     |  65 +++
 .../serialization/JsonMetadataListTest.java        |  46 ++
 tika-server/pom.xml                                |  12 +
 .../apache/tika/server/resource/TikaResource.java  |  17 +-
 .../java/org/apache/tika/server/CXFTestBase.java   |   4 +-
 .../org/apache/tika/server/TikaParsersTest.java    |  69 +--
 .../org/apache/tika/server/TikaResourceTest.java   |  38 ++
 .../tika/server/tika-config-for-server-tests.xml   |  29 ++
 .../src/test/resources/testPDFTwoTextBoxes.pdf     | Bin 0 -> 57100 bytes
 107 files changed, 4317 insertions(+), 493 deletions(-)

Reply via email to