Merge remote-tracking branch 'upstream/master' into TIKA-1913
Project: http://git-wip-us.apache.org/repos/asf/tika/repo
Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/80b27e6d
Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/80b27e6d
Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/80b27e6d
Branch: refs/heads/master
Commit: 80b27e6d843d221de87591d34632e546c0d23b1a
Parents: f39c087 0dc29d0
Author: manali <[email protected]>
Authored: Thu Apr 21 12:15:26 2016 -0700
Committer: manali <[email protected]>
Committed: Thu Apr 21 12:15:26 2016 -0700
----------------------------------------------------------------------
CHANGES.txt | 25 +-
.../batch/builders/BatchProcessBuilder.java | 15 +-
.../builders/CommandLineParserBuilder.java | 16 +-
.../java/org/apache/tika/config/TikaConfig.java | 14 +-
.../tika/config/TikaConfigSerializer.java | 4 +-
.../tika/metadata/TikaCoreProperties.java | 9 +
.../org/apache/tika/mime/MimeTypesReader.java | 20 +-
.../org/apache/tika/parser/ParseContext.java | 142 ++++++-
.../external/ExternalParsersConfigReader.java | 11 +-
.../src/main/java/org/apache/tika/sax/Link.java | 4 +
.../java/org/apache/tika/sax/LinkBuilder.java | 6 +-
.../org/apache/tika/sax/LinkContentHandler.java | 18 +-
.../org/apache/tika/mime/tika-mimetypes.xml | 38 +-
.../org/apache/tika/parser/mock/MockParser.java | 12 +-
.../apache/tika/sax/LinkContentHandlerTest.java | 36 +-
tika-parent/pom.xml | 3 +-
tika-parsers/pom.xml | 12 +-
.../tika/parser/epub/EpubContentParser.java | 33 +-
.../org/apache/tika/parser/epub/EpubParser.java | 8 +-
.../tika/parser/microsoft/WordExtractor.java | 11 +-
.../ooxml/XSSFExcelExtractorDecorator.java | 11 +-
.../parser/odf/OpenDocumentContentParser.java | 37 +-
.../org/apache/tika/parser/pdf/PDF2XHTML.java | 66 +++-
.../org/apache/tika/parser/pdf/PDFParser.java | 54 ++-
.../apache/tika/parser/pdf/PDFParserConfig.java | 32 ++
.../apache/tika/parser/pdf/XFAExtractor.java | 30 +-
.../tika/parser/pot/PooledTimeSeriesParser.java | 394 ++++++++++---------
.../apache/tika/parser/pdf/PDFParser.properties | 3 +-
.../org/apache/tika/mime/TestMimeTypes.java | 7 +-
.../apache/tika/parser/pdf/PDFParserTest.java | 39 ++
.../test/resources/test-documents/testHFA.hfa | Bin 0 -> 1024 bytes
.../test-documents/testPDF_bad_page_303226.pdf | Bin 0 -> 138027 bytes
32 files changed, 725 insertions(+), 385 deletions(-)
----------------------------------------------------------------------
