Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/tika into TIKA-1872
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/da1fe243 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/da1fe243 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/da1fe243 Branch: refs/heads/master Commit: da1fe243ace68cc2b5c4bde022113233ad95ea15 Parents: 3279a11 52851a4 Author: Chris Mattmann <[email protected]> Authored: Mon Apr 18 19:06:47 2016 -0700 Committer: Chris Mattmann <[email protected]> Committed: Mon Apr 18 19:06:47 2016 -0700 ---------------------------------------------------------------------- CHANGES.txt | 31 +- .../tika/cli/BatchCommandLineBuilder.java | 7 - .../main/resources/tika-app-batch-config.xml | 10 +- .../tika/cli/TikaCLIBatchCommandLineTest.java | 1 - tika-batch/pom.xml | 2 +- .../batch/builders/BatchProcessBuilder.java | 15 +- .../builders/CommandLineParserBuilder.java | 16 +- .../apache/tika/batch/fs/FSBatchProcessCLI.java | 4 +- .../builders/BasicTikaFSConsumersBuilder.java | 51 ++- .../tika/batch/fs/default-tika-batch-config.xml | 50 +- .../apache/tika/batch/fs/BatchProcessTest.java | 19 +- .../tika/batch/fs/HandlerBuilderTest.java | 4 - .../tika-batch-config-MockConsumersBuilder.xml | 2 +- .../test/resources/tika-batch-config-broken.xml | 2 +- .../tika-batch-config-test-suffix-override.xml | 112 +++++ .../test/resources/tika-batch-config-test.xml | 2 +- .../java/org/apache/tika/config/TikaConfig.java | 14 +- .../tika/config/TikaConfigSerializer.java | 4 +- .../java/org/apache/tika/fork/ForkClient.java | 10 +- .../tika/metadata/TikaCoreProperties.java | 9 + .../org/apache/tika/mime/MimeTypesReader.java | 20 +- .../org/apache/tika/parser/ParseContext.java | 142 +++++- .../org/apache/tika/parser/ParserDecorator.java | 35 +- .../external/ExternalParsersConfigReader.java | 11 +- .../tika/sax/BasicContentHandlerFactory.java | 8 + .../src/main/java/org/apache/tika/sax/Link.java | 4 + .../java/org/apache/tika/sax/LinkBuilder.java | 6 +- .../org/apache/tika/sax/LinkContentHandler.java | 18 +- .../org/apache/tika/mime/tika-mimetypes.xml | 16 +- .../src/test/java/org/apache/tika/TikaTest.java | 59 ++- .../org/apache/tika/parser/mock/MockParser.java | 12 +- .../apache/tika/sax/LinkContentHandlerTest.java | 36 +- .../org/apache/tika/example/ParsingExample.java | 14 +- tika-parent/pom.xml | 5 +- tika-parsers/pom.xml | 22 +- .../tika/parser/epub/EpubContentParser.java | 33 +- .../parser/executable/ExecutableParser.java | 2 +- .../geoinfo/GeographicInformationParser.java | 30 +- .../parser/image/ImageMetadataExtractor.java | 2 + .../tika/parser/isatab/ISArchiveParser.java | 54 ++- .../tika/parser/jdbc/AbstractDBParser.java | 13 +- .../tika/parser/jdbc/JDBCTableReader.java | 68 ++- .../tika/parser/jdbc/SQLite3DBParser.java | 31 +- .../apache/tika/parser/jdbc/SQLite3Parser.java | 6 +- .../tika/parser/jdbc/SQLite3TableReader.java | 45 +- .../org/apache/tika/parser/mat/MatParser.java | 27 +- .../tika/parser/microsoft/OfficeParser.java | 3 +- .../microsoft/POIFSContainerDetector.java | 21 +- .../microsoft/ooxml/AbstractOOXMLExtractor.java | 22 +- .../ooxml/XSSFExcelExtractorDecorator.java | 11 +- .../apache/tika/parser/netcdf/NetCDFParser.java | 8 +- .../parser/odf/OpenDocumentContentParser.java | 37 +- .../tika/parser/odf/OpenDocumentParser.java | 62 ++- .../org/apache/tika/parser/pdf/PDF2XHTML.java | 66 ++- .../org/apache/tika/parser/pdf/PDFParser.java | 54 ++- .../apache/tika/parser/pdf/PDFParserConfig.java | 32 ++ .../apache/tika/parser/pdf/XFAExtractor.java | 30 +- .../apache/tika/parser/pdf/PDFParser.properties | 3 +- .../parser/executable/ExecutableParserTest.java | 73 ++- .../GeographicInformationParserTest.java | 48 +- .../tika/parser/jdbc/SQLite3ParserTest.java | 106 +++-- .../apache/tika/parser/jpeg/JpegParserTest.java | 21 +- .../AbstractPOIContainerExtractionTest.java | 4 +- .../microsoft/POIContainerExtractionTest.java | 35 +- .../ooxml/OOXMLContainerExtractionTest.java | 23 +- .../parser/microsoft/ooxml/OOXMLParserTest.java | 9 +- .../apache/tika/parser/odf/ODFParserTest.java | 10 +- .../apache/tika/parser/pdf/PDFParserTest.java | 453 +++++++------------ .../testMSChart-govdocs-428996.ppt | Bin 0 -> 41472 bytes .../testMSChart-govdocs-428996.pptx | Bin 0 -> 56224 bytes .../testMSChart-govdocs-428996.xls | Bin 0 -> 35328 bytes .../testMSChart-govdocs-428996.xlsx | Bin 0 -> 17112 bytes .../resources/test-documents/testODTNoMeta.odt | Bin 0 -> 5847 bytes .../test-documents/testPDF_bad_page_303226.pdf | Bin 0 -> 138027 bytes .../resources/test-documents/testSqlite3b.db | Bin 27648 -> 27648 bytes tika-serialization/pom.xml | 2 +- .../org/apache/tika/server/CXFTestBase.java | 26 +- 77 files changed, 1383 insertions(+), 860 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/da1fe243/tika-parent/pom.xml ---------------------------------------------------------------------- diff --cc tika-parent/pom.xml index 9db770a,9ea75aa..f2ae819 --- a/tika-parent/pom.xml +++ b/tika-parent/pom.xml @@@ -300,9 -300,9 +300,10 @@@ <properties> <maven.compiler.source>1.7</maven.compiler.source> <maven.compiler.target>1.7</maven.compiler.target> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.reporting.outputEncoding>${project.build.sourceEncoding}</project.reporting.outputEncoding> - <commons.compress.version>1.10</commons.compress.version> + <!-- NOTE: sync tukaani version with commons-compress in tika-parsers --> + <commons.compress.version>1.11</commons.compress.version> <commons.io.version>2.4</commons.io.version> <slf4j.version>1.7.12</slf4j.version> </properties>
