Merge branch '2.x' of https://git-wip-us.apache.org/repos/asf/tika into 2.x
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/573527bb Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/573527bb Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/573527bb Branch: refs/heads/2.x Commit: 573527bbc608d495c40f26c02c7286197c3c723b Parents: bd3ecfc 2a7e52e Author: Lewis John McGibbney <[email protected]> Authored: Thu Jun 30 12:32:01 2016 -0700 Committer: Lewis John McGibbney <[email protected]> Committed: Thu Jun 30 12:32:01 2016 -0700 ---------------------------------------------------------------------- CHANGES.txt | 36 +- tika-app/pom.xml | 8 + .../org/apache/tika/mime/TestMimeTypes.java | 33 +- .../tika/parser/AutoDetectParserTest.java | 24 +- .../ConfigurableThreadPoolExecutor.java | 64 +- .../concurrent/SimpleThreadPoolExecutor.java | 80 +- .../apache/tika/detect/AbstractDetector.java | 86 +- .../org/apache/tika/detect/DetectorProxy.java | 134 +- .../tika/detect/EncodingDetectorProxy.java | 82 +- .../java/org/apache/tika/io/EndianUtils.java | 830 ++--- .../java/org/apache/tika/io/StringUtil.java | 242 +- .../tika/metadata/TikaCoreProperties.java | 7 + .../tika/osgi/TikaAbstractBundleActivator.java | 142 +- .../java/org/apache/tika/osgi/TikaService.java | 50 +- .../tika/osgi/internal/TikaServiceImpl.java | 162 +- .../org/apache/tika/parser/AbstractParser.java | 24 - .../org/apache/tika/parser/ParserProxy.java | 148 +- .../org/apache/tika/utils/ConcurrentUtils.java | 114 +- .../org/apache/tika/mime/tika-mimetypes.xml | 54 +- .../java/org/apache/tika/TikaDetectionTest.java | 2 +- .../src/test/java/org/apache/tika/TikaTest.java | 6 +- .../org/apache/tika/config/DummyExecutor.java | 60 +- .../apache/tika/detect/DetectorProxyTest.java | 112 +- .../apache/tika/detect/DummyProxyDetector.java | 62 +- .../org/apache/tika/io/EndianUtilsTest.java | 35 + .../apache/tika/parser/DummyProxyParser.java | 88 +- .../org/apache/tika/parser/ParserProxyTest.java | 130 +- .../apache/tika/utils/ConcurrentUtilsTest.java | 126 +- .../services/org.apache.tika.parser.Parser | 34 +- .../apache/tika/config/TIKA-1762-executors.xml | 56 +- tika-parent/pom.xml | 9 + tika-parser-bundles/pom.xml | 350 +-- .../tika-parser-advanced-bundle/pom.xml | 162 +- .../tika-parser-cad-bundle/pom.xml | 144 +- .../tika-parser-code-bundle/pom.xml | 148 +- .../tika-parser-crypto-bundle/pom.xml | 156 +- .../tika-parser-database-bundle/pom.xml | 134 +- .../tika-parser-ebook-bundle/pom.xml | 142 +- .../tika-parser-journal-bundle/pom.xml | 158 +- .../apache/tika/module/journal/BundleIT.java | 2 +- .../tika-parser-multimedia-bundle/pom.xml | 168 +- .../tika-parser-office-bundle/pom.xml | 280 +- .../org/apache/tika/module/office/BundleIT.java | 24 +- .../tika-parser-package-bundle/pom.xml | 158 +- .../tika-parser-pdf-bundle/pom.xml | 197 +- .../org/apache/tika/module/pdf/BundleIT.java | 2 +- .../tika-parser-scientific-bundle/pom.xml | 402 +-- .../tika-parser-text-bundle/pom.xml | 156 +- .../tika-parser-web-bundle/pom.xml | 184 +- tika-parser-modules/pom.xml | 410 +-- .../tika-parser-advanced-module/pom.xml | 136 +- .../module/advanced/internal/Activator.java | 72 +- .../tika-parser-cad-module/pom.xml | 110 +- .../tika/module/cad/internal/Activator.java | 72 +- .../org/apache/tika/parser/dwg/DWGParser.java | 712 ++--- .../org/apache/tika/parser/prt/PRTParser.java | 555 ++-- .../apache/tika/parser/dwg/DWGParserTest.java | 372 ++- .../apache/tika/parser/prt/PRTParserTest.java | 214 +- .../tika-parser-code-module/pom.xml | 136 +- .../tika/module/code/internal/Activator.java | 72 +- .../org/apache/tika/parser/asm/ClassParser.java | 108 +- .../tika/parser/asm/XHTMLClassVisitor.java | 646 ++-- .../tika/parser/code/SourceCodeParser.java | 284 +- .../apache/tika/parser/asm/ClassParserTest.java | 118 +- .../tika/parser/code/SourceCodeParserTest.java | 202 +- .../tika-parser-crypto-module/pom.xml | 104 +- .../tika/module/crypto/internal/Activator.java | 72 +- .../tika/parser/crypto/Pkcs7ParserTest.java | 94 +- .../tika-parser-database-module/pom.xml | 132 +- .../module/database/internal/Activator.java | 72 +- .../tika-parser-ebook-module/pom.xml | 94 +- .../tika/module/ebook/internal/Activator.java | 72 +- .../tika/parser/epub/EpubContentParser.java | 118 +- .../org/apache/tika/parser/epub/EpubParser.java | 238 +- .../apache/tika/parser/epub/EpubParserTest.java | 116 +- .../tika-parser-journal-module/pom.xml | 134 +- .../tika/module/journal/internal/Activator.java | 72 +- .../tika-parser-multimedia-module/pom.xml | 206 +- .../module/multimedia/internal/Activator.java | 72 +- .../apache/tika/parser/audio/AudioParser.java | 278 +- .../apache/tika/parser/audio/MidiParser.java | 242 +- .../apache/tika/parser/font/TrueTypeParser.java | 222 +- .../parser/image/ImageMetadataExtractor.java | 1124 +++---- .../apache/tika/parser/image/ImageParser.java | 406 +-- .../tika/parser/image/MetadataFields.java | 168 +- .../apache/tika/parser/image/TiffParser.java | 136 +- .../org/apache/tika/parser/jpeg/JpegParser.java | 138 +- .../org/apache/tika/parser/mp3/AudioFrame.java | 504 ++-- .../tika/parser/mp3/CompositeTagHandler.java | 284 +- .../org/apache/tika/parser/mp3/ID3Tags.java | 508 ++-- .../apache/tika/parser/mp3/ID3v1Handler.java | 366 +-- .../apache/tika/parser/mp3/ID3v22Handler.java | 318 +- .../apache/tika/parser/mp3/ID3v23Handler.java | 276 +- .../apache/tika/parser/mp3/ID3v24Handler.java | 286 +- .../org/apache/tika/parser/mp3/ID3v2Frame.java | 848 +++--- .../apache/tika/parser/mp3/LyricsHandler.java | 312 +- .../org/apache/tika/parser/mp3/MP3Frame.java | 50 +- .../org/apache/tika/parser/mp3/Mp3Parser.java | 492 +-- .../tika/parser/ocr/TesseractOCRParser.java | 93 +- .../org/apache/tika/parser/video/FLVParser.java | 536 ++-- .../parser/ocr/TesseractOCRConfig.properties | 40 +- .../tika/parser/audio/AudioParserTest.java | 150 +- .../tika/parser/audio/MidiParserTest.java | 84 +- .../image/ImageMetadataExtractorTest.java | 278 +- .../tika/parser/image/ImageParserTest.java | 324 +- .../tika/parser/image/MetadataFieldsTest.java | 72 +- .../tika/parser/image/TiffParserTest.java | 132 +- .../apache/tika/parser/jpeg/JpegParserTest.java | 568 ++-- .../apache/tika/parser/mp3/Mp3ParserTest.java | 828 ++--- .../tika/parser/ocr/TesseractOCRConfigTest.java | 184 +- .../tika/parser/ocr/TesseractOCRParserTest.java | 527 ++-- .../apache/tika/parser/video/FLVParserTest.java | 88 +- .../tika-parser-office-module/pom.xml | 250 +- .../tika/module/office/internal/Activator.java | 72 +- .../parser/apple/AppleSingleFileParser.java | 204 ++ .../org/apache/tika/parser/chm/ChmParser.java | 224 +- .../tika/parser/chm/accessor/ChmAccessor.java | 78 +- .../chm/accessor/ChmDirectoryListingSet.java | 796 ++--- .../tika/parser/chm/accessor/ChmItsfHeader.java | 984 +++--- .../tika/parser/chm/accessor/ChmItspHeader.java | 1096 +++---- .../parser/chm/accessor/ChmLzxcControlData.java | 638 ++-- .../parser/chm/accessor/ChmLzxcResetTable.java | 682 ++--- .../tika/parser/chm/accessor/ChmPmgiHeader.java | 352 +-- .../tika/parser/chm/accessor/ChmPmglHeader.java | 412 +-- .../chm/accessor/DirectoryListingEntry.java | 302 +- .../tika/parser/chm/assertion/ChmAssert.java | 338 +-- .../apache/tika/parser/chm/core/ChmCommons.java | 722 ++--- .../tika/parser/chm/core/ChmConstants.java | 204 +- .../tika/parser/chm/core/ChmExtractor.java | 784 ++--- .../apache/tika/parser/chm/core/ChmWrapper.java | 294 +- .../chm/exception/ChmParsingException.java | 54 +- .../tika/parser/chm/lzx/ChmBlockInfo.java | 470 +-- .../apache/tika/parser/chm/lzx/ChmLzxBlock.java | 1826 +++++------ .../apache/tika/parser/chm/lzx/ChmLzxState.java | 654 ++-- .../apache/tika/parser/chm/lzx/ChmSection.java | 444 +-- .../org/apache/tika/parser/mbox/MboxParser.java | 418 +-- .../tika/parser/mbox/OutlookPSTParser.java | 406 +-- .../microsoft/AbstractPOIFSExtractor.java | 32 +- .../tika/parser/microsoft/HSLFExtractor.java | 18 +- .../parser/microsoft/JackcessExtractor.java | 4 +- .../parser/microsoft/MSOwnerFileParser.java | 80 + .../tika/parser/microsoft/OfficeParser.java | 2 +- .../tika/parser/microsoft/WordExtractor.java | 22 +- .../microsoft/ooxml/AbstractOOXMLExtractor.java | 12 +- .../microsoft/xml/AbstractXML2003Parser.java | 93 +- .../microsoft/xml/SpreadsheetMLParser.java | 42 +- .../tika/parser/microsoft/xml/WordMLParser.java | 121 +- .../parser/odf/NSNormalizerContentHandler.java | 198 +- .../parser/odf/OpenDocumentContentParser.java | 992 +++--- .../tika/parser/odf/OpenDocumentMetaParser.java | 398 +-- .../tika/parser/odf/OpenDocumentParser.java | 450 +-- .../org/apache/tika/parser/opc/OPCDetector.java | 310 +- .../parser/opendocument/OpenOfficeParser.java | 56 +- .../org/apache/tika/parser/rtf/GroupState.java | 134 +- .../apache/tika/parser/rtf/ListDescriptor.java | 70 +- .../tika/parser/rtf/RTFEmbObjHandler.java | 7 +- .../tika/parser/rtf/RTFObjDataParser.java | 43 +- .../org/apache/tika/parser/rtf/RTFParser.java | 186 +- .../apache/tika/parser/rtf/TextExtractor.java | 2853 +++++++++--------- .../services/org.apache.tika.parser.Parser | 3 +- .../parser/apple/AppleSingleFileParserTest.java | 43 + .../tika/parser/chm/TestChmBlockInfo.java | 250 +- .../tika/parser/chm/TestChmExtraction.java | 424 +-- .../tika/parser/chm/TestChmExtractor.java | 126 +- .../tika/parser/chm/TestChmItsfHeader.java | 244 +- .../tika/parser/chm/TestChmItspHeader.java | 320 +- .../apache/tika/parser/chm/TestChmLzxState.java | 202 +- .../tika/parser/chm/TestChmLzxcControlData.java | 288 +- .../tika/parser/chm/TestChmLzxcResetTable.java | 312 +- .../parser/chm/TestDirectoryListingEntry.java | 170 +- .../apache/tika/parser/chm/TestParameters.java | 208 +- .../apache/tika/parser/chm/TestPmgiHeader.java | 90 +- .../apache/tika/parser/chm/TestPmglHeader.java | 152 +- .../apache/tika/parser/dbf/DBFParserTest.java | 2 + .../apache/tika/parser/mbox/MboxParserTest.java | 312 +- .../tika/parser/mbox/OutlookPSTParserTest.java | 220 +- .../AbstractPOIContainerExtractionTest.java | 150 +- .../tika/parser/microsoft/ExcelParserTest.java | 817 ++--- .../parser/microsoft/MSOwnerFileParserTest.java | 31 + .../tika/parser/microsoft/OfficeParserTest.java | 92 +- .../parser/microsoft/OutlookParserTest.java | 478 +-- .../microsoft/POIContainerExtractionTest.java | 764 ++--- .../parser/microsoft/PowerPointParserTest.java | 492 +-- .../parser/microsoft/PublisherParserTest.java | 106 +- .../tika/parser/microsoft/TNEFParserTest.java | 196 +- .../tika/parser/microsoft/VisioParserTest.java | 102 +- .../tika/parser/microsoft/WordParserTest.java | 1011 ++++--- .../ooxml/OOXMLContainerExtractionTest.java | 2 +- .../parser/microsoft/ooxml/OOXMLParserTest.java | 27 + .../parser/microsoft/xml/XML2003ParserTest.java | 40 +- .../apache/tika/parser/odf/ODFParserTest.java | 680 ++--- .../apache/tika/parser/rtf/RTFParserTest.java | 1050 ++++--- .../tika-parser-package-module/pom.xml | 150 +- .../tika/module/pkg/internal/Activator.java | 72 +- .../tika/parser/iwork/AutoPageNumberUtils.java | 224 +- .../tika/parser/iwork/IWorkPackageParser.java | 438 +-- .../parser/iwork/KeynoteContentHandler.java | 348 +-- .../parser/iwork/NumbersContentHandler.java | 462 +-- .../tika/parser/iwork/PagesContentHandler.java | 896 +++--- .../apache/tika/parser/pkg/PackageParser.java | 574 ++-- .../tika/parser/pkg/ZipContainerDetector.java | 648 ++-- .../parser/iwork/AutoPageNumberUtilsTest.java | 156 +- .../tika/parser/iwork/IWorkParserTest.java | 932 +++--- .../apache/tika/parser/pkg/AbstractPkgTest.java | 186 +- .../apache/tika/parser/pkg/Bzip2ParserTest.java | 178 +- .../apache/tika/parser/pkg/GzipParserTest.java | 204 +- .../apache/tika/parser/pkg/TarParserTest.java | 210 +- .../apache/tika/parser/pkg/ZipParserTest.java | 384 +-- .../tika-parser-pdf-module/pom.xml | 246 +- .../tika/module/pdf/internal/Activator.java | 72 +- .../tika/parser/pdf/AbstractPDF2XHTML.java | 579 ++++ .../org/apache/tika/parser/pdf/OCR2XHTML.java | 125 + .../org/apache/tika/parser/pdf/PDF2XHTML.java | 518 +--- .../org/apache/tika/parser/pdf/PDFParser.java | 8 + .../apache/tika/parser/pdf/PDFParserConfig.java | 274 +- .../apache/tika/parser/pdf/PDFParser.properties | 10 +- .../apache/tika/parser/pdf/PDFParserTest.java | 49 +- .../tika-parser-scientific-module/pom.xml | 270 +- .../module/scientific/internal/Activator.java | 72 +- .../org/apache/tika/parser/hdf/HDFParser.java | 244 +- .../apache/tika/parser/hdf/HDFParserTest.java | 144 +- .../tika/parser/netcdf/NetCDFParserTest.java | 122 +- .../tika-parser-text-module/pom.xml | 132 +- .../tika/module/text/internal/Activator.java | 40 +- .../apache/tika/parser/txt/CharsetDetector.java | 1088 +++---- .../apache/tika/parser/txt/CharsetMatch.java | 572 ++-- .../tika/parser/txt/CharsetRecog_2022.java | 326 +- .../tika/parser/txt/CharsetRecog_UTF8.java | 198 +- .../tika/parser/txt/CharsetRecog_Unicode.java | 278 +- .../tika/parser/txt/CharsetRecog_mbcs.java | 1064 +++---- .../tika/parser/txt/CharsetRecog_sbcs.java | 2706 ++++++++--------- .../tika/parser/txt/CharsetRecognizer.java | 108 +- .../org/apache/tika/parser/txt/TXTParser.java | 196 +- .../parser/xml/AbstractMetadataHandler.java | 186 +- .../xml/AttributeDependantMetadataHandler.java | 164 +- .../parser/xml/AttributeMetadataHandler.java | 122 +- .../org/apache/tika/parser/xml/DcXMLParser.java | 120 +- .../tika/parser/xml/ElementMetadataHandler.java | 510 ++-- .../tika/parser/xml/FictionBookParser.java | 234 +- .../apache/tika/parser/xml/MetadataHandler.java | 170 +- .../org/apache/tika/parser/xml/XMLParser.java | 178 +- .../apache/tika/parser/txt/TXTParserTest.java | 548 ++-- .../apache/tika/parser/xml/DcXMLParserTest.java | 174 +- .../EmptyAndDuplicateElementsXMLParserTest.java | 232 +- .../tika/parser/xml/FictionBookParserTest.java | 108 +- .../tika-parser-web-module/pom.xml | 178 +- .../tika/module/web/internal/Activator.java | 72 +- .../org/apache/tika/parser/feed/FeedParser.java | 254 +- .../parser/html/BoilerpipeContentHandler.java | 694 ++--- .../tika/parser/html/DefaultHtmlMapper.java | 274 +- .../apache/tika/parser/html/HtmlHandler.java | 618 ++-- .../org/apache/tika/parser/html/HtmlMapper.java | 138 +- .../org/apache/tika/parser/html/HtmlParser.java | 388 +-- .../tika/parser/html/IdentityHtmlMapper.java | 86 +- .../tika/parser/html/XHTMLDowngradeHandler.java | 156 +- .../tika/parser/mail/MailContentHandler.java | 752 ++--- .../apache/tika/parser/mail/RFC822Parser.java | 190 +- .../apache/tika/parser/feed/FeedParserTest.java | 150 +- .../apache/tika/parser/html/HtmlParserTest.java | 2262 +++++++------- .../tika/parser/mail/RFC822ParserTest.java | 970 +++--- .../tika/parser/xmp/JempboxExtractor.java | 30 + .../tika/parser/xmp/JempboxExtractorTest.java | 29 +- .../test-documents/testAppleSingleFile.pdf | Bin 0 -> 1893 bytes .../test/resources/test-documents/testDJVU.djvu | Bin 0 -> 89 bytes .../test-documents/testEXCEL_embeddedPDF.xls | Bin 0 -> 38400 bytes .../test-documents/testEXCEL_embeddedPDF.xlsx | Bin 0 -> 25602 bytes .../test-documents/testEndNoteImportFile.enw | 10 + .../resources/test-documents/testICalendar.ics | 15 + .../resources/test-documents/testMSOwnerFile | Bin 0 -> 162 bytes .../test-documents/testPPT_embeddedPDF.ppt | Bin 0 -> 187392 bytes .../test-documents/testPPT_embeddedPDF.pptx | Bin 0 -> 108637 bytes .../resources/test-documents/testVCalendar.vcs | 10 + .../test-documents/testWindowsMediaMeta.asx | 6 + .../test/resources/test-documents/testXMP.xmp | 178 ++ .../test-documents/test_recursive_embedded.doc | Bin 0 -> 31744 bytes 275 files changed, 39074 insertions(+), 37550 deletions(-) ----------------------------------------------------------------------
