move test files to parser-modules
Project: http://git-wip-us.apache.org/repos/asf/tika/repo Commit: http://git-wip-us.apache.org/repos/asf/tika/commit/38916f89 Tree: http://git-wip-us.apache.org/repos/asf/tika/tree/38916f89 Diff: http://git-wip-us.apache.org/repos/asf/tika/diff/38916f89 Branch: refs/heads/2.x-test-doc-reshuffle Commit: 38916f89c179ec9d2fc3e425edc4b87e1470167e Parents: 18d0285 Author: tballison <[email protected]> Authored: Fri Feb 19 15:23:10 2016 -0500 Committer: tballison <[email protected]> Committed: Fri Feb 19 15:24:06 2016 -0500 ---------------------------------------------------------------------- pom.xml | 1 - tika-parser-modules/pom.xml | 11 +- .../tika/parser/ner/opennlp/ModelGetter.groovy | 93 + .../tika/parser/ner/opennlp/get-models.sh | 26 + .../apache/tika/parser/ner/regex/ner-regex.txt | 17 + .../org/apache/tika/parser/ner/tika-config.xml | 27 + .../resources/test-documents/testCADKEY.prt | Bin 0 -> 10246 bytes .../resources/test-documents/testCADKEY2.prt | Bin 0 -> 41664 bytes .../resources/test-documents/testDWG2000.dwg | Bin 0 -> 675048 bytes .../resources/test-documents/testDWG2004.dwg | Bin 0 -> 39335 bytes .../test-documents/testDWG2004_no_header.dwg | Bin 0 -> 38178 bytes .../resources/test-documents/testDWG2007.dwg | Bin 0 -> 73088 bytes .../resources/test-documents/testDWG2010.dwg | Bin 0 -> 59562 bytes .../test-documents/testDWG2010_custom_props.dwg | Bin 0 -> 73791 bytes .../test-documents/testDWGmech2004.dwg | Bin 0 -> 439438 bytes .../test-documents/testDWGmech2004DX.dwg | Bin 0 -> 439563 bytes .../test-documents/testDWGmech2005.dwg | Bin 0 -> 439627 bytes .../test-documents/testDWGmech2006.dwg | Bin 0 -> 439982 bytes .../test-documents/testDWGmech2007.dwg | Bin 0 -> 479904 bytes .../test-documents/testDWGmech2008.dwg | Bin 0 -> 487456 bytes .../test-documents/testDWGmech2009.dwg | Bin 0 -> 483840 bytes .../test-documents/testDWGmech2010.dwg | Bin 0 -> 467023 bytes .../test-documents/testDWGmech2011.dwg | Bin 0 -> 466891 bytes .../resources/test-documents/testDWGmech6.dwg | Bin 0 -> 687882 bytes .../test/resources/test-documents/testCPP.cpp | 14 + .../resources/test-documents/testGROOVY.groovy | 9 + .../test/resources/test-documents/testJAVA.java | 14 + .../resources/test-documents/testLinux-x86-32 | Bin 0 -> 7175 bytes .../test-documents/testWindows-x86-32.exe | Bin 0 -> 11723 bytes .../resources/test-documents/testSqlite3b.db | Bin 0 -> 27648 bytes .../test/resources/test-documents/testEPUB.epub | Bin 0 -> 29719 bytes .../resources/test-documents/testiBooks.ibooks | Bin 0 -> 970636 bytes .../test-documents/testJournalParser.pdf | Bin 0 -> 985125 bytes .../src/test/resources/test-documents/test2.swf | Bin 0 -> 42534 bytes .../test/resources/test-documents/testAFM.afm | 50 + .../test/resources/test-documents/testAIFF.aif | Bin 0 -> 3894 bytes .../src/test/resources/test-documents/testAU.au | Bin 0 -> 3868 bytes .../test/resources/test-documents/testBMP.bmp | Bin 0 -> 22554 bytes .../test/resources/test-documents/testBPG.bpg | Bin 0 -> 1824 bytes .../resources/test-documents/testBPG_GEO.bpg | Bin 0 -> 2042 bytes .../test-documents/testBPG_commented.bpg | Bin 0 -> 10281 bytes .../testBPG_commented_xnviewmp026.bpg | Bin 0 -> 12374 bytes .../test/resources/test-documents/testFLV.flv | Bin 0 -> 90580 bytes .../test/resources/test-documents/testGIF.gif | Bin 0 -> 8495 bytes .../test/resources/test-documents/testJPEG.jpg | Bin 0 -> 7686 bytes .../resources/test-documents/testJPEG_EXIF.jpg | Bin 0 -> 16357 bytes .../testJPEG_EXIF_emptyDateTime.jpg | Bin 0 -> 24597 bytes .../resources/test-documents/testJPEG_GEO.jpg | Bin 0 -> 16482 bytes .../resources/test-documents/testJPEG_GEO_2.jpg | Bin 0 -> 20844 bytes .../test-documents/testJPEG_commented.jpg | Bin 0 -> 13325 bytes .../testJPEG_commented_pspcs2mac.jpg | Bin 0 -> 26173 bytes .../testJPEG_commented_xnviewmp026.jpg | Bin 0 -> 13910 bytes .../test-documents/testJPEG_oddTagComponent.jpg | Bin 0 -> 8330 bytes .../test/resources/test-documents/testMID.mid | Bin 0 -> 322 bytes .../resources/test-documents/testMP3i18n.mp3 | Bin 0 -> 40832 bytes .../resources/test-documents/testMP3id3v1.mp3 | Bin 0 -> 39416 bytes .../test-documents/testMP3id3v1_v2.mp3 | Bin 0 -> 40960 bytes .../resources/test-documents/testMP3id3v2.mp3 | Bin 0 -> 39577 bytes .../resources/test-documents/testMP3id3v24.mp3 | Bin 0 -> 39471 bytes .../resources/test-documents/testMP3lyrics.mp3 | Bin 0 -> 34688 bytes .../resources/test-documents/testMP3noid3.mp3 | Bin 0 -> 39288 bytes .../test-documents/testMP3truncated.mp3 | Bin 0 -> 65536 bytes .../test/resources/test-documents/testMP4.m4a | Bin 0 -> 4770 bytes .../test-documents/testNakedUTF16BOM.mp3 | Bin 0 -> 2625 bytes .../test/resources/test-documents/testOCR.docx | Bin 0 -> 62041 bytes .../test/resources/test-documents/testOCR.jpg | Bin 0 -> 3408 bytes .../test/resources/test-documents/testOCR.pdf | Bin 0 -> 41936 bytes .../test/resources/test-documents/testOCR.pptx | Bin 0 -> 78550 bytes .../test/resources/test-documents/testPNG.png | Bin 0 -> 17041 bytes .../test/resources/test-documents/testPSD.psd | Bin 0 -> 69410 bytes .../test/resources/test-documents/testPSD2.psd | Bin 0 -> 31315 bytes .../test-documents/testRFC822-multipart | 111 + .../test/resources/test-documents/testTIFF.tif | Bin 0 -> 25584 bytes .../resources/test-documents/testTrueType3.ttf | Bin 0 -> 224592 bytes .../test/resources/test-documents/testWAV.wav | Bin 0 -> 3884 bytes .../test/resources/test-documents/testWEBP.webp | Bin 0 -> 3442 bytes .../test-documents/testWebp_Alpha_Lossless.webp | Bin 0 -> 92312 bytes .../test-documents/testWebp_Alpha_Lossy.webp | Bin 0 -> 23404 bytes .../TesseractOCRConfig-full.properties | 22 + .../TesseractOCRConfig-partial.properties | 18 + .../test/resources/test-documents/Doc1_ole.doc | Bin 0 -> 89600 bytes .../test-documents/EmbeddedDocument.docx | Bin 0 -> 13219 bytes .../test-documents/EmbeddedOutlook.docx | Bin 0 -> 113242 bytes .../resources/test-documents/EmbeddedPDF.docx | Bin 0 -> 99389 bytes .../resources/test-documents/NullHeader.docx | Bin 0 -> 4355 bytes .../resources/test-documents/chm/IMJPCL.CHM | Bin 0 -> 757069 bytes .../resources/test-documents/chm/IMJPCLE.CHM | Bin 0 -> 256718 bytes .../resources/test-documents/chm/IMTCEN.CHM | Bin 0 -> 452547 bytes .../test/resources/test-documents/chm/admin.chm | Bin 0 -> 49749 bytes .../resources/test-documents/chm/cmak_ops.CHM | Bin 0 -> 82895 bytes .../resources/test-documents/chm/comexp.CHM | Bin 0 -> 109882 bytes .../resources/test-documents/chm/gpedit.CHM | Bin 0 -> 49537 bytes .../test/resources/test-documents/chm/tcpip.CHM | Bin 0 -> 33186 bytes .../resources/test-documents/chm/wmicontrol.CHM | Bin 0 -> 32096 bytes .../test/resources/test-documents/complex.mbox | 291 + .../resources/test-documents/footnotes.docx | Bin 0 -> 12823 bytes .../resources/test-documents/headerPic.docx | Bin 0 -> 16206 bytes .../test/resources/test-documents/headers.mbox | 7 + .../src/test/resources/test-documents/jxl.xls | Bin 0 -> 614912 bytes .../resources/test-documents/multiline.mbox | 5 + .../test/resources/test-documents/pictures.ppt | Bin 0 -> 75776 bytes .../test/resources/test-documents/protect.xlsx | Bin 0 -> 12968 bytes .../resources/test-documents/protectedFile.xlsx | Bin 0 -> 12968 bytes .../test-documents/protectedSheets.xlsx | Bin 0 -> 11236 bytes .../test/resources/test-documents/quoted.mbox | 4 + .../test/resources/test-documents/simple.mbox | 7 + .../resources/test-documents/test-outlook.msg | Bin 0 -> 19968 bytes .../test-documents/test-outlook2003.msg | Bin 0 -> 83968 bytes .../src/test/resources/test-documents/test.doc | Bin 0 -> 9216 bytes .../resources/test-documents/testAccess2.accdb | Bin 0 -> 794624 bytes .../test-documents/testAccess2_2000.mdb | Bin 0 -> 421888 bytes .../test-documents/testAccess2_2002-2003.mdb | Bin 0 -> 417792 bytes .../test-documents/testAccess2_encrypted.accdb | Bin 0 -> 557056 bytes .../test-documents/testAccess_V1997.mdb | Bin 0 -> 118784 bytes .../test-documents/testBinControlWord.rtf | 2 + .../test/resources/test-documents/testChm.chm | Bin 0 -> 186259 bytes .../test/resources/test-documents/testChm2.chm | Bin 0 -> 10807437 bytes .../test/resources/test-documents/testChm3.chm | Bin 0 -> 900481 bytes .../test-documents/testControlCharacters.doc | Bin 0 -> 448000 bytes .../test-documents/testDOCX_Thumbnail.docx | Bin 0 -> 13810 bytes .../test-documents/testDocumentLink.doc | Bin 0 -> 812032 bytes .../test-documents/testEXCEL-charts.xls | Bin 0 -> 15360 bytes .../test-documents/testEXCEL-formats.xls | Bin 0 -> 13824 bytes .../test-documents/testEXCEL-formats.xlsx | Bin 0 -> 8303 bytes .../test-documents/testEXCEL.strict.xlsx | Bin 0 -> 10006 bytes .../test/resources/test-documents/testEXCEL.xls | Bin 0 -> 13824 bytes .../resources/test-documents/testEXCEL.xlsb | Bin 0 -> 9161 bytes .../resources/test-documents/testEXCEL.xlsx | Bin 0 -> 9453 bytes .../resources/test-documents/testEXCEL_1img.xls | Bin 0 -> 20992 bytes .../test-documents/testEXCEL_1img.xlsx | Bin 0 -> 14552 bytes .../resources/test-documents/testEXCEL_4.xls | Bin 0 -> 39942 bytes .../resources/test-documents/testEXCEL_5.xls | Bin 0 -> 7168 bytes .../resources/test-documents/testEXCEL_95.xls | Bin 0 -> 20992 bytes .../test-documents/testEXCEL_custom_props.xls | Bin 0 -> 17408 bytes .../test-documents/testEXCEL_custom_props.xlsx | Bin 0 -> 9230 bytes .../test-documents/testEXCEL_embeded.xls | Bin 0 -> 303104 bytes .../test-documents/testEXCEL_embeded.xlsx | Bin 0 -> 348405 bytes .../testEXCEL_headers_footers.xls | Bin 0 -> 33792 bytes .../testEXCEL_headers_footers.xlsx | Bin 0 -> 11740 bytes .../testEXCEL_protected_passtika.xls | Bin 0 -> 17408 bytes .../testEXCEL_protected_passtika.xlsx | Bin 0 -> 12800 bytes .../test-documents/testEXCEL_textbox.xlsx | Bin 0 -> 11017 bytes .../resources/test-documents/testException1.doc | Bin 0 -> 49152 bytes .../resources/test-documents/testException2.doc | Bin 0 -> 58368 bytes .../testFontAfterBufferedText.rtf | 7 + .../resources/test-documents/testFooter.ods | Bin 0 -> 7207 bytes .../resources/test-documents/testFooter.odt | Bin 0 -> 8381 bytes .../test/resources/test-documents/testMSG.msg | Bin 0 -> 20480 bytes .../test-documents/testMSG_att_doc.msg | Bin 0 -> 52224 bytes .../test-documents/testMSG_att_msg.msg | Bin 0 -> 71680 bytes .../test-documents/testMSG_chinese.msg | Bin 0 -> 48129 bytes .../test-documents/testMSG_forwarded.msg | Bin 0 -> 25600 bytes .../test-documents/testMasterFooter.odp | Bin 0 -> 13975 bytes .../test-documents/testNPEOpenDocument.odt | Bin 0 -> 18304 bytes .../test-documents/testODFwithOOo3.odt | Bin 0 -> 24286 bytes .../test-documents/testODT-TIKA-6000.odt | Bin 0 -> 3888830 bytes .../test-documents/testOpenOffice2.odf | Bin 0 -> 10977 bytes .../test-documents/testOpenOffice2.odt | Bin 0 -> 26448 bytes .../test/resources/test-documents/testPPM.ppm | 4 + .../test/resources/test-documents/testPPT.potm | Bin 0 -> 40102 bytes .../test/resources/test-documents/testPPT.ppsm | Bin 0 -> 36545 bytes .../test/resources/test-documents/testPPT.ppsx | Bin 0 -> 36521 bytes .../test/resources/test-documents/testPPT.ppt | Bin 0 -> 16384 bytes .../test/resources/test-documents/testPPT.pptm | Bin 0 -> 36541 bytes .../test/resources/test-documents/testPPT.pptx | Bin 0 -> 36518 bytes .../test/resources/test-documents/testPPT.thmx | Bin 0 -> 42485 bytes .../test/resources/test-documents/testPPT.xps | Bin 0 -> 75442 bytes .../test-documents/testPPTX_Thumbnail.pptx | Bin 0 -> 42580 bytes .../resources/test-documents/testPPT_2imgs.pptx | Bin 0 -> 59246 bytes .../test-documents/testPPT_autodate.ppt | Bin 0 -> 148992 bytes .../test-documents/testPPT_autodate.pptx | Bin 0 -> 47707 bytes .../test-documents/testPPT_comment.ppt | Bin 0 -> 86016 bytes .../test-documents/testPPT_comment.pptx | Bin 0 -> 30939 bytes .../test-documents/testPPT_custom_props.ppt | Bin 0 -> 104960 bytes .../test-documents/testPPT_custom_props.pptx | Bin 0 -> 37864 bytes .../test-documents/testPPT_embedded2.ppt | Bin 0 -> 92160 bytes .../testPPT_embedded_two_slides.pptx | Bin 0 -> 255364 bytes .../test-documents/testPPT_embeded.ppt | Bin 0 -> 224768 bytes .../test-documents/testPPT_embeded.pptx | Bin 0 -> 202969 bytes .../test-documents/testPPT_masterFooter.ppt | Bin 0 -> 139776 bytes .../test-documents/testPPT_masterFooter.pptx | Bin 0 -> 35128 bytes .../test-documents/testPPT_masterText.ppt | Bin 0 -> 117760 bytes .../test-documents/testPPT_masterText.pptx | Bin 0 -> 32270 bytes .../test-documents/testPPT_masterText2.ppt | Bin 0 -> 102912 bytes .../test-documents/testPPT_masterText2.pptx | Bin 0 -> 32291 bytes .../testPPT_protected_passtika.ppt | Bin 0 -> 43008 bytes .../testPPT_protected_passtika.pptx | Bin 0 -> 41472 bytes .../test-documents/testPPT_various.ppt | Bin 0 -> 160768 bytes .../test-documents/testPPT_various.pptx | Bin 0 -> 56659 bytes .../test-documents/testPROJECT2003.mpp | Bin 0 -> 125440 bytes .../test-documents/testPROJECT2007.mpp | Bin 0 -> 147968 bytes .../test/resources/test-documents/testPST.pst | Bin 0 -> 271360 bytes .../resources/test-documents/testPUBLISHER.pub | Bin 0 -> 65536 bytes .../resources/test-documents/testRTF-ms932.rtf | 30 + .../test/resources/test-documents/testRTF.rtf | 17 + .../test-documents/testRTFBoldItalic.rtf | 164 + .../test-documents/testRTFControls.rtf | 165 + .../testRTFCorruptListOverride.rtf | 95 + .../test-documents/testRTFEmbeddedFiles.rtf | 6856 ++++++++++++++++++ .../test-documents/testRTFEmbeddedLink.rtf | 1438 ++++ .../testRTFHexEscapeInsideWord.rtf | 4 + .../test-documents/testRTFHyperlink.rtf | 598 ++ .../testRTFIgnoredControlWord.rtf | 17 + .../test-documents/testRTFInvalidUnicode.rtf | 11 + .../test-documents/testRTFJapanese.rtf | 87 + .../test-documents/testRTFListLibreOffice.rtf | 67 + .../test-documents/testRTFListMicrosoftWord.rtf | 227 + .../test-documents/testRTFListOverride.rtf | 424 ++ .../test-documents/testRTFNewlines.rtf | 27 + .../test-documents/testRTFRegularImages.rtf | 1241 ++++ .../testRTFTableCellSeparation.rtf | 7 + .../testRTFTableCellSeparation2.rtf | 3 + .../test-documents/testRTFUmlautSpaces.rtf | 3 + .../test-documents/testRTFUmlautSpaces2.rtf | 8 + .../test-documents/testRTFUnicodeGothic.rtf | 5 + ...TFUnicodeUCNControlWordCharacterDoubling.rtf | 8 + .../resources/test-documents/testRTFVarious.rtf | 329 + .../testRTFWindowsCodepage1250.rtf | 5 + .../test-documents/testRTFWithCurlyBraces.rtf | 44 + .../testRTFWord2010CzechCharacters.rtf | 190 + .../testRTFWordPadCzechCharacters.rtf | 5 + .../resources/test-documents/testStyles.odt | Bin 0 -> 11663 bytes .../test/resources/test-documents/testVISIO.vsd | Bin 0 -> 45568 bytes .../resources/test-documents/testWINMAIL.dat | Bin 0 -> 66276 bytes .../test/resources/test-documents/testWORD.doc | Bin 0 -> 32768 bytes .../test/resources/test-documents/testWORD6.doc | Bin 0 -> 6656 bytes .../resources/test-documents/testWORD_1img.doc | Bin 0 -> 14848 bytes .../resources/test-documents/testWORD_1img.docx | Bin 0 -> 8325 bytes .../resources/test-documents/testWORD_3imgs.doc | Bin 0 -> 36352 bytes .../test-documents/testWORD_3imgs.docx | Bin 0 -> 31303 bytes .../testWORD_bold_character_runs.doc | Bin 0 -> 22016 bytes .../testWORD_bold_character_runs.docx | Bin 0 -> 12912 bytes .../testWORD_bold_character_runs2.doc | Bin 0 -> 22016 bytes .../testWORD_bold_character_runs2.docx | Bin 0 -> 12863 bytes .../testWORD_closingSmartQInHyperLink.doc | Bin 0 -> 26624 bytes .../test-documents/testWORD_custom_props.doc | Bin 0 -> 22528 bytes .../test-documents/testWORD_custom_props.docx | Bin 0 -> 13942 bytes .../test-documents/testWORD_embedded_pdf.doc | Bin 0 -> 1491456 bytes .../test-documents/testWORD_embedded_pdf.docx | Bin 0 -> 63294 bytes .../test-documents/testWORD_embedded_rtf.doc | Bin 0 -> 16384 bytes .../test-documents/testWORD_embeded.doc | Bin 0 -> 319488 bytes .../test-documents/testWORD_embeded.docx | Bin 0 -> 157830 bytes .../testWORD_header_hyperlink.doc | Bin 0 -> 22528 bytes .../testWORD_missing_ooxml_bean1.docx | Bin 0 -> 17913 bytes .../test-documents/testWORD_missing_text.docx | Bin 0 -> 31592 bytes .../test-documents/testWORD_multi_authors.doc | Bin 0 -> 22528 bytes .../test-documents/testWORD_multi_authors.docx | Bin 0 -> 12054 bytes .../test-documents/testWORD_no_format.doc | Bin 0 -> 74752 bytes .../test-documents/testWORD_no_format.docx | Bin 0 -> 37018 bytes .../test-documents/testWORD_null_style.docx | Bin 0 -> 29018 bytes .../test-documents/testWORD_numbered_list.doc | Bin 0 -> 44032 bytes .../test-documents/testWORD_numbered_list.docx | Bin 0 -> 24696 bytes .../testWORD_override_list_numbering.doc | Bin 0 -> 56320 bytes .../testWORD_override_list_numbering.docx | Bin 0 -> 15746 bytes .../testWORD_protected_passtika.docx | Bin 0 -> 14336 bytes .../test-documents/testWORD_tabular_symbol.doc | Bin 0 -> 10240 bytes .../test-documents/testWORD_text_box.docx | Bin 0 -> 25271 bytes .../test-documents/testWORD_various.doc | Bin 0 -> 35328 bytes .../test-documents/testWORD_various.docx | Bin 0 -> 19169 bytes .../test-documents/testWORKSSpreadsheet7.0.xlr | Bin 0 -> 10752 bytes .../resources/test-documents/testWordArt.pptx | Bin 0 -> 37792 bytes .../test-documents/testXLSX_Thumbnail.xlsx | Bin 0 -> 10318 bytes .../resources/test-documents/test_TIKA-1251.doc | Bin 0 -> 50688 bytes .../test-documents/test_embedded_package.rtf | 71 + .../test-documents/test_embedded_zip.pptx | Bin 0 -> 345027 bytes .../testsolidworksAssembly2013SP2.SLDASM | Bin 0 -> 209408 bytes .../testsolidworksAssembly2014SP0.SLDASM | Bin 0 -> 238080 bytes .../testsolidworksDrawing2013SP2.SLDDRW | Bin 0 -> 180224 bytes .../testsolidworksDrawing2014SP0.SLDDRW | Bin 0 -> 201216 bytes .../testsolidworksPart2013SP2.SLDPRT | Bin 0 -> 1010176 bytes .../testsolidworksPart2014SP0.SLDPRT | Bin 0 -> 1043456 bytes .../src/test/resources/test-documents/moby.zip | Bin 0 -> 606033 bytes .../test-documents/tableHeaders.numbers | Bin 0 -> 89554 bytes .../resources/test-documents/tableNames.numbers | Bin 0 -> 88246 bytes .../resources/test-documents/test-documents.7z | Bin 0 -> 66817 bytes .../resources/test-documents/test-documents.rar | Bin 0 -> 67945 bytes .../resources/test-documents/test-documents.tar | Bin 0 -> 133120 bytes .../test-documents/test-documents.tar.Z | Bin 0 -> 103647 bytes .../test-documents/test-documents.tbz2 | Bin 0 -> 71127 bytes .../resources/test-documents/test-documents.tgz | Bin 0 -> 69060 bytes .../resources/test-documents/test-documents.zip | Bin 0 -> 68403 bytes .../test-documents/test7Z_protected_passTika.7z | Bin 0 -> 260 bytes .../resources/test-documents/testARofSND.ar | Bin 0 -> 3936 bytes .../resources/test-documents/testARofText.ar | 5 + .../test-documents/testBulletPoints.key | Bin 0 -> 213830 bytes .../resources/test-documents/testEmbedded.zip | Bin 0 -> 340 bytes .../resources/test-documents/testKeynote.key | Bin 0 -> 221745 bytes .../test-documents/testMasterSlideTable.key | Bin 0 -> 220184 bytes .../test-documents/testNumbers.numbers | Bin 0 -> 134571 bytes .../test-documents/testNumbersCharts.numbers | Bin 0 -> 104547 bytes .../test-documents/testOpenOffice2.odt | Bin 0 -> 26448 bytes .../resources/test-documents/testPages.pages | Bin 0 -> 134152 bytes .../test-documents/testPagesComments.pages | Bin 0 -> 154546 bytes .../testPagesHeadersFootersAlphaLower.pages | Bin 0 -> 168501 bytes .../testPagesHeadersFootersAlphaUpper.pages | Bin 0 -> 168995 bytes .../testPagesHeadersFootersFootnotes.pages | Bin 0 -> 177328 bytes .../testPagesHeadersFootersRomanLower.pages | Bin 0 -> 103923 bytes .../testPagesHeadersFootersRomanUpper.pages | Bin 0 -> 174197 bytes .../test-documents/testPagesLayout.pages | Bin 0 -> 66480 bytes .../test-documents/testPagesPwdProtected.pages | Bin 0 -> 33166 bytes .../test/resources/test-documents/testSVG.svgz | Bin 0 -> 222 bytes .../test/resources/test-documents/testTXT.zlib | Bin 0 -> 55 bytes .../resources/test-documents/testTables.key | Bin 0 -> 216497 bytes .../resources/test-documents/testTextBoxes.key | Bin 0 -> 208981 bytes .../test-documents/testAnnotations.pdf | Bin 0 -> 18580 bytes .../test-documents/testExtraSpaces.pdf | Bin 0 -> 20743 bytes .../test-documents/testOverlappingText.pdf | Bin 0 -> 899 bytes .../test-documents/testPDF-custommetadata.pdf | Bin 0 -> 7495 bytes .../test/resources/test-documents/testPDF.pdf | Bin 0 -> 34824 bytes .../testPDFEmbeddingAndEmbedded.docx | Bin 0 -> 34139 bytes .../testPDFFileEmbInAnnotation.pdf | Bin 0 -> 97211 bytes .../resources/test-documents/testPDFPackage.pdf | Bin 0 -> 92359 bytes .../test-documents/testPDFTripleLangTitle.pdf | Bin 0 -> 1719 bytes .../test-documents/testPDFTwoTextBoxes.pdf | Bin 0 -> 57100 bytes .../resources/test-documents/testPDFVarious.pdf | Bin 0 -> 205491 bytes .../testPDF_PDFEncodedStringInXMP.pdf | Bin 0 -> 7210 bytes .../test-documents/testPDF_Version.10.x.pdf | Bin 0 -> 5811 bytes .../testPDF_Version.11.x.PDFA-1b.pdf | Bin 0 -> 23081 bytes .../test-documents/testPDF_Version.4.x.pdf | Bin 0 -> 10007 bytes .../test-documents/testPDF_Version.5.x.pdf | Bin 0 -> 5953 bytes .../test-documents/testPDF_Version.6.x.pdf | Bin 0 -> 5903 bytes .../test-documents/testPDF_Version.7.x.pdf | Bin 0 -> 5903 bytes .../test-documents/testPDF_Version.8.x.pdf | Bin 0 -> 5903 bytes .../test-documents/testPDF_Version.9.x.pdf | Bin 0 -> 5998 bytes .../test-documents/testPDF_acroform3.pdf | Bin 0 -> 26746 bytes .../resources/test-documents/testPDF_bom.pdf | Bin 0 -> 7645 bytes .../test-documents/testPDF_bookmarks.pdf | Bin 0 -> 9487 bytes .../test-documents/testPDF_childAttachments.pdf | Bin 0 -> 2318262 bytes .../testPDF_multiFormatEmbFiles.pdf | Bin 0 -> 2662 bytes ..._no_extract_no_accessibility_owner_empty.pdf | 87 + ...F_no_extract_no_accessibility_owner_user.pdf | 87 + ...no_extract_yes_accessibility_owner_empty.pdf | 87 + ..._no_extract_yes_accessibility_owner_user.pdf | 87 + .../test-documents/testPDF_protected.pdf | Bin 0 -> 506064 bytes .../test-documents/testPDF_twoAuthors.pdf | Bin 0 -> 12628 bytes .../test-documents/WFPC2u5780205r_c0fx.fits | 1814 +++++ .../resources/test-documents/Zamora2010.dif | 169 + .../breidamerkurjokull_radar_profiles_2009.mat | Bin 0 -> 14748772 bytes .../test-documents/envi_test_header.hdr | 16 + .../gdas1.forecmwf.2014062612.grib2 | Bin 0 -> 2489194 bytes .../test-documents/sampleFile.iso19139 | 453 ++ .../sresa1b_ncar_ccsm3_0_run1_200001.nc | Bin 0 -> 2767916 bytes .../src/test/resources/test-documents/test.hdf | Bin 0 -> 542529 bytes .../src/test/resources/test-documents/test.he5 | Bin 0 -> 1396916 bytes ...-2_metabolite profiling_NMR spectroscopy.txt | 51 + .../testISATab_BII-I-1/a_metabolome.txt | 112 + .../testISATab_BII-I-1/a_microarray.txt | 15 + .../testISATab_BII-I-1/a_proteome.txt | 19 + .../testISATab_BII-I-1/a_transcriptome.txt | 49 + .../testISATab_BII-I-1/i_investigation.txt | 164 + .../testISATab_BII-I-1/s_BII-S-1.txt | 165 + .../testISATab_BII-I-1/s_BII-S-2.txt | 15 + .../resources/test-documents/test_mat_text.mat | Bin 0 -> 183 bytes .../resources/test-documents/english.cp500.txt | 1 + .../test/resources/test-documents/resume.html | 73 + .../resources/test-documents/russian.cp866.txt | 6 + .../src/test/resources/test-documents/test.fb2 | 350 + .../test-documents/testOCTET_header.dbase3 | Bin 0 -> 194 bytes .../test/resources/test-documents/testXML.xml | 48 + .../test/resources/test-documents/testXML2.xml | 1 + .../test/resources/test-documents/testXML3.xml | 23 + .../StringsConfig-full.properties | 18 + .../StringsConfig-partial.properties | 16 + .../resources/test-documents/big-preamble.html | 827 +++ .../test-documents/boilerplate-whitespace.html | 27 + .../resources/test-documents/boilerplate.html | 41 + .../test/resources/test-documents/rsstest.rss | 36 + .../test/resources/test-documents/testATOM.atom | 27 + .../test/resources/test-documents/testHTML.html | 28 + .../testHTMLNoisyMetaEncoding_1.html | 77 + .../testHTMLNoisyMetaEncoding_2.html | 77 + .../testHTMLNoisyMetaEncoding_3.html | 77 + .../testHTMLNoisyMetaEncoding_4.html | 77 + .../test/resources/test-documents/testRFC822 | 41 + .../test-documents/testRFC822-limitedheaders | 9 + .../test-documents/testRFC822-multipart | 111 + .../resources/test-documents/testRFC822_base64 | 8 + .../test-documents/testRFC822_encrypted_zip | 61 + .../test-documents/testRFC822_i18nheaders | 9 + .../test-documents/testRFC822_normal_zip | 61 + .../resources/test-documents/testRFC822_oddfrom | 2105 ++++++ .../resources/test-documents/testRFC822_quoted | 13 + .../test-documents/testUserDefinedCharset.mhtml | 21 + .../resources/test-documents/testXHTML.html | 29 + .../test/resources/test-documents/tika434.html | 914 +++ tika-parsers/pom.xml | 10 +- .../parser/fork/ForkParserIntegrationTest.java | 2 +- .../src/test/resources/log4j.properties | 24 + .../apache/tika/config/TIKA-1558-blacklist.xml | 29 + .../tika/config/TIKA-1558-blacklistsub.xml | 24 + .../config/TIKA-1702-detector-blacklist.xml | 31 + .../config/TIKA-1702-translator-default.xml | 24 + .../TIKA-1702-translator-empty-default.xml | 22 + .../tika/config/TIKA-1702-translator-empty.xml | 20 + .../config/TIKA-1708-detector-composite.xml | 25 + .../tika/config/TIKA-1708-detector-default.xml | 26 + .../org/apache/tika/mime/custom-mimetypes.xml | 23 + .../GLDAS_CLM10SUBP_3H.A19790202.0000.001.grb | Bin 0 -> 1362900 bytes .../resources/test-documents/NUTCH-1997.cbor | 30 + .../test/resources/test-documents/TIKA-216.tgz | Bin 0 -> 1270 bytes ...tive_layer_arcss_grid_barrow_alaska_2012.dif | 61 + ...lues_of_alkanes_extracted_from_paleosols.dif | 84 + .../test-documents/egyl03.gdas.200811.00Z.grb2 | Bin 0 -> 3447292 bytes .../test-documents/mock/embedded_then_npe.xml | 36 + .../resources/test-documents/mock/example.xml | 51 + .../resources/test-documents/mock/fake_oom.xml | 25 + .../test-documents/mock/heavy_hang.xml | 25 + .../test-documents/mock/nothing_bad.xml | 26 + .../test-documents/mock/null_pointer.xml | 25 + .../test-documents/mock/null_pointer_no_msg.xml | 25 + .../resources/test-documents/mock/real_oom.xml | 24 + .../resources/test-documents/mock/sleep.xml | 25 + .../test-documents/mock/sleep_interruptible.xml | 25 + .../mock/sleep_not_interruptible.xml | 25 + .../test-documents/test-documents-spanned.z01 | Bin 0 -> 65536 bytes .../test-documents/test-documents-spanned.zip | Bin 0 -> 3488 bytes .../test-documents/test-documents.cpio | Bin 0 -> 116224 bytes .../test-documents/test-zip-of-zip.zip | Bin 0 -> 299 bytes .../src/test/resources/test-documents/test1.swf | Bin 0 -> 21054 bytes .../src/test/resources/test-documents/test3.swf | Bin 0 -> 51562 bytes .../resources/test-documents/testACCESS.mdb | Bin 0 -> 110592 bytes .../resources/test-documents/testAMR-WB.amr | Bin 0 -> 3609 bytes .../test/resources/test-documents/testAMR.amr | Bin 0 -> 3620 bytes .../test/resources/test-documents/testAPK.apk | Bin 0 -> 11740 bytes .../test/resources/test-documents/testASF.asf | Bin 0 -> 62439 bytes .../resources/test-documents/testASiCE.asice | Bin 0 -> 2916 bytes .../resources/test-documents/testASiCS.asics | Bin 0 -> 2705 bytes .../resources/test-documents/testBDB_btree_2.db | Bin 0 -> 8192 bytes .../resources/test-documents/testBDB_btree_3.db | Bin 0 -> 8192 bytes .../resources/test-documents/testBDB_btree_4.db | Bin 0 -> 8192 bytes .../resources/test-documents/testBDB_btree_5.db | Bin 0 -> 8192 bytes .../resources/test-documents/testBDB_hash_2.db | Bin 0 -> 12288 bytes .../resources/test-documents/testBDB_hash_3.db | Bin 0 -> 12288 bytes .../resources/test-documents/testBDB_hash_4.db | Bin 0 -> 12288 bytes .../resources/test-documents/testBDB_hash_5.db | Bin 0 -> 12288 bytes .../resources/test-documents/testBIBTEX.bib | 21 + .../test/resources/test-documents/testBMPfp.txt | 3 + .../src/test/resources/test-documents/testC.c | 6 + .../test/resources/test-documents/testCOREL.shw | Bin 0 -> 77824 bytes .../test/resources/test-documents/testCSS.css | 48 + .../test/resources/test-documents/testCSV.csv | 23 + .../resources/test-documents/testComment.doc | Bin 0 -> 22528 bytes .../resources/test-documents/testComment.docx | Bin 0 -> 11019 bytes .../resources/test-documents/testComment.pdf | Bin 0 -> 68398 bytes .../resources/test-documents/testComment.ppt | Bin 0 -> 101376 bytes .../resources/test-documents/testComment.pptx | Bin 0 -> 34979 bytes .../resources/test-documents/testComment.rtf | 169 + .../resources/test-documents/testComment.xls | Bin 0 -> 23040 bytes .../resources/test-documents/testComment.xlsx | Bin 0 -> 9692 bytes .../test/resources/test-documents/testDITA.dita | 34 + .../resources/test-documents/testDITA.ditamap | 23 + .../resources/test-documents/testDITA2.dita | 33 + .../test/resources/test-documents/testDOTM.dotm | Bin 0 -> 65527 bytes .../resources/test-documents/testDetached.p7s | Bin 0 -> 2941 bytes .../test/resources/test-documents/testEAR.ear | Bin 0 -> 1086 bytes .../test/resources/test-documents/testEMF.emf | Bin 0 -> 60400 bytes .../test/resources/test-documents/testEMLX.emlx | 55 + .../test/resources/test-documents/testFITS.fits | 5 + .../test/resources/test-documents/testFLAC.flac | Bin 0 -> 10604 bytes .../test/resources/test-documents/testFLAC.oga | Bin 0 -> 10820 bytes .../resources/test-documents/testFOXMAIL.box | 1327 ++++ .../resources/test-documents/testFreeBSD-x86-64 | Bin 0 -> 6369 bytes .../test-documents/testGroupWiseEml.eml | 58 + .../src/test/resources/test-documents/testH.h | 5 + .../resources/test-documents/testHTML_utf8.html | 25 + .../resources/test-documents/testHWP_3.0.hwp | Bin 0 -> 9287 bytes .../resources/test-documents/testHWP_5.0.hwp | Bin 0 -> 16384 bytes .../test/resources/test-documents/testINDD.indd | Bin 0 -> 880640 bytes .../test/resources/test-documents/testIPA.ipa | Bin 0 -> 163803 bytes .../test/resources/test-documents/testJAR.jar | Bin 0 -> 441 bytes .../test-documents/testJAR_with_HTML.jar | Bin 0 -> 5594 bytes .../test-documents/testJAR_with_PEHDR.jar | Bin 0 -> 35343 bytes .../test-documents/testJAVAPROPS.properties | 22 + .../resources/test-documents/testJNILIB.jnilib | Bin 0 -> 35332 bytes .../test/resources/test-documents/testJPEG.jp2 | Bin 0 -> 25725 bytes .../src/test/resources/test-documents/testJS.js | 53 + .../test/resources/test-documents/testKML.kml | 917 +++ .../test/resources/test-documents/testKMZ.kmz | Bin 0 -> 8106 bytes .../resources/test-documents/testLinux-arm-32le | Bin 0 -> 5517 bytes .../test-documents/testLinux-mips-32be | Bin 0 -> 8125 bytes .../test-documents/testLinux-mips-32le | Bin 0 -> 38051 bytes .../resources/test-documents/testLinux-ppc-32be | Bin 0 -> 248480 bytes .../resources/test-documents/testLinux-x86-64 | Bin 0 -> 8377 bytes .../resources/test-documents/testLotusEml.eml | 71 + .../test/resources/test-documents/testMATLAB.m | 4 + .../test-documents/testMATLAB_barcast.m | 383 + .../test-documents/testMATLAB_wtsgaus.m | 52 + .../test-documents/testMHTMLFirefox.mhtml | 455 ++ .../test/resources/test-documents/testMKV.mkv | Bin 0 -> 82969 bytes .../test/resources/test-documents/testMYSQL.MYD | Bin 0 -> 24 bytes .../test/resources/test-documents/testMYSQL.MYI | Bin 0 -> 1024 bytes .../test/resources/test-documents/testMYSQL.frm | Bin 0 -> 8594 bytes .../test/resources/test-documents/testOPUS.opus | Bin 0 -> 1059 bytes .../test-documents/testOptionalHyphen.doc | Bin 0 -> 22016 bytes .../test-documents/testOptionalHyphen.docx | Bin 0 -> 10382 bytes .../test-documents/testOptionalHyphen.pdf | Bin 0 -> 44954 bytes .../test-documents/testOptionalHyphen.ppt | Bin 0 -> 100864 bytes .../test-documents/testOptionalHyphen.pptx | Bin 0 -> 33173 bytes .../test-documents/testOptionalHyphen.rtf | 158 + .../test/resources/test-documents/testPBM.pbm | 3 + .../test/resources/test-documents/testPGM.pgm | 4 + .../test/resources/test-documents/testPICT.pct | Bin 0 -> 23454 bytes .../resources/test-documents/testPPT_2imgs.ppt | Bin 0 -> 124928 bytes .../resources/test-documents/testPageNumber.pdf | Bin 0 -> 52020 bytes .../test-documents/testPhoneNumberExtractor.odt | Bin 0 -> 15244 bytes .../test-documents/testPopupAnnotation.pdf | Bin 0 -> 9081 bytes .../resources/test-documents/testQUATTRO.qpw | Bin 0 -> 4608 bytes .../resources/test-documents/testQUATTRO.wb3 | Bin 0 -> 5120 bytes .../test/resources/test-documents/testRDF.rdf | 23 + .../resources/test-documents/testRFC822-CC-BCC | 44 + .../resources/test-documents/testRFC822-big | 199 + .../resources/test-documents/testSQLITE3.db | Bin 0 -> 2048 bytes .../test/resources/test-documents/testSVG.svg | 7 + .../resources/test-documents/testSolaris-x86-32 | Bin 0 -> 6404 bytes .../test-documents/testStarOffice-5.2-calc.sdc | Bin 0 -> 17408 bytes .../test-documents/testStarOffice-5.2-draw.sda | Bin 0 -> 29184 bytes .../testStarOffice-5.2-impress.sdd | Bin 0 -> 29184 bytes .../testStarOffice-5.2-writer.sdw | Bin 0 -> 8192 bytes .../resources/test-documents/testTXT-tika.axx | Bin 0 -> 334 bytes .../test/resources/test-documents/testTXT.txt | 2 + .../test/resources/test-documents/testTXT.zlib0 | Bin 0 -> 58 bytes .../test/resources/test-documents/testTXT.zlib5 | Bin 0 -> 55 bytes .../test/resources/test-documents/testTXT.zlib9 | Bin 0 -> 55 bytes .../test-documents/testTXTNonASCIIUTF8.txt | 7 + .../test-documents/testThunderbirdEml.eml | 32 + .../resources/test-documents/testTinyPE.exe | Bin 0 -> 1024 bytes .../resources/test-documents/testVISIO.vsdm | Bin 0 -> 32360 bytes .../resources/test-documents/testVISIO.vsdx | Bin 0 -> 32350 bytes .../resources/test-documents/testVISIO.vssm | Bin 0 -> 32358 bytes .../resources/test-documents/testVISIO.vssx | Bin 0 -> 32349 bytes .../resources/test-documents/testVISIO.vstm | Bin 0 -> 32361 bytes .../resources/test-documents/testVISIO.vstx | Bin 0 -> 32350 bytes .../resources/test-documents/testVORBIS.ogg | Bin 0 -> 4241 bytes .../test-documents/testVORCalcTemplate.vor | Bin 0 -> 17408 bytes .../test-documents/testVORDrawTemplate.vor | Bin 0 -> 29696 bytes .../test-documents/testVORImpressTemplate.vor | Bin 0 -> 30208 bytes .../test-documents/testVORWriterTemplate.vor | Bin 0 -> 8192 bytes .../test/resources/test-documents/testWAR.war | Bin 0 -> 1003 bytes .../test-documents/testWEBARCHIVE.webarchive | 646 ++ .../test/resources/test-documents/testWEBM.webm | Bin 0 -> 39745 bytes .../test/resources/test-documents/testWMA.wma | Bin 0 -> 27747 bytes .../test/resources/test-documents/testWMF.wmf | Bin 0 -> 51590 bytes .../test/resources/test-documents/testWMV.wmv | Bin 0 -> 113878 bytes .../test/resources/test-documents/testWORD.docx | Bin 0 -> 13436 bytes .../testWORD_protected_passtika.doc | Bin 0 -> 22016 bytes .../test/resources/test-documents/testWORKS.wps | Bin 0 -> 9728 bytes .../resources/test-documents/testWORKS2000.wps | Bin 0 -> 5120 bytes .../testWORKSWordProcessor3.0.wps | Bin 0 -> 3072 bytes .../testWORKSWordProcessor4.0.wps | Bin 0 -> 3584 bytes .../resources/test-documents/testWebVTT.vtt | 33 + .../test-documents/test_list_override.rtf | 21 + .../test-documents/test_recursive_embedded.docx | Bin 0 -> 27082 bytes .../test_recursive_embedded_npe.docx | Bin 0 -> 27817 bytes tika-test-resources/pom.xml | 90 - .../src/test/resources/log4j.properties | 24 - .../apache/tika/config/TIKA-1558-blacklist.xml | 29 - .../tika/config/TIKA-1558-blacklistsub.xml | 24 - .../config/TIKA-1702-detector-blacklist.xml | 31 - .../config/TIKA-1702-translator-default.xml | 24 - .../TIKA-1702-translator-empty-default.xml | 22 - .../tika/config/TIKA-1702-translator-empty.xml | 20 - .../config/TIKA-1708-detector-composite.xml | 25 - .../tika/config/TIKA-1708-detector-default.xml | 26 - .../org/apache/tika/mime/custom-mimetypes.xml | 23 - .../tika/parser/ner/opennlp/ModelGetter.groovy | 93 - .../tika/parser/ner/opennlp/get-models.sh | 26 - .../apache/tika/parser/ner/regex/ner-regex.txt | 17 - .../org/apache/tika/parser/ner/tika-config.xml | 27 - .../test-documents/AutoDetectParser.class | Bin 3794 -> 0 bytes .../test/resources/test-documents/Doc1_ole.doc | Bin 89600 -> 0 bytes .../test-documents/EmbeddedDocument.docx | Bin 13219 -> 0 bytes .../test-documents/EmbeddedOutlook.docx | Bin 113242 -> 0 bytes .../resources/test-documents/EmbeddedPDF.docx | Bin 99389 -> 0 bytes .../GLDAS_CLM10SUBP_3H.A19790202.0000.001.grb | Bin 1362900 -> 0 bytes .../resources/test-documents/NUTCH-1997.cbor | 30 - .../resources/test-documents/NullHeader.docx | Bin 4355 -> 0 bytes .../test/resources/test-documents/TIKA-216.tgz | Bin 1270 -> 0 bytes .../test-documents/WFPC2u5780205r_c0fx.fits | 1814 ----- .../resources/test-documents/Zamora2010.dif | 169 - ...tive_layer_arcss_grid_barrow_alaska_2012.dif | 61 - .../resources/test-documents/big-preamble.html | 827 --- .../test-documents/boilerplate-whitespace.html | 27 - .../resources/test-documents/boilerplate.html | 41 - .../breidamerkurjokull_radar_profiles_2009.mat | Bin 14748772 -> 0 bytes ...lues_of_alkanes_extracted_from_paleosols.dif | 84 - .../resources/test-documents/chm/IMJPCL.CHM | Bin 757069 -> 0 bytes .../resources/test-documents/chm/IMJPCLE.CHM | Bin 256718 -> 0 bytes .../resources/test-documents/chm/IMTCEN.CHM | Bin 452547 -> 0 bytes .../test/resources/test-documents/chm/admin.chm | Bin 49749 -> 0 bytes .../resources/test-documents/chm/cmak_ops.CHM | Bin 82895 -> 0 bytes .../resources/test-documents/chm/comexp.CHM | Bin 109882 -> 0 bytes .../resources/test-documents/chm/gpedit.CHM | Bin 49537 -> 0 bytes .../test/resources/test-documents/chm/tcpip.CHM | Bin 33186 -> 0 bytes .../resources/test-documents/chm/wmicontrol.CHM | Bin 32096 -> 0 bytes .../test/resources/test-documents/complex.mbox | 291 - .../test-documents/egyl03.gdas.200811.00Z.grb2 | Bin 3447292 -> 0 bytes .../resources/test-documents/english.cp500.txt | 1 - .../test-documents/envi_test_header.hdr | 16 - .../resources/test-documents/footnotes.docx | Bin 12823 -> 0 bytes .../gdas1.forecmwf.2014062612.grib2 | Bin 2489194 -> 0 bytes .../resources/test-documents/headerPic.docx | Bin 16206 -> 0 bytes .../test/resources/test-documents/headers.mbox | 7 - .../src/test/resources/test-documents/jxl.xls | Bin 614912 -> 0 bytes .../src/test/resources/test-documents/moby.zip | Bin 606033 -> 0 bytes .../test-documents/mock/embedded_then_npe.xml | 36 - .../resources/test-documents/mock/example.xml | 51 - .../resources/test-documents/mock/fake_oom.xml | 25 - .../test-documents/mock/heavy_hang.xml | 25 - .../test-documents/mock/nothing_bad.xml | 26 - .../test-documents/mock/null_pointer.xml | 25 - .../test-documents/mock/null_pointer_no_msg.xml | 25 - .../resources/test-documents/mock/real_oom.xml | 24 - .../resources/test-documents/mock/sleep.xml | 25 - .../test-documents/mock/sleep_interruptible.xml | 25 - .../mock/sleep_not_interruptible.xml | 25 - .../resources/test-documents/multiline.mbox | 5 - .../test/resources/test-documents/pictures.ppt | Bin 75776 -> 0 bytes .../test/resources/test-documents/protect.xlsx | Bin 12968 -> 0 bytes .../resources/test-documents/protectedFile.xlsx | Bin 12968 -> 0 bytes .../test-documents/protectedSheets.xlsx | Bin 11236 -> 0 bytes .../test/resources/test-documents/quoted.mbox | 4 - .../test/resources/test-documents/resume.html | 73 - .../test/resources/test-documents/rsstest.rss | 36 - .../resources/test-documents/russian.cp866.txt | 6 - .../test-documents/sampleFile.iso19139 | 453 -- .../test/resources/test-documents/simple.mbox | 7 - .../sresa1b_ncar_ccsm3_0_run1_200001.nc | Bin 2767916 -> 0 bytes .../test-documents/tableHeaders.numbers | Bin 89554 -> 0 bytes .../resources/test-documents/tableNames.numbers | Bin 88246 -> 0 bytes .../test-documents/test-documents-spanned.z01 | Bin 65536 -> 0 bytes .../test-documents/test-documents-spanned.zip | Bin 3488 -> 0 bytes .../resources/test-documents/test-documents.7z | Bin 66817 -> 0 bytes .../test-documents/test-documents.cpio | Bin 116224 -> 0 bytes .../resources/test-documents/test-documents.rar | Bin 67945 -> 0 bytes .../resources/test-documents/test-documents.tar | Bin 133120 -> 0 bytes .../test-documents/test-documents.tar.Z | Bin 103647 -> 0 bytes .../test-documents/test-documents.tbz2 | Bin 71127 -> 0 bytes .../resources/test-documents/test-documents.tgz | Bin 69060 -> 0 bytes .../resources/test-documents/test-documents.zip | Bin 68403 -> 0 bytes .../resources/test-documents/test-outlook.msg | Bin 19968 -> 0 bytes .../test-documents/test-outlook2003.msg | Bin 83968 -> 0 bytes .../test-documents/test-zip-of-zip.zip | Bin 299 -> 0 bytes .../src/test/resources/test-documents/test.doc | Bin 9216 -> 0 bytes .../src/test/resources/test-documents/test.fb2 | 350 - .../src/test/resources/test-documents/test.hdf | Bin 542529 -> 0 bytes .../src/test/resources/test-documents/test.he5 | Bin 1396916 -> 0 bytes .../src/test/resources/test-documents/test1.swf | Bin 21054 -> 0 bytes .../src/test/resources/test-documents/test2.swf | Bin 42534 -> 0 bytes .../src/test/resources/test-documents/test3.swf | Bin 51562 -> 0 bytes .../test-documents/test7Z_protected_passTika.7z | Bin 260 -> 0 bytes .../resources/test-documents/testACCESS.mdb | Bin 110592 -> 0 bytes .../test/resources/test-documents/testAFM.afm | 50 - .../test/resources/test-documents/testAIFF.aif | Bin 3894 -> 0 bytes .../resources/test-documents/testAMR-WB.amr | Bin 3609 -> 0 bytes .../test/resources/test-documents/testAMR.amr | Bin 3620 -> 0 bytes .../test/resources/test-documents/testAPK.apk | Bin 11740 -> 0 bytes .../resources/test-documents/testARofSND.ar | Bin 3936 -> 0 bytes .../resources/test-documents/testARofText.ar | 5 - .../test/resources/test-documents/testASF.asf | Bin 62439 -> 0 bytes .../resources/test-documents/testASiCE.asice | Bin 2916 -> 0 bytes .../resources/test-documents/testASiCS.asics | Bin 2705 -> 0 bytes .../test/resources/test-documents/testATOM.atom | 27 - .../src/test/resources/test-documents/testAU.au | Bin 3868 -> 0 bytes .../resources/test-documents/testAccess2.accdb | Bin 794624 -> 0 bytes .../test-documents/testAccess2_2000.mdb | Bin 421888 -> 0 bytes .../test-documents/testAccess2_2002-2003.mdb | Bin 417792 -> 0 bytes .../test-documents/testAccess2_encrypted.accdb | Bin 557056 -> 0 bytes .../test-documents/testAccess_V1997.mdb | Bin 118784 -> 0 bytes .../test-documents/testAnnotations.pdf | Bin 18580 -> 0 bytes .../resources/test-documents/testBDB_btree_2.db | Bin 8192 -> 0 bytes .../resources/test-documents/testBDB_btree_3.db | Bin 8192 -> 0 bytes .../resources/test-documents/testBDB_btree_4.db | Bin 8192 -> 0 bytes .../resources/test-documents/testBDB_btree_5.db | Bin 8192 -> 0 bytes .../resources/test-documents/testBDB_hash_2.db | Bin 12288 -> 0 bytes .../resources/test-documents/testBDB_hash_3.db | Bin 12288 -> 0 bytes .../resources/test-documents/testBDB_hash_4.db | Bin 12288 -> 0 bytes .../resources/test-documents/testBDB_hash_5.db | Bin 12288 -> 0 bytes .../resources/test-documents/testBIBTEX.bib | 21 - .../test/resources/test-documents/testBMP.bmp | Bin 22554 -> 0 bytes .../test/resources/test-documents/testBMPfp.txt | 3 - .../test/resources/test-documents/testBPG.bpg | Bin 1824 -> 0 bytes .../resources/test-documents/testBPG_GEO.bpg | Bin 2042 -> 0 bytes .../test-documents/testBPG_commented.bpg | Bin 10281 -> 0 bytes .../testBPG_commented_xnviewmp026.bpg | Bin 12374 -> 0 bytes .../test-documents/testBinControlWord.rtf | 2 - .../test-documents/testBulletPoints.key | Bin 213830 -> 0 bytes .../src/test/resources/test-documents/testC.c | 6 - .../resources/test-documents/testCADKEY.prt | Bin 10246 -> 0 bytes .../resources/test-documents/testCADKEY2.prt | Bin 41664 -> 0 bytes .../test/resources/test-documents/testCOREL.shw | Bin 77824 -> 0 bytes .../test/resources/test-documents/testCPP.cpp | 14 - .../test/resources/test-documents/testCSS.css | 48 - .../test/resources/test-documents/testCSV.csv | 23 - .../test/resources/test-documents/testChm.chm | Bin 186259 -> 0 bytes .../test/resources/test-documents/testChm2.chm | Bin 10807437 -> 0 bytes .../test/resources/test-documents/testChm3.chm | Bin 900481 -> 0 bytes .../resources/test-documents/testComment.doc | Bin 22528 -> 0 bytes .../resources/test-documents/testComment.docx | Bin 11019 -> 0 bytes .../resources/test-documents/testComment.pdf | Bin 68398 -> 0 bytes .../resources/test-documents/testComment.ppt | Bin 101376 -> 0 bytes .../resources/test-documents/testComment.pptx | Bin 34979 -> 0 bytes .../resources/test-documents/testComment.rtf | 169 - .../resources/test-documents/testComment.xls | Bin 23040 -> 0 bytes .../resources/test-documents/testComment.xlsx | Bin 9692 -> 0 bytes .../test-documents/testControlCharacters.doc | Bin 448000 -> 0 bytes .../test/resources/test-documents/testDITA.dita | 34 - .../resources/test-documents/testDITA.ditamap | 23 - .../resources/test-documents/testDITA2.dita | 33 - .../test-documents/testDOCX_Thumbnail.docx | Bin 13810 -> 0 bytes .../test/resources/test-documents/testDOTM.dotm | Bin 65527 -> 0 bytes .../resources/test-documents/testDWG2000.dwg | Bin 675048 -> 0 bytes .../resources/test-documents/testDWG2004.dwg | Bin 39335 -> 0 bytes .../test-documents/testDWG2004_no_header.dwg | Bin 38178 -> 0 bytes .../resources/test-documents/testDWG2007.dwg | Bin 73088 -> 0 bytes .../resources/test-documents/testDWG2010.dwg | Bin 59562 -> 0 bytes .../test-documents/testDWG2010_custom_props.dwg | Bin 73791 -> 0 bytes .../test-documents/testDWGmech2004.dwg | Bin 439438 -> 0 bytes .../test-documents/testDWGmech2004DX.dwg | Bin 439563 -> 0 bytes .../test-documents/testDWGmech2005.dwg | Bin 439627 -> 0 bytes .../test-documents/testDWGmech2006.dwg | Bin 439982 -> 0 bytes .../test-documents/testDWGmech2007.dwg | Bin 479904 -> 0 bytes .../test-documents/testDWGmech2008.dwg | Bin 487456 -> 0 bytes .../test-documents/testDWGmech2009.dwg | Bin 483840 -> 0 bytes .../test-documents/testDWGmech2010.dwg | Bin 467023 -> 0 bytes .../test-documents/testDWGmech2011.dwg | Bin 466891 -> 0 bytes .../resources/test-documents/testDWGmech6.dwg | Bin 687882 -> 0 bytes .../resources/test-documents/testDetached.p7s | Bin 2941 -> 0 bytes .../test-documents/testDocumentLink.doc | Bin 812032 -> 0 bytes .../test/resources/test-documents/testEAR.ear | Bin 1086 -> 0 bytes .../test/resources/test-documents/testEMF.emf | Bin 60400 -> 0 bytes .../test/resources/test-documents/testEMLX.emlx | 55 - .../test/resources/test-documents/testEPUB.epub | Bin 29719 -> 0 bytes .../test-documents/testEXCEL-charts.xls | Bin 15360 -> 0 bytes .../test-documents/testEXCEL-formats.xls | Bin 13824 -> 0 bytes .../test-documents/testEXCEL-formats.xlsx | Bin 8303 -> 0 bytes .../test-documents/testEXCEL.strict.xlsx | Bin 10006 -> 0 bytes .../test/resources/test-documents/testEXCEL.xls | Bin 13824 -> 0 bytes .../resources/test-documents/testEXCEL.xlsb | Bin 9161 -> 0 bytes .../resources/test-documents/testEXCEL.xlsx | Bin 9453 -> 0 bytes .../resources/test-documents/testEXCEL_1img.xls | Bin 20992 -> 0 bytes .../test-documents/testEXCEL_1img.xlsx | Bin 14552 -> 0 bytes .../resources/test-documents/testEXCEL_4.xls | Bin 39942 -> 0 bytes .../resources/test-documents/testEXCEL_5.xls | Bin 7168 -> 0 bytes .../resources/test-documents/testEXCEL_95.xls | Bin 20992 -> 0 bytes .../test-documents/testEXCEL_custom_props.xls | Bin 17408 -> 0 bytes .../test-documents/testEXCEL_custom_props.xlsx | Bin 9230 -> 0 bytes .../test-documents/testEXCEL_embeded.xls | Bin 303104 -> 0 bytes .../test-documents/testEXCEL_embeded.xlsx | Bin 348405 -> 0 bytes .../testEXCEL_headers_footers.xls | Bin 33792 -> 0 bytes .../testEXCEL_headers_footers.xlsx | Bin 11740 -> 0 bytes .../testEXCEL_protected_passtika.xls | Bin 17408 -> 0 bytes .../testEXCEL_protected_passtika.xlsx | Bin 12800 -> 0 bytes .../test-documents/testEXCEL_textbox.xlsx | Bin 11017 -> 0 bytes .../resources/test-documents/testEmbedded.zip | Bin 340 -> 0 bytes .../resources/test-documents/testException1.doc | Bin 49152 -> 0 bytes .../resources/test-documents/testException2.doc | Bin 58368 -> 0 bytes .../test-documents/testExtraSpaces.pdf | Bin 20743 -> 0 bytes .../test/resources/test-documents/testFITS.fits | 5 - .../test/resources/test-documents/testFLAC.flac | Bin 10604 -> 0 bytes .../test/resources/test-documents/testFLAC.oga | Bin 10820 -> 0 bytes .../test/resources/test-documents/testFLV.flv | Bin 90580 -> 0 bytes .../resources/test-documents/testFOXMAIL.box | 1327 ---- .../testFontAfterBufferedText.rtf | 7 - .../resources/test-documents/testFooter.ods | Bin 7207 -> 0 bytes .../resources/test-documents/testFooter.odt | Bin 8381 -> 0 bytes .../resources/test-documents/testFreeBSD-x86-64 | Bin 6369 -> 0 bytes .../test/resources/test-documents/testGIF.gif | Bin 8495 -> 0 bytes .../resources/test-documents/testGROOVY.groovy | 9 - .../test-documents/testGroupWiseEml.eml | 58 - .../src/test/resources/test-documents/testH.h | 5 - .../test/resources/test-documents/testHTML.html | 28 - .../testHTMLNoisyMetaEncoding_1.html | 77 - .../testHTMLNoisyMetaEncoding_2.html | 77 - .../testHTMLNoisyMetaEncoding_3.html | 77 - .../testHTMLNoisyMetaEncoding_4.html | 77 - .../resources/test-documents/testHTML_utf8.html | 25 - .../resources/test-documents/testHWP_3.0.hwp | Bin 9287 -> 0 bytes .../resources/test-documents/testHWP_5.0.hwp | Bin 16384 -> 0 bytes .../test/resources/test-documents/testINDD.indd | Bin 880640 -> 0 bytes .../test/resources/test-documents/testIPA.ipa | Bin 163803 -> 0 bytes ...-2_metabolite profiling_NMR spectroscopy.txt | 51 - .../testISATab_BII-I-1/a_metabolome.txt | 112 - .../testISATab_BII-I-1/a_microarray.txt | 15 - .../testISATab_BII-I-1/a_proteome.txt | 19 - .../testISATab_BII-I-1/a_transcriptome.txt | 49 - .../testISATab_BII-I-1/i_investigation.txt | 164 - .../testISATab_BII-I-1/s_BII-S-1.txt | 165 - .../testISATab_BII-I-1/s_BII-S-2.txt | 15 - .../test/resources/test-documents/testJAR.jar | Bin 441 -> 0 bytes .../test-documents/testJAR_with_HTML.jar | Bin 5594 -> 0 bytes .../test-documents/testJAR_with_PEHDR.jar | Bin 35343 -> 0 bytes .../test/resources/test-documents/testJAVA.java | 14 - .../test-documents/testJAVAPROPS.properties | 22 - .../resources/test-documents/testJNILIB.jnilib | Bin 35332 -> 0 bytes .../test/resources/test-documents/testJPEG.jp2 | Bin 25725 -> 0 bytes .../test/resources/test-documents/testJPEG.jpg | Bin 7686 -> 0 bytes .../resources/test-documents/testJPEG_EXIF.jpg | Bin 16357 -> 0 bytes .../testJPEG_EXIF_emptyDateTime.jpg | Bin 24597 -> 0 bytes .../resources/test-documents/testJPEG_GEO.jpg | Bin 16482 -> 0 bytes .../resources/test-documents/testJPEG_GEO_2.jpg | Bin 20844 -> 0 bytes .../test-documents/testJPEG_commented.jpg | Bin 13325 -> 0 bytes .../testJPEG_commented_pspcs2mac.jpg | Bin 26173 -> 0 bytes .../testJPEG_commented_xnviewmp026.jpg | Bin 13910 -> 0 bytes .../test-documents/testJPEG_oddTagComponent.jpg | Bin 8330 -> 0 bytes .../src/test/resources/test-documents/testJS.js | 53 - .../test-documents/testJournalParser.pdf | Bin 985125 -> 0 bytes .../test/resources/test-documents/testKML.kml | 917 --- .../test/resources/test-documents/testKMZ.kmz | Bin 8106 -> 0 bytes .../resources/test-documents/testKeynote.key | Bin 221745 -> 0 bytes .../resources/test-documents/testLinux-arm-32le | Bin 5517 -> 0 bytes .../test-documents/testLinux-mips-32be | Bin 8125 -> 0 bytes .../test-documents/testLinux-mips-32le | Bin 38051 -> 0 bytes .../resources/test-documents/testLinux-ppc-32be | Bin 248480 -> 0 bytes .../resources/test-documents/testLinux-x86-32 | Bin 7175 -> 0 bytes .../resources/test-documents/testLinux-x86-64 | Bin 8377 -> 0 bytes .../resources/test-documents/testLotusEml.eml | 71 - .../test/resources/test-documents/testMATLAB.m | 4 - .../test-documents/testMATLAB_barcast.m | 383 - .../test-documents/testMATLAB_wtsgaus.m | 52 - .../test-documents/testMHTMLFirefox.mhtml | 455 -- .../test/resources/test-documents/testMID.mid | Bin 322 -> 0 bytes .../test/resources/test-documents/testMKV.mkv | Bin 82969 -> 0 bytes .../resources/test-documents/testMP3i18n.mp3 | Bin 40832 -> 0 bytes .../resources/test-documents/testMP3id3v1.mp3 | Bin 39416 -> 0 bytes .../test-documents/testMP3id3v1_v2.mp3 | Bin 40960 -> 0 bytes .../resources/test-documents/testMP3id3v2.mp3 | Bin 39577 -> 0 bytes .../resources/test-documents/testMP3id3v24.mp3 | Bin 39471 -> 0 bytes .../resources/test-documents/testMP3lyrics.mp3 | Bin 34688 -> 0 bytes .../resources/test-documents/testMP3noid3.mp3 | Bin 39288 -> 0 bytes .../test-documents/testMP3truncated.mp3 | Bin 65536 -> 0 bytes .../test/resources/test-documents/testMP4.m4a | Bin 4770 -> 0 bytes .../test/resources/test-documents/testMSG.msg | Bin 20480 -> 0 bytes .../test-documents/testMSG_att_doc.msg | Bin 52224 -> 0 bytes .../test-documents/testMSG_att_msg.msg | Bin 71680 -> 0 bytes .../test-documents/testMSG_chinese.msg | Bin 48129 -> 0 bytes .../test-documents/testMSG_forwarded.msg | Bin 25600 -> 0 bytes .../test/resources/test-documents/testMYSQL.MYD | Bin 24 -> 0 bytes .../test/resources/test-documents/testMYSQL.MYI | Bin 1024 -> 0 bytes .../test/resources/test-documents/testMYSQL.frm | Bin 8594 -> 0 bytes .../test-documents/testMasterFooter.odp | Bin 13975 -> 0 bytes .../test-documents/testMasterSlideTable.key | Bin 220184 -> 0 bytes .../test-documents/testNPEOpenDocument.odt | Bin 18304 -> 0 bytes .../test-documents/testNakedUTF16BOM.mp3 | Bin 2625 -> 0 bytes .../test-documents/testNumbers.numbers | Bin 134571 -> 0 bytes .../test-documents/testNumbersCharts.numbers | Bin 104547 -> 0 bytes .../test/resources/test-documents/testOCR.docx | Bin 62041 -> 0 bytes .../test/resources/test-documents/testOCR.jpg | Bin 3408 -> 0 bytes .../test/resources/test-documents/testOCR.pdf | Bin 41936 -> 0 bytes .../test/resources/test-documents/testOCR.pptx | Bin 78550 -> 0 bytes .../test-documents/testOCTET_header.dbase3 | Bin 194 -> 0 bytes .../test-documents/testODFwithOOo3.odt | Bin 24286 -> 0 bytes .../test-documents/testODT-TIKA-6000.odt | Bin 3888830 -> 0 bytes .../test/resources/test-documents/testOPUS.opus | Bin 1059 -> 0 bytes .../test-documents/testOpenOffice2.odf | Bin 10977 -> 0 bytes .../test-documents/testOpenOffice2.odt | Bin 26448 -> 0 bytes .../test-documents/testOptionalHyphen.doc | Bin 22016 -> 0 bytes .../test-documents/testOptionalHyphen.docx | Bin 10382 -> 0 bytes .../test-documents/testOptionalHyphen.pdf | Bin 44954 -> 0 bytes .../test-documents/testOptionalHyphen.ppt | Bin 100864 -> 0 bytes .../test-documents/testOptionalHyphen.pptx | Bin 33173 -> 0 bytes .../test-documents/testOptionalHyphen.rtf | 158 - .../test-documents/testOverlappingText.pdf | Bin 899 -> 0 bytes .../test/resources/test-documents/testPBM.pbm | 3 - .../test-documents/testPDF-custommetadata.pdf | Bin 7495 -> 0 bytes .../test/resources/test-documents/testPDF.pdf | Bin 34824 -> 0 bytes .../testPDFEmbeddingAndEmbedded.docx | Bin 34139 -> 0 bytes .../testPDFFileEmbInAnnotation.pdf | Bin 97211 -> 0 bytes .../resources/test-documents/testPDFPackage.pdf | Bin 92359 -> 0 bytes .../test-documents/testPDFTripleLangTitle.pdf | Bin 1719 -> 0 bytes .../test-documents/testPDFTwoTextBoxes.pdf | Bin 57100 -> 0 bytes .../resources/test-documents/testPDFVarious.pdf | Bin 205491 -> 0 bytes .../testPDF_PDFEncodedStringInXMP.pdf | Bin 7210 -> 0 bytes .../test-documents/testPDF_Version.10.x.pdf | Bin 5811 -> 0 bytes .../testPDF_Version.11.x.PDFA-1b.pdf | Bin 23081 -> 0 bytes .../test-documents/testPDF_Version.4.x.pdf | Bin 10007 -> 0 bytes .../test-documents/testPDF_Version.5.x.pdf | Bin 5953 -> 0 bytes .../test-documents/testPDF_Version.6.x.pdf | Bin 5903 -> 0 bytes .../test-documents/testPDF_Version.7.x.pdf | Bin 5903 -> 0 bytes .../test-documents/testPDF_Version.8.x.pdf | Bin 5903 -> 0 bytes .../test-documents/testPDF_Version.9.x.pdf | Bin 5998 -> 0 bytes .../test-documents/testPDF_acroform3.pdf | Bin 26746 -> 0 bytes .../resources/test-documents/testPDF_bom.pdf | Bin 7645 -> 0 bytes .../test-documents/testPDF_bookmarks.pdf | Bin 9487 -> 0 bytes .../test-documents/testPDF_childAttachments.pdf | Bin 2318262 -> 0 bytes .../testPDF_multiFormatEmbFiles.pdf | Bin 2662 -> 0 bytes ..._no_extract_no_accessibility_owner_empty.pdf | 87 - ...F_no_extract_no_accessibility_owner_user.pdf | 87 - ...no_extract_yes_accessibility_owner_empty.pdf | 87 - ..._no_extract_yes_accessibility_owner_user.pdf | 87 - .../test-documents/testPDF_protected.pdf | Bin 506064 -> 0 bytes .../test-documents/testPDF_twoAuthors.pdf | Bin 12628 -> 0 bytes .../test/resources/test-documents/testPGM.pgm | 4 - .../test/resources/test-documents/testPICT.pct | Bin 23454 -> 0 bytes .../test/resources/test-documents/testPNG.png | Bin 17041 -> 0 bytes .../test/resources/test-documents/testPPM.ppm | 4 - .../test/resources/test-documents/testPPT.potm | Bin 40102 -> 0 bytes .../test/resources/test-documents/testPPT.ppsm | Bin 36545 -> 0 bytes .../test/resources/test-documents/testPPT.ppsx | Bin 36521 -> 0 bytes .../test/resources/test-documents/testPPT.ppt | Bin 16384 -> 0 bytes .../test/resources/test-documents/testPPT.pptm | Bin 36541 -> 0 bytes .../test/resources/test-documents/testPPT.pptx | Bin 36518 -> 0 bytes .../test/resources/test-documents/testPPT.thmx | Bin 42485 -> 0 bytes .../test/resources/test-documents/testPPT.xps | Bin 75442 -> 0 bytes .../test-documents/testPPTX_Thumbnail.pptx | Bin 42580 -> 0 bytes .../resources/test-documents/testPPT_2imgs.ppt | Bin 124928 -> 0 bytes .../resources/test-documents/testPPT_2imgs.pptx | Bin 59246 -> 0 bytes .../test-documents/testPPT_autodate.ppt | Bin 148992 -> 0 bytes .../test-documents/testPPT_autodate.pptx | Bin 47707 -> 0 bytes .../test-documents/testPPT_comment.ppt | Bin 86016 -> 0 bytes .../test-documents/testPPT_comment.pptx | Bin 30939 -> 0 bytes .../test-documents/testPPT_custom_props.ppt | Bin 104960 -> 0 bytes .../test-documents/testPPT_custom_props.pptx | Bin 37864 -> 0 bytes .../test-documents/testPPT_embedded2.ppt | Bin 92160 -> 0 bytes .../testPPT_embedded_two_slides.pptx | Bin 255364 -> 0 bytes .../test-documents/testPPT_embeded.ppt | Bin 224768 -> 0 bytes .../test-documents/testPPT_embeded.pptx | Bin 202969 -> 0 bytes .../test-documents/testPPT_masterFooter.ppt | Bin 139776 -> 0 bytes .../test-documents/testPPT_masterFooter.pptx | Bin 35128 -> 0 bytes .../test-documents/testPPT_masterText.ppt | Bin 117760 -> 0 bytes .../test-documents/testPPT_masterText.pptx | Bin 32270 -> 0 bytes .../test-documents/testPPT_masterText2.ppt | Bin 102912 -> 0 bytes .../test-documents/testPPT_masterText2.pptx | Bin 32291 -> 0 bytes .../testPPT_protected_passtika.ppt | Bin 43008 -> 0 bytes .../testPPT_protected_passtika.pptx | Bin 41472 -> 0 bytes .../test-documents/testPPT_various.ppt | Bin 160768 -> 0 bytes .../test-documents/testPPT_various.pptx | Bin 56659 -> 0 bytes .../test-documents/testPROJECT2003.mpp | Bin 125440 -> 0 bytes .../test-documents/testPROJECT2007.mpp | Bin 147968 -> 0 bytes .../test/resources/test-documents/testPSD.psd | Bin 69410 -> 0 bytes .../test/resources/test-documents/testPSD2.psd | Bin 31315 -> 0 bytes .../test/resources/test-documents/testPST.pst | Bin 271360 -> 0 bytes .../resources/test-documents/testPUBLISHER.pub | Bin 65536 -> 0 bytes .../resources/test-documents/testPageNumber.pdf | Bin 52020 -> 0 bytes .../resources/test-documents/testPages.pages | Bin 134152 -> 0 bytes .../test-documents/testPagesComments.pages | Bin 154546 -> 0 bytes .../testPagesHeadersFootersAlphaLower.pages | Bin 168501 -> 0 bytes .../testPagesHeadersFootersAlphaUpper.pages | Bin 168995 -> 0 bytes .../testPagesHeadersFootersFootnotes.pages | Bin 177328 -> 0 bytes .../testPagesHeadersFootersRomanLower.pages | Bin 103923 -> 0 bytes .../testPagesHeadersFootersRomanUpper.pages | Bin 174197 -> 0 bytes .../test-documents/testPagesLayout.pages | Bin 66480 -> 0 bytes .../test-documents/testPagesPwdProtected.pages | Bin 33166 -> 0 bytes .../test-documents/testPhoneNumberExtractor.odt | Bin 15244 -> 0 bytes .../test-documents/testPopupAnnotation.pdf | Bin 9081 -> 0 bytes .../resources/test-documents/testQUATTRO.qpw | Bin 4608 -> 0 bytes .../resources/test-documents/testQUATTRO.wb3 | Bin 5120 -> 0 bytes .../test/resources/test-documents/testRDF.rdf | 23 - .../test/resources/test-documents/testRFC822 | 41 - .../resources/test-documents/testRFC822-CC-BCC | 44 - .../resources/test-documents/testRFC822-big | 199 - .../test-documents/testRFC822-limitedheaders | 9 - .../test-documents/testRFC822-multipart | 111 - .../resources/test-documents/testRFC822_base64 | 8 - .../test-documents/testRFC822_encrypted_zip | 61 - .../test-documents/testRFC822_i18nheaders | 9 - .../test-documents/testRFC822_normal_zip | 61 - .../resources/test-documents/testRFC822_oddfrom | 2105 ------ .../resources/test-documents/testRFC822_quoted | 13 - .../resources/test-documents/testRTF-ms932.rtf | 30 - .../test/resources/test-documents/testRTF.rtf | 17 - .../test-documents/testRTFBoldItalic.rtf | 164 - .../test-documents/testRTFControls.rtf | 165 - .../testRTFCorruptListOverride.rtf | 95 - .../test-documents/testRTFEmbeddedFiles.rtf | 6856 ------------------ .../test-documents/testRTFEmbeddedLink.rtf | 1438 ---- .../testRTFHexEscapeInsideWord.rtf | 4 - .../test-documents/testRTFHyperlink.rtf | 598 -- .../testRTFIgnoredControlWord.rtf | 17 - .../test-documents/testRTFInvalidUnicode.rtf | 11 - .../test-documents/testRTFJapanese.rtf | 87 - .../test-documents/testRTFListLibreOffice.rtf | 67 - .../test-documents/testRTFListMicrosoftWord.rtf | 227 - .../test-documents/testRTFListOverride.rtf | 424 -- .../test-documents/testRTFNewlines.rtf | 27 - .../test-documents/testRTFRegularImages.rtf | 1241 ---- .../testRTFTableCellSeparation.rtf | 7 - .../testRTFTableCellSeparation2.rtf | 3 - .../test-documents/testRTFUmlautSpaces.rtf | 3 - .../test-documents/testRTFUmlautSpaces2.rtf | 8 - .../test-documents/testRTFUnicodeGothic.rtf | 5 - ...TFUnicodeUCNControlWordCharacterDoubling.rtf | 8 - .../resources/test-documents/testRTFVarious.rtf | 329 - .../testRTFWindowsCodepage1250.rtf | 5 - .../test-documents/testRTFWithCurlyBraces.rtf | 44 - .../testRTFWord2010CzechCharacters.rtf | 190 - .../testRTFWordPadCzechCharacters.rtf | 5 - .../resources/test-documents/testSQLITE3.db | Bin 2048 -> 0 bytes .../test/resources/test-documents/testSVG.svg | 7 - .../test/resources/test-documents/testSVG.svgz | Bin 222 -> 0 bytes .../resources/test-documents/testSolaris-x86-32 | Bin 6404 -> 0 bytes .../resources/test-documents/testSqlite3b.db | Bin 27648 -> 0 bytes .../test-documents/testStarOffice-5.2-calc.sdc | Bin 17408 -> 0 bytes .../test-documents/testStarOffice-5.2-draw.sda | Bin 29184 -> 0 bytes .../testStarOffice-5.2-impress.sdd | Bin 29184 -> 0 bytes .../testStarOffice-5.2-writer.sdw | Bin 8192 -> 0 bytes .../resources/test-documents/testStyles.odt | Bin 11663 -> 0 bytes .../test/resources/test-documents/testTIFF.tif | Bin 25584 -> 0 bytes .../resources/test-documents/testTXT-tika.axx | Bin 334 -> 0 bytes .../test/resources/test-documents/testTXT.txt | 2 - .../test/resources/test-documents/testTXT.zlib | Bin 55 -> 0 bytes .../test/resources/test-documents/testTXT.zlib0 | Bin 58 -> 0 bytes .../test/resources/test-documents/testTXT.zlib5 | Bin 55 -> 0 bytes .../test/resources/test-documents/testTXT.zlib9 | Bin 55 -> 0 bytes .../test-documents/testTXTNonASCIIUTF8.txt | 7 - .../resources/test-documents/testTables.key | Bin 216497 -> 0 bytes .../resources/test-documents/testTextBoxes.key | Bin 208981 -> 0 bytes .../test-documents/testThunderbirdEml.eml | 32 - .../resources/test-documents/testTinyPE.exe | Bin 1024 -> 0 bytes .../resources/test-documents/testTrueType3.ttf | Bin 224592 -> 0 bytes .../test-documents/testUserDefinedCharset.mhtml | 21 - .../test/resources/test-documents/testVISIO.vsd | Bin 45568 -> 0 bytes .../resources/test-documents/testVISIO.vsdm | Bin 32360 -> 0 bytes .../resources/test-documents/testVISIO.vsdx | Bin 32350 -> 0 bytes .../resources/test-documents/testVISIO.vssm | Bin 32358 -> 0 bytes .../resources/test-documents/testVISIO.vssx | Bin 32349 -> 0 bytes .../resources/test-documents/testVISIO.vstm | Bin 32361 -> 0 bytes .../resources/test-documents/testVISIO.vstx | Bin 32350 -> 0 bytes .../resources/test-documents/testVORBIS.ogg | Bin 4241 -> 0 bytes .../test-documents/testVORCalcTemplate.vor | Bin 17408 -> 0 bytes .../test-documents/testVORDrawTemplate.vor | Bin 29696 -> 0 bytes .../test-documents/testVORImpressTemplate.vor | Bin 30208 -> 0 bytes .../test-documents/testVORWriterTemplate.vor | Bin 8192 -> 0 bytes .../test/resources/test-documents/testWAR.war | Bin 1003 -> 0 bytes .../test/resources/test-documents/testWAV.wav | Bin 3884 -> 0 bytes .../test-documents/testWEBARCHIVE.webarchive | 646 -- .../test/resources/test-documents/testWEBM.webm | Bin 39745 -> 0 bytes .../test/resources/test-documents/testWEBP.webp | Bin 3442 -> 0 bytes .../resources/test-documents/testWINMAIL.dat | Bin 66276 -> 0 bytes .../test/resources/test-documents/testWMA.wma | Bin 27747 -> 0 bytes .../test/resources/test-documents/testWMF.wmf | Bin 51590 -> 0 bytes .../test/resources/test-documents/testWMV.wmv | Bin 113878 -> 0 bytes .../test/resources/test-documents/testWORD.doc | Bin 32768 -> 0 bytes .../test/resources/test-documents/testWORD.docx | Bin 13436 -> 0 bytes .../test/resources/test-documents/testWORD6.doc | Bin 6656 -> 0 bytes .../resources/test-documents/testWORD_1img.doc | Bin 14848 -> 0 bytes .../resources/test-documents/testWORD_1img.docx | Bin 8325 -> 0 bytes .../resources/test-documents/testWORD_3imgs.doc | Bin 36352 -> 0 bytes .../test-documents/testWORD_3imgs.docx | Bin 31303 -> 0 bytes .../testWORD_bold_character_runs.doc | Bin 22016 -> 0 bytes .../testWORD_bold_character_runs.docx | Bin 12912 -> 0 bytes .../testWORD_bold_character_runs2.doc | Bin 22016 -> 0 bytes .../testWORD_bold_character_runs2.docx | Bin 12863 -> 0 bytes .../testWORD_closingSmartQInHyperLink.doc | Bin 26624 -> 0 bytes .../test-documents/testWORD_custom_props.doc | Bin 22528 -> 0 bytes .../test-documents/testWORD_custom_props.docx | Bin 13942 -> 0 bytes .../test-documents/testWORD_embedded_pdf.doc | Bin 1491456 -> 0 bytes .../test-documents/testWORD_embedded_pdf.docx | Bin 63294 -> 0 bytes .../test-documents/testWORD_embedded_rtf.doc | Bin 16384 -> 0 bytes .../test-documents/testWORD_embeded.doc | Bin 319488 -> 0 bytes .../test-documents/testWORD_embeded.docx | Bin 157830 -> 0 bytes .../testWORD_header_hyperlink.doc | Bin 22528 -> 0 bytes .../testWORD_missing_ooxml_bean1.docx | Bin 17913 -> 0 bytes .../test-documents/testWORD_missing_text.docx | Bin 31592 -> 0 bytes .../test-documents/testWORD_multi_authors.doc | Bin 22528 -> 0 bytes .../test-documents/testWORD_multi_authors.docx | Bin 12054 -> 0 bytes .../test-documents/testWORD_no_format.doc | Bin 74752 -> 0 bytes .../test-documents/testWORD_no_format.docx | Bin 37018 -> 0 bytes .../test-documents/testWORD_null_style.docx | Bin 29018 -> 0 bytes .../test-documents/testWORD_numbered_list.doc | Bin 44032 -> 0 bytes .../test-documents/testWORD_numbered_list.docx | Bin 24696 -> 0 bytes .../testWORD_override_list_numbering.doc | Bin 56320 -> 0 bytes .../testWORD_override_list_numbering.docx | Bin 15746 -> 0 bytes .../testWORD_protected_passtika.doc | Bin 22016 -> 0 bytes .../testWORD_protected_passtika.docx | Bin 14336 -> 0 bytes .../test-documents/testWORD_tabular_symbol.doc | Bin 10240 -> 0 bytes .../test-documents/testWORD_text_box.docx | Bin 25271 -> 0 bytes .../test-documents/testWORD_various.doc | Bin 35328 -> 0 bytes .../test-documents/testWORD_various.docx | Bin 19169 -> 0 bytes .../test/resources/test-documents/testWORKS.wps | Bin 9728 -> 0 bytes .../resources/test-documents/testWORKS2000.wps | Bin 5120 -> 0 bytes .../test-documents/testWORKSSpreadsheet7.0.xlr | Bin 10752 -> 0 bytes .../testWORKSWordProcessor3.0.wps | Bin 3072 -> 0 bytes .../testWORKSWordProcessor4.0.wps | Bin 3584 -> 0 bytes .../resources/test-documents/testWebVTT.vtt | 33 - .../test-documents/testWebp_Alpha_Lossless.webp | Bin 92312 -> 0 bytes .../test-documents/testWebp_Alpha_Lossy.webp | Bin 23404 -> 0 bytes .../test-documents/testWindows-x86-32.exe | Bin 11723 -> 0 bytes .../resources/test-documents/testWordArt.pptx | Bin 37792 -> 0 bytes .../resources/test-documents/testXHTML.html | 29 - .../test-documents/testXLSX_Thumbnail.xlsx | Bin 10318 -> 0 bytes .../test/resources/test-documents/testXML.xml | 48 - .../test/resources/test-documents/testXML2.xml | 1 - .../test/resources/test-documents/testXML3.xml | 23 - .../resources/test-documents/test_TIKA-1251.doc | Bin 50688 -> 0 bytes .../test-documents/test_embedded_package.rtf | 71 - .../test-documents/test_embedded_zip.pptx | Bin 345027 -> 0 bytes .../test-documents/test_list_override.rtf | 21 - .../resources/test-documents/test_mat_text.mat | Bin 183 -> 0 bytes .../test-documents/test_recursive_embedded.docx | Bin 27082 -> 0 bytes .../test_recursive_embedded_npe.docx | Bin 27817 -> 0 bytes .../resources/test-documents/testiBooks.ibooks | Bin 970636 -> 0 bytes .../testsolidworksAssembly2013SP2.SLDASM | Bin 209408 -> 0 bytes .../testsolidworksAssembly2014SP0.SLDASM | Bin 238080 -> 0 bytes .../testsolidworksDrawing2013SP2.SLDDRW | Bin 180224 -> 0 bytes .../testsolidworksDrawing2014SP0.SLDDRW | Bin 201216 -> 0 bytes .../testsolidworksPart2013SP2.SLDPRT | Bin 1010176 -> 0 bytes .../testsolidworksPart2014SP0.SLDPRT | Bin 1043456 -> 0 bytes .../test/resources/test-documents/tika434.html | 914 --- .../StringsConfig-full.properties | 18 - .../StringsConfig-partial.properties | 16 - .../TesseractOCRConfig-full.properties | 22 - .../TesseractOCRConfig-partial.properties | 18 - 1102 files changed, 27195 insertions(+), 27182 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index f4024e3..f2651fa 100644 --- a/pom.xml +++ b/pom.xml @@ -47,7 +47,6 @@ <modules> <module>tika-parent</module> <module>tika-core</module> - <module>tika-test-resources</module> <module>tika-parsers</module> <module>tika-xmp</module> <module>tika-serialization</module> http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/pom.xml ---------------------------------------------------------------------- diff --git a/tika-parser-modules/pom.xml b/tika-parser-modules/pom.xml index 0e768fa..e83bde5 100644 --- a/tika-parser-modules/pom.xml +++ b/tika-parser-modules/pom.xml @@ -61,15 +61,12 @@ <dependencies> <!-- Test dependencies --> <dependency> - <groupId>org.apache.tika</groupId> - <artifactId>tika-core</artifactId> - <version>${project.version}</version> - <type>test-jar</type> - <scope>test</scope> + <groupId>junit</groupId> + <artifactId>junit</artifactId> </dependency> <dependency> <groupId>org.apache.tika</groupId> - <artifactId>tika-test-resources</artifactId> + <artifactId>tika-core</artifactId> <version>${project.version}</version> <type>test-jar</type> <scope>test</scope> @@ -89,6 +86,7 @@ <build> <pluginManagement> <plugins> + <!-- <plugin> <groupId>org.apache.maven.plugins</groupId> <artifactId>maven-dependency-plugin</artifactId> @@ -115,6 +113,7 @@ </execution> </executions> </plugin> + --> </plugins> </pluginManagement> </build> http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy new file mode 100644 index 0000000..3b61f20 --- /dev/null +++ b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/ModelGetter.groovy @@ -0,0 +1,93 @@ + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * This file downloads Apache OpenNLP NER models for testing the NamedEntityParser + */ + +import org.apache.commons.io.IOUtils + +/** + * Copies input stream to output stream, additionally printing the progress. + * NOTE: this is optimized for large content + * @param inStr source stream + * @param outStr target stream + * @param totalLength the total length of the content (used to calculate progress) + * @return + */ +def copyWithProgress(InputStream inStr, OutputStream outStr, long totalLength){ + int PROGRESS_DELAY = 1000; + byte[] buffer = new byte[1024 * 4] + long count = 0 + int len + long tt = System.currentTimeMillis() + while ((len = inStr.read(buffer)) > 0) { + outStr.write(buffer, 0, len) + count += len + if (System.currentTimeMillis() - tt > PROGRESS_DELAY) { + println "${count * 100.0/totalLength}% : $count bytes of $totalLength" + tt = System.currentTimeMillis() + } + } + println "Copy complete. " + IOUtils.closeQuietly(inStr) + IOUtils.closeQuietly(outStr) +} + +/** + * Downloads file + * @param urlStr url of file + * @param file path to store file + * @return + */ +def downloadFile(String urlStr, File file) { + println "GET : $urlStr -> $file" + urlConn = new URL(urlStr).openConnection() + contentLength = urlConn.getContentLengthLong() + + file.getParentFile().mkdirs() + inStream = urlConn.getInputStream() + outStream = new FileOutputStream(file) + copyWithProgress(inStream, outStream, contentLength) + IOUtils.closeQuietly(outStream) + IOUtils.closeQuietly(inStream) + println "Download Complete.." +} + + +def urlPrefix = "http://opennlp.sourceforge.net/models-1.5" +def prefixPath = "src/test/resources/org/apache/tika/parser/ner/opennlp/" + +// detecting proper path for test resources +if (new File("tika-test-resources").exists() && new File("tika-app").exists() ) { + // running from parent maven project, but resources should go to sub-module + prefixPath = "tika-test-resources/" + prefixPath +} + +def modelFiles = //filePath : url + [ (prefixPath + "ner-person.bin"): (urlPrefix + "/en-ner-person.bin"), + (prefixPath + "ner-location.bin"): (urlPrefix + "/en-ner-location.bin"), + (prefixPath + "ner-organization.bin"): (urlPrefix + "/en-ner-organization.bin"), + (prefixPath + "ner-date.bin"): (urlPrefix + "/en-ner-date.bin")] + +for (def entry : modelFiles) { + File file = new File(entry.key) + if (!file.exists()) { + downloadFile(entry.value, file) + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/get-models.sh ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/get-models.sh b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/get-models.sh new file mode 100644 index 0000000..c17899e --- /dev/null +++ b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/opennlp/get-models.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +echo "Getting OpenNLP NER models" +wget "http://opennlp.sourceforge.net/models-1.5/en-ner-person.bin" -O ner-person.bin +wget "http://opennlp.sourceforge.net/models-1.5/en-ner-location.bin" -O ner-location.bin +wget "http://opennlp.sourceforge.net/models-1.5/en-ner-organization.bin" -O ner-organization.bin + +# Additional 4 +wget "http://opennlp.sourceforge.net/models-1.5/en-ner-date.bin" -O ner-date.bin +wget "http://opennlp.sourceforge.net/models-1.5/en-ner-money.bin" -O ner-money.bin +wget "http://opennlp.sourceforge.net/models-1.5/en-ner-time.bin" -O ner-time.bin +wget "http://opennlp.sourceforge.net/models-1.5/en-ner-percentage.bin" -O ner-percentage.bin \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt new file mode 100644 index 0000000..e6fa39e --- /dev/null +++ b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/regex/ner-regex.txt @@ -0,0 +1,17 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +WEEK_DAY=(?i)((sun)|(mon)|(tues)|(thurs)|(fri)|((sat)(ur)?))(day)? \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/tika-config.xml ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/tika-config.xml b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/tika-config.xml new file mode 100644 index 0000000..267c399 --- /dev/null +++ b/tika-parser-modules/tika-parser-advanced-module/src/test/resources/org/apache/tika/parser/ner/tika-config.xml @@ -0,0 +1,27 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<properties> + <parsers> + <parser class="org.apache.tika.parser.ner.NamedEntityParser"> + <mime>text/plain</mime> + <mime>text/html</mime> + <mime>application/xhtml+xml</mime> + </parser> + </parsers> + +</properties> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY.prt ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY.prt b/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY.prt new file mode 100644 index 0000000..a6c6e98 Binary files /dev/null and b/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY.prt differ http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY2.prt ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY2.prt b/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY2.prt new file mode 100644 index 0000000..178fd9b Binary files /dev/null and b/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testCADKEY2.prt differ http://git-wip-us.apache.org/repos/asf/tika/blob/38916f89/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testDWG2000.dwg ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testDWG2000.dwg b/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testDWG2000.dwg new file mode 100644 index 0000000..1b54bbc Binary files /dev/null and b/tika-parser-modules/tika-parser-cad-module/src/test/resources/test-documents/testDWG2000.dwg differ
