http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java index a8bfaed..20f8760 100644 --- a/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java +++ b/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java @@ -60,6 +60,7 @@ import org.apache.tika.sax.ContentHandlerDecorator; import org.apache.tika.sax.ToXMLContentHandler; import org.junit.AfterClass; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import org.xml.sax.ContentHandler; @@ -105,74 +106,55 @@ public class PDFParserTest extends TikaTest { @Test public void testPdfParsing() throws Exception { - Parser parser = new AutoDetectParser(); // Should auto-detect! 
- Metadata metadata = new Metadata(); - - InputStream stream = PDFParserTest.class.getResourceAsStream( - "/test-documents/testPDF.pdf"); - - String content = getText(stream, parser, metadata); - - assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE)); - assertEquals("Bertrand Delacr\u00e9taz", metadata.get(TikaCoreProperties.CREATOR)); - assertEquals("Bertrand Delacr\u00e9taz", metadata.get(Metadata.AUTHOR)); - assertEquals("Firefox", metadata.get(TikaCoreProperties.CREATOR_TOOL)); - assertEquals("Apache Tika - Apache Tika", metadata.get(TikaCoreProperties.TITLE)); + XMLResult r = getXML("testPDF.pdf"); + assertEquals("application/pdf", r.metadata.get(Metadata.CONTENT_TYPE)); + assertEquals("Bertrand Delacr\u00e9taz", r.metadata.get(TikaCoreProperties.CREATOR)); + assertEquals("Bertrand Delacr\u00e9taz", r.metadata.get(Metadata.AUTHOR)); + assertEquals("Firefox", r.metadata.get(TikaCoreProperties.CREATOR_TOOL)); + assertEquals("Apache Tika - Apache Tika", r.metadata.get(TikaCoreProperties.TITLE)); // Can't reliably test dates yet - see TIKA-451 // assertEquals("Sat Sep 15 10:02:31 BST 2007", metadata.get(Metadata.CREATION_DATE)); // assertEquals("Sat Sep 15 10:02:31 BST 2007", metadata.get(Metadata.LAST_MODIFIED)); - assertContains("Apache Tika", content); - assertContains("Tika - Content Analysis Toolkit", content); - assertContains("incubator", content); - assertContains("Apache Software Foundation", content); + assertContains("Apache Tika", r.xml); + assertContains("Tika - Content Analysis Toolkit", r.xml); + assertContains("incubator", r.xml); + assertContains("Apache Software Foundation", r.xml); // testing how the end of one paragraph is separated from start of the next one - assertTrue("should have word boundary after headline", - !content.contains("ToolkitApache")); - assertTrue("should have word boundary between paragraphs", - !content.contains("libraries.Apache")); + + // should have word boundary after headline + 
assertNotContained("ToolkitApache", r.xml); + // should have word boundary between paragraphs + assertNotContained("libraries.Apache", r.xml); } @Test public void testPdfParsingMetadataOnly() throws Exception { - Parser parser = new AutoDetectParser(); // Should auto-detect! - Metadata metadata = new Metadata(); - - try (InputStream stream = PDFParserTest.class.getResourceAsStream( - "/test-documents/testPDF.pdf")) { - parser.parse(stream, null, metadata, new ParseContext()); - } - - assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE)); - assertEquals("Bertrand Delacr\u00e9taz", metadata.get(TikaCoreProperties.CREATOR)); - assertEquals("Firefox", metadata.get(TikaCoreProperties.CREATOR_TOOL)); - assertEquals("Apache Tika - Apache Tika", metadata.get(TikaCoreProperties.TITLE)); + XMLResult r = getXML("testPDF.pdf"); + assertEquals("application/pdf", r.metadata.get(Metadata.CONTENT_TYPE)); + assertEquals("Bertrand Delacr\u00e9taz", r.metadata.get(TikaCoreProperties.CREATOR)); + assertEquals("Firefox", r.metadata.get(TikaCoreProperties.CREATOR_TOOL)); + assertEquals("Apache Tika - Apache Tika", r.metadata.get(TikaCoreProperties.TITLE)); } @Test public void testCustomMetadata() throws Exception { - Parser parser = new AutoDetectParser(); // Should auto-detect! 
- Metadata metadata = new Metadata(); - - InputStream stream = PDFParserTest.class.getResourceAsStream( - "/test-documents/testPDF-custommetadata.pdf"); - - String content = getText(stream, parser, metadata); + XMLResult r = getXML("testPDF-custommetadata.pdf"); - assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE)); - assertEquals("Document author", metadata.get(TikaCoreProperties.CREATOR)); - assertEquals("Document author", metadata.get(Metadata.AUTHOR)); - assertEquals("Document title", metadata.get(TikaCoreProperties.TITLE)); + assertEquals("application/pdf", r.metadata.get(Metadata.CONTENT_TYPE)); + assertEquals("Document author", r.metadata.get(TikaCoreProperties.CREATOR)); + assertEquals("Document author", r.metadata.get(Metadata.AUTHOR)); + assertEquals("Document title", r.metadata.get(TikaCoreProperties.TITLE)); - assertEquals("Custom Value", metadata.get("Custom Property")); + assertEquals("Custom Value", r.metadata.get("Custom Property")); - assertEquals("Array Entry 1", metadata.get("Custom Array")); - assertEquals(2, metadata.getValues("Custom Array").length); - assertEquals("Array Entry 1", metadata.getValues("Custom Array")[0]); - assertEquals("Array Entry 2", metadata.getValues("Custom Array")[1]); + assertEquals("Array Entry 1", r.metadata.get("Custom Array")); + assertEquals(2, r.metadata.getValues("Custom Array").length); + assertEquals("Array Entry 1", r.metadata.getValues("Custom Array")[0]); + assertEquals("Array Entry 2", r.metadata.getValues("Custom Array")[1]); - assertContains("Hello World!", content); + assertContains("Hello World!", r.xml); } /** @@ -182,16 +164,9 @@ public class PDFParserTest extends TikaTest { */ @Test public void testProtectedPDF() throws Exception { - Parser parser = new AutoDetectParser(); // Should auto-detect! 
- ContentHandler handler = new BodyContentHandler(); - Metadata metadata = new Metadata(); - ParseContext context = new ParseContext(); - - try (InputStream stream = PDFParserTest.class.getResourceAsStream( - "/test-documents/testPDF_protected.pdf")) { - parser.parse(stream, handler, metadata, context); - } + XMLResult r = getXML("testPDF_protected.pdf"); + Metadata metadata = r.metadata; assertEquals("true", metadata.get("pdf:encrypted")); assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE)); assertEquals("The Bank of England", metadata.get(TikaCoreProperties.CREATOR)); @@ -200,27 +175,23 @@ public class PDFParserTest extends TikaTest { assertEquals("Speeches by Andrew G Haldane", metadata.get(Metadata.SUBJECT)); assertEquals("Rethinking the Financial Network, Speech by Andrew G Haldane, Executive Director, Financial Stability delivered at the Financial Student Association, Amsterdam on 28 April 2009", metadata.get(TikaCoreProperties.TITLE)); - String content = handler.toString(); + String content = r.xml; assertContains("RETHINKING THE FINANCIAL NETWORK", content); assertContains("On 16 November 2002", content); assertContains("In many important respects", content); // Try again with an explicit empty password - handler = new BodyContentHandler(); metadata = new Metadata(); - context = new ParseContext(); + ParseContext context = new ParseContext(); context.set(PasswordProvider.class, new PasswordProvider() { public String getPassword(Metadata metadata) { return ""; } }); - - try (InputStream stream = PDFParserTest.class.getResourceAsStream( - "/test-documents/testPDF_protected.pdf")) { - parser.parse(stream, handler, metadata, context); - } + r = getXML("testPDF_protected.pdf", new AutoDetectParser(), metadata, context); + metadata = r.metadata; assertEquals("true", metadata.get("pdf:encrypted")); assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE)); @@ -234,7 +205,6 @@ public class PDFParserTest extends TikaTest { 
assertContains("In many important respects", content); //now test wrong password - handler = new BodyContentHandler(); metadata = new Metadata(); context = new ParseContext(); context.set(PasswordProvider.class, new PasswordProvider() { @@ -244,23 +214,20 @@ public class PDFParserTest extends TikaTest { }); boolean ex = false; - try (InputStream stream = PDFParserTest.class.getResourceAsStream( - "/test-documents/testPDF_protected.pdf")) { - parser.parse(stream, handler, metadata, context); + try { + r = getXML("testPDF_protected.pdf", new AutoDetectParser(), metadata, context); } catch (EncryptedDocumentException e) { ex = true; } - content = handler.toString(); + content = r.xml; assertTrue("encryption exception", ex); assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE)); assertEquals("true", metadata.get("pdf:encrypted")); //pdf:encrypted, X-Parsed-By and Content-Type assertEquals("very little metadata should be parsed", 3, metadata.names().length); - assertEquals(0, content.length()); //now test wrong password with non sequential parser - handler = new BodyContentHandler(); metadata = new Metadata(); context = new ParseContext(); context.set(PasswordProvider.class, new PasswordProvider() { @@ -272,22 +239,21 @@ public class PDFParserTest extends TikaTest { config.setUseNonSequentialParser(true); context.set(PDFParserConfig.class, config); - ; ex = false; - try (InputStream stream = PDFParserTest.class.getResourceAsStream( - "/test-documents/testPDF_protected.pdf")) { - parser.parse(stream, handler, metadata, context); + try { + r = getXML("testPDF_protected.pdf", new AutoDetectParser(), metadata, context); } catch (EncryptedDocumentException e) { ex = true; } - content = handler.toString(); + + content = r.xml; assertTrue("encryption exception", ex); assertEquals("application/pdf", metadata.get(Metadata.CONTENT_TYPE)); assertEquals("true", metadata.get("pdf:encrypted")); //pdf:encrypted, X-Parsed-By and Content-Type assertEquals("very little 
metadata should be parsed", 3, metadata.names().length); - assertEquals(0, content.length()); + } @Test @@ -619,6 +585,7 @@ public class PDFParserTest extends TikaTest { * TODO: more testing */ @Test + @Ignore("this will be going away as soon as we upgrade to 2.0") public void testSequentialParser() throws Exception { Parser sequentialParser = new AutoDetectParser(); @@ -745,13 +712,13 @@ public class PDFParserTest extends TikaTest { //The current test doc does not contain any content in the signature area. //This just tests that a RuntimeException is not thrown. //TODO: find a better test file for this issue. - String xml = getXML("/testPDF_acroform3.pdf").xml; + String xml = getXML("testPDF_acroform3.pdf").xml; assertTrue("found", (xml.contains("<li>aTextField: TIKA-1226</li>"))); } @Test // TIKA-1228, TIKA-1268 public void testEmbeddedFilesInChildren() throws Exception { - String xml = getXML("/testPDF_childAttachments.pdf").xml; + String xml = getXML("testPDF_childAttachments.pdf").xml; //"regressiveness" exists only in Unit10.doc not in the container pdf document assertTrue(xml.contains("regressiveness")); @@ -785,7 +752,7 @@ public class PDFParserTest extends TikaTest { @Test public void testEmbeddedFilesInAnnotations() throws Exception { - String xml = getXML("/testPDFFileEmbInAnnotation.pdf").xml; + String xml = getXML("testPDFFileEmbInAnnotation.pdf").xml; assertTrue(xml.contains("This is a Excel")); }
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java index 4398999..aa70106 100644 --- a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java +++ b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/isatab/ISArchiveParser.java @@ -114,7 +114,7 @@ public class ISArchiveParser implements Parser { InputStream stream = TikaInputStream.get(new File(this.location + investigation)); ISATabUtils.parseInvestigation(stream, xhtml, metadata, context, this.studyFileName); - + stream.close(); xhtml.element("h1", "INVESTIGATION " + metadata.get("Investigation Identifier")); } @@ -130,6 +130,7 @@ public class ISArchiveParser implements Parser { xhtml.element("h3", "ASSAY " + assayFileName); InputStream stream = TikaInputStream.get(new File(this.location + assayFileName)); ISATabUtils.parseAssay(stream, xhtml, metadata, context); + stream.close(); xhtml.endElement("div"); } } http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java index 6a63eb4..ddbca81 100644 --- 
a/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java +++ b/tika-parser-modules/tika-parser-scientific-module/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java @@ -21,11 +21,10 @@ package org.apache.tika.parser.netcdf; import java.io.IOException; import java.io.InputStream; import java.util.Collections; -import java.util.Set; import java.util.List; +import java.util.Set; import org.apache.tika.exception.TikaException; -import org.apache.tika.io.TemporaryResources; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Property; @@ -37,11 +36,10 @@ import org.apache.tika.parser.Parser; import org.apache.tika.sax.XHTMLContentHandler; import org.xml.sax.ContentHandler; import org.xml.sax.SAXException; - import ucar.nc2.Attribute; +import ucar.nc2.Dimension; import ucar.nc2.NetcdfFile; import ucar.nc2.Variable; -import ucar.nc2.Dimension; /** * A {@link Parser} for <a @@ -82,9 +80,10 @@ public class NetCDFParser extends AbstractParser { Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { - TikaInputStream tis = TikaInputStream.get(stream, new TemporaryResources()); + TikaInputStream tis = TikaInputStream.get(stream); + NetcdfFile ncFile = null; try { - NetcdfFile ncFile = NetcdfFile.open(tis.getFile().getAbsolutePath()); + ncFile = NetcdfFile.open(tis.getFile().getAbsolutePath()); metadata.set("File-Type-Description", ncFile.getFileTypeDescription()); // first parse out the set of global attributes for (Attribute attr : ncFile.getGlobalAttributes()) { @@ -129,9 +128,13 @@ public class NetCDFParser extends AbstractParser { xhtml.endElement("ul"); xhtml.endDocument(); - + ncFile.close(); } catch (IOException e) { throw new TikaException("NetCDF parse error", e); + } finally { + if (ncFile != null) { + ncFile.close(); + } } } 
http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java index ef31abc..373da0d 100644 --- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java +++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/dif/DIFParserTest.java @@ -18,37 +18,22 @@ package org.apache.tika.parser.dif; import static org.junit.Assert.assertEquals; -import java.io.InputStream; - import org.apache.tika.TikaTest; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.sax.BodyContentHandler; import org.junit.Test; -import org.xml.sax.ContentHandler; public class DIFParserTest extends TikaTest { @Test public void testDifMetadata() throws Exception { - Parser parser = new DIFParser(); - ContentHandler handler = new BodyContentHandler(); - Metadata metadata = new Metadata(); - - try (InputStream stream = DIFParser.class.getResourceAsStream( - "/test-documents/Zamora2010.dif")) { - parser.parse(stream, handler, metadata, new ParseContext()); - } - - assertEquals(metadata.get("DIF-Entry_ID"),"00794186-48f9-11e3-9dcb-00c0f03d5b7c"); - assertEquals(metadata.get("DIF-Metadata_Name"),"ACADIS IDN DIF"); + XMLResult r = getXML("Zamora2010.dif", new DIFParser()); + assertEquals(r.metadata.get("DIF-Entry_ID"),"00794186-48f9-11e3-9dcb-00c0f03d5b7c"); + assertEquals(r.metadata.get("DIF-Metadata_Name"),"ACADIS IDN DIF"); - String content = handler.toString(); + String content = r.xml; assertContains("Title: 
Zamora 2010 Using Sediment Geochemistry", content); - assertContains("Southernmost_Latitude : 78.833", content); - assertContains("Northernmost_Latitude : 79.016", content); - assertContains("Westernmost_Longitude : 11.64", content); - assertContains("Easternmost_Longitude : 13.34", content); + assertContains("Southernmost_Latitude : </td><td>78.833", content); + assertContains("Northernmost_Latitude : </td><td>79.016", content); + assertContains("Westernmost_Longitude : </td><td>11.64", content); + assertContains("Easternmost_Longitude : </td><td>13.34", content); } } http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java index 3603280..0bf67fb 100644 --- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java +++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java @@ -17,44 +17,26 @@ package org.apache.tika.parser.envi; -import static org.apache.tika.TikaTest.assertContains; -import static org.junit.Assert.assertNotNull; - -import java.io.InputStream; - -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.sax.ToXMLContentHandler; +import org.apache.tika.TikaTest; import org.junit.Test; /** * Test cases to exercise the {@link EnviHeaderParser}. 
*/ -public class EnviHeaderParserTest { +public class EnviHeaderParserTest extends TikaTest { @Test public void testParseGlobalMetadata() throws Exception { if (System.getProperty("java.version").startsWith("1.5")) { return; } - Parser parser = new EnviHeaderParser(); - ToXMLContentHandler handler = new ToXMLContentHandler(); - Metadata metadata = new Metadata(); - - try (InputStream stream = EnviHeaderParser.class.getResourceAsStream( - "/test-documents/envi_test_header.hdr")) { - assertNotNull("Test ENVI file not found", stream); - parser.parse(stream, handler, metadata, new ParseContext()); - } - + XMLResult r = getXML("envi_test_header.hdr", new EnviHeaderParser()); // Check content of test file - String content = handler.toString(); - assertContains("<body><p>ENVI</p>", content); - assertContains("<p>samples = 2400</p>", content); - assertContains("<p>lines = 2400</p>", content); - assertContains("<p>map info = {Sinusoidal, 1.5000, 1.5000, -10007091.3643, 5559289.2856, 4.6331271653e+02, 4.6331271653e+02, , units=Meters}</p>", content); - assertContains("content=\"application/envi.hdr\"", content); - assertContains("projection info = {16, 6371007.2, 0.000000, 0.0, 0.0, Sinusoidal, units=Meters}", content); + assertContains("<body><p>ENVI</p>", r.xml); + assertContains("<p>samples = 2400</p>", r.xml); + assertContains("<p>lines = 2400</p>", r.xml); + assertContains("<p>map info = {Sinusoidal, 1.5000, 1.5000, -10007091.3643, 5559289.2856, 4.6331271653e+02, 4.6331271653e+02, , units=Meters}</p>", r.xml); + assertContains("content=\"application/envi.hdr\"", r.xml); + assertContains("projection info = {16, 6371007.2, 0.000000, 0.0, 0.0, Sinusoidal, units=Meters}", r.xml); } } http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java ---------------------------------------------------------------------- diff --git 
a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java index cf37989..5d4c58c 100644 --- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java +++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java @@ -49,7 +49,7 @@ public class TestGDALParser extends TikaTest { } @Test - public void testParseBasicInfo() { + public void testParseBasicInfo() throws Exception { assumeTrue(canRun()); final String expectedDriver = "netCDF/Network Common Data Format"; final String expectedUpperRight = "512.0, 0.0"; @@ -59,18 +59,9 @@ public class TestGDALParser extends TikaTest { final String expectedCoordinateSystem = "`'"; final String expectedSize = "512, 512"; - GDALParser parser = new GDALParser(); - InputStream stream = TestGDALParser.class - .getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc"); - Metadata met = new Metadata(); - BodyContentHandler handler = new BodyContentHandler(); - try { - parser.parse(stream, handler, met, new ParseContext()); - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } + XMLResult r = getXML("sresa1b_ncar_ccsm3_0_run1_200001.nc", new GDALParser()); + Metadata met = r.metadata; assertNotNull(met); assertNotNull(met.get("Driver")); assertEquals(expectedDriver, met.get("Driver")); @@ -91,7 +82,7 @@ public class TestGDALParser extends TikaTest { } @Test - public void testParseMetadata() { + public void testParseMetadata() throws Exception { assumeTrue(canRun()); final String expectedNcInst = "NCAR (National Center for Atmospheric Research, Boulder, CO, USA)"; final String expectedModelNameEnglish = "NCAR CCSM"; @@ -102,14 +93,10 @@ public class TestGDALParser extends TikaTest { final String expectedSub8Name = 
"\":ua"; final String expectedSub8Desc = "[1x17x128x256] eastward_wind (32-bit floating-point)"; - GDALParser parser = new GDALParser(); - InputStream stream = TestGDALParser.class - .getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc"); - Metadata met = new Metadata(); - BodyContentHandler handler = new BodyContentHandler(); - try { - parser.parse(stream, handler, met, new ParseContext()); - assertNotNull(met); + XMLResult r = getXML("sresa1b_ncar_ccsm3_0_run1_200001.nc"); + Metadata met = r.metadata; + + assertNotNull(met); assertNotNull(met.get("NC_GLOBAL#institution")); assertEquals(expectedNcInst, met.get("NC_GLOBAL#institution")); assertNotNull(met.get("NC_GLOBAL#model_name_english")); @@ -129,14 +116,11 @@ public class TestGDALParser extends TikaTest { assertTrue(met.get("SUBDATASET_8_NAME").endsWith(expectedSub8Name)); assertNotNull(met.get("SUBDATASET_8_DESC")); assertEquals(expectedSub8Desc, met.get("SUBDATASET_8_DESC")); - } catch (Exception e) { - e.printStackTrace(); - fail(e.getMessage()); - } } @Test public void testParseFITS() { + //TODO: fix this...add spooling to tmp file to TikaTest String fitsFilename = "/test-documents/WFPC2u5780205r_c0fx.fits"; assumeTrue(canRun()); http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geo/topic/GeoParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geo/topic/GeoParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geo/topic/GeoParserTest.java index 0d6fb74..0fbe7b3 100644 --- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geo/topic/GeoParserTest.java +++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geo/topic/GeoParserTest.java @@ 
-21,25 +21,30 @@ import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; -import org.junit.Test; + import java.io.ByteArrayInputStream; import java.io.IOException; -import java.io.InputStream; import java.io.UnsupportedEncodingException; +import org.apache.tika.TikaTest; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.sax.BodyContentHandler; +import org.junit.Test; import org.xml.sax.SAXException; -public class GeoParserTest { +public class GeoParserTest extends TikaTest { private Parser geoparser = new GeoParser(); @Test - public void testFunctions() throws UnsupportedEncodingException, - IOException, SAXException, TikaException { + public void testFunctions() throws Exception { + + /* if it's not available no tests to run */ + if (!((GeoParser) geoparser).isAvailable()) + return; + String text = "The millennial-scale cooling trend that followed the HTM coincides with the decrease in China " + "summer insolation driven by slow changes in Earth's orbit. Despite the nearly linear forcing, the transition from the HTM to " + "the Little Ice Age (1500-1900 AD) was neither gradual nor uniform. 
To understand how feedbacks and perturbations result in rapid changes, " @@ -53,13 +58,7 @@ public class GeoParserTest { GeoParserConfig config = new GeoParserConfig(); context.set(GeoParserConfig.class, config); - InputStream s = new ByteArrayInputStream(text.getBytes(UTF_8)); - /* if it's not available no tests to run */ - if (!((GeoParser) geoparser).isAvailable()) - return; - - geoparser.parse(s, new BodyContentHandler(), metadata, context); - + XMLResult r = getXML(new ByteArrayInputStream(text.getBytes(UTF_8)), geoparser, metadata, context); assertNotNull(metadata.get("Geographic_NAME")); assertNotNull(metadata.get("Geographic_LONGITUDE")); assertNotNull(metadata.get("Geographic_LATITUDE")); http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geoinfo/GeographicInformationParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geoinfo/GeographicInformationParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geoinfo/GeographicInformationParserTest.java index acd0cb2..442b080 100644 --- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geoinfo/GeographicInformationParserTest.java +++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/geoinfo/GeographicInformationParserTest.java @@ -17,45 +17,29 @@ package org.apache.tika.parser.geoinfo; +import static org.junit.Assert.assertEquals; + +import org.apache.tika.TikaTest; import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.geoinfo.GeographicInformationParser; -import org.apache.tika.sax.BodyContentHandler; import org.junit.Test; -import org.xml.sax.ContentHandler; 
-import java.io.*; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -public class GeographicInformationParserTest { +public class GeographicInformationParserTest extends TikaTest { @Test - public void testISO19139() throws Exception{ - String path ="/test-documents/sampleFile.iso19139"; - - Metadata metadata = new Metadata(); - Parser parser=new org.apache.tika.parser.geoinfo.GeographicInformationParser(); - ContentHandler contentHandler=new BodyContentHandler(); - ParseContext parseContext=new ParseContext(); - - InputStream inputStream = GeographicInformationParser.class.getResourceAsStream(path); - - parser.parse(inputStream, contentHandler, metadata, parseContext); - - assertEquals("text/iso19139+xml", metadata.get(Metadata.CONTENT_TYPE)); - assertEquals("UTF-8", metadata.get("CharacterSet")); - assertEquals("https", metadata.get("TransferOptionsOnlineProtocol ")); - assertEquals("browser", metadata.get("TransferOptionsOnlineProfile ")); - assertEquals("Barrow Atqasuk ARCSS Plant", metadata.get("TransferOptionsOnlineName ")); - - String content = contentHandler.toString(); - assertTrue(content.contains("Barrow Atqasuk ARCSS Plant")); - assertTrue(content.contains("GeographicElementWestBoundLatitude -157.24")); - assertTrue(content.contains("GeographicElementEastBoundLatitude -156.4")); - assertTrue(content.contains("GeographicElementNorthBoundLatitude 71.18")); - assertTrue(content.contains("GeographicElementSouthBoundLatitude 70.27")); + public void testISO19139() throws Exception { + XMLResult r = getXML("sampleFile.iso19139", new GeographicInformationParser()); + assertEquals("text/iso19139+xml", r.metadata.get(Metadata.CONTENT_TYPE)); + assertEquals("UTF-8", r.metadata.get("CharacterSet")); + assertEquals("https", r.metadata.get("TransferOptionsOnlineProtocol ")); + assertEquals("browser", r.metadata.get("TransferOptionsOnlineProfile ")); + assertEquals("Barrow Atqasuk ARCSS Plant", 
r.metadata.get("TransferOptionsOnlineName ")); + + assertContains("Barrow Atqasuk ARCSS Plant", r.xml); + assertContains("<td>GeographicElementWestBoundLatitude</td>\t<td>-157.24</td>", r.xml); + assertContains("<td>GeographicElementEastBoundLatitude</td>\t<td>-156.4</td>", r.xml); + assertContains("<td>GeographicElementNorthBoundLatitude</td>\t<td>71.18</td>", r.xml); + assertContains("<td>GeographicElementSouthBoundLatitude</td>\t<td>70.27</td>", r.xml); } http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/grib/GribParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/grib/GribParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/grib/GribParserTest.java index 6ccf6af..622d511 100644 --- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/grib/GribParserTest.java +++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/grib/GribParserTest.java @@ -18,36 +18,24 @@ package org.apache.tika.parser.grib; //JDK imports -import static org.junit.Assert.*; -import java.io.InputStream; +import static org.junit.Assert.assertNotNull; -//TIKA imports -import org.apache.tika.metadata.Metadata; -import org.apache.tika.metadata.TikaCoreProperties; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.sax.BodyContentHandler; +import org.apache.tika.TikaTest; import org.junit.Test; -import org.xml.sax.ContentHandler; -import java.io.File; + +//TIKA imports /** * Test cases to exercise the {@link org.apache.tika.parser.grib.GribParser}. 
*/ -public class GribParserTest { +public class GribParserTest extends TikaTest { @Test public void testParseGlobalMetadata() throws Exception { - Parser parser = new GribParser(); - Metadata metadata = new Metadata(); - ContentHandler handler = new BodyContentHandler(); - try (InputStream stream = GribParser.class.getResourceAsStream("/test-documents/gdas1.forecmwf.2014062612.grib2")) { - parser.parse(stream, handler, metadata, new ParseContext()); - } - assertNotNull(metadata); - String content = handler.toString(); - assertTrue(content.contains("dimensions:")); - assertTrue(content.contains("variables:")); + XMLResult r = getXML("gdas1.forecmwf.2014062612.grib2", new GribParser()); + assertNotNull(r.metadata); + assertContains("dimensions:", r.xml); + assertContains("variables:", r.xml); } } http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java index 9bda875..1ee4dc7 100644 --- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java +++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/hdf/HDFParserTest.java @@ -17,39 +17,27 @@ package org.apache.tika.parser.hdf; //JDK imports + import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; -import java.io.InputStream; - - - +import org.apache.tika.TikaTest; +import org.junit.Test; //TIKA imports -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.parser.hdf.HDFParser; -import 
org.apache.tika.sax.BodyContentHandler; -import org.junit.Test; -import org.xml.sax.ContentHandler; /** * * Test suite for the {@link HDFParser}. * */ -public class HDFParserTest { +public class HDFParserTest extends TikaTest { @Test public void testParseGlobalMetadata() throws Exception { if(System.getProperty("java.version").startsWith("1.5")) { return; } - Parser parser = new HDFParser(); - ContentHandler handler = new BodyContentHandler(); - Metadata metadata = new Metadata(); - /* * this is a publicly available HDF5 file from the MLS mission: * @@ -57,12 +45,10 @@ public class HDFParserTest { * ftp://acdisc.gsfc.nasa.gov/data/s4pa///Aura_MLS_Level2/ML2O3.002//2009 * /MLS-Aura_L2GP-O3_v02-23-c01_2009d122.he5 */ - try (InputStream stream = HDFParser.class.getResourceAsStream("/test-documents/test.he5")) { - parser.parse(stream, handler, metadata, new ParseContext()); - } - assertNotNull(metadata); - assertEquals("5", metadata.get("GranuleMonth")); + XMLResult r = getXML("test.he5", new HDFParser()); + assertNotNull(r.metadata); + assertEquals("5", r.metadata.get("GranuleMonth")); } @Test @@ -70,23 +56,17 @@ public class HDFParserTest { if(System.getProperty("java.version").startsWith("1.5")) { return; } - Parser parser = new HDFParser(); - ContentHandler handler = new BodyContentHandler(); - Metadata metadata = new Metadata(); /* * this is a publicly available HDF4 file from the HD4 examples: * * http://www.hdfgroup.org/training/hdf4_chunking/Chunkit/bin/input54kmdata.hdf */ - try (InputStream stream = HDFParser.class.getResourceAsStream("/test-documents/test.hdf")) { - parser.parse(stream, handler, metadata, new ParseContext()); - } - - assertNotNull(metadata); - assertEquals("Direct read of HDF4 file through CDM library", metadata.get("_History")); - assertEquals("Ascending", metadata.get("Pass")); + XMLResult r = getXML("test.hdf", new HDFParser()); + assertNotNull(r.metadata); + assertEquals("Direct read of HDF4 file through CDM library", 
r.metadata.get("_History")); + assertEquals("Ascending", r.metadata.get("Pass")); assertEquals("Hierarchical Data Format, version 4", - metadata.get("File-Type-Description")); + r.metadata.get("File-Type-Description")); } } http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/isatab/ISArchiveParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/isatab/ISArchiveParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/isatab/ISArchiveParserTest.java index ce4299c..fcc71f5 100644 --- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/isatab/ISArchiveParserTest.java +++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/isatab/ISArchiveParserTest.java @@ -17,44 +17,72 @@ package org.apache.tika.parser.isatab; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; -import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.ParseContext; +import org.apache.tika.TikaTest; import org.apache.tika.parser.Parser; -import org.apache.tika.sax.BodyContentHandler; +import org.junit.AfterClass; +import org.junit.BeforeClass; import org.junit.Test; -import org.xml.sax.ContentHandler; -public class ISArchiveParserTest { +public class ISArchiveParserTest extends TikaTest { + + static Path tmpDir; + final static String ISA_SUBDIR = "testISATab_BII-I-1"; + final static String[] ISA_FILES = { + "a_bii-s-2_metabolite profiling_NMR spectroscopy.txt", + "a_metabolome.txt", + "a_microarray.txt", + "a_proteome.txt", + "a_transcriptome.txt", + "i_investigation.txt" + }; + + 
@BeforeClass + public static void createTempDir() throws Exception { + tmpDir = Files.createTempDirectory(ISA_SUBDIR); + for (String isaFile : ISA_FILES) { + String isaPath = "test-documents/"+ISA_SUBDIR+"/"+isaFile; + Files.copy(ISArchiveParserTest.class.getClassLoader().getResourceAsStream(isaPath), + tmpDir.resolve(isaFile)); + } + } + @AfterClass + public static void deleteTempDir() throws Exception { + for (String isaFile : ISA_FILES) { + Path p = tmpDir.resolve(isaFile); + Files.delete(p); + } + Files.delete(tmpDir); + } @Test public void testParseArchive() throws Exception { - String path = "/test-documents/testISATab_BII-I-1/s_BII-S-1.txt"; - - Parser parser = new ISArchiveParser(ISArchiveParserTest.class.getResource("/test-documents/testISATab_BII-I-1/").toURI().getPath()); - //Parser parser = new AutoDetectParser(); - - ContentHandler handler = new BodyContentHandler(); - Metadata metadata = new Metadata(); - ParseContext context = new ParseContext(); - try (InputStream stream = ISArchiveParserTest.class.getResourceAsStream(path)) { - parser.parse(stream, handler, metadata, context); - } - + + Parser parser = new ISArchiveParser(tmpDir.toString()); + XMLResult r = getXML(ISA_SUBDIR+"/s_BII-S-1.txt", + parser); + // INVESTIGATION - assertEquals("Invalid Investigation Identifier", "BII-I-1", metadata.get("Investigation Identifier")); - assertEquals("Invalid Investigation Title", "Growth control of the eukaryote cell: a systems biology study in yeast", metadata.get("Investigation Title")); + assertEquals("Invalid Investigation Identifier", "BII-I-1", + r.metadata.get("Investigation Identifier")); + assertEquals("Invalid Investigation Title", + "Growth control of the eukaryote cell: a systems biology study in yeast", + r.metadata.get("Investigation Title")); // INVESTIGATION PUBLICATIONS - assertEquals("Invalid Investigation PubMed ID", "17439666", metadata.get("Investigation PubMed ID")); - assertEquals("Invalid Investigation Publication DOI", 
"doi:10.1186/jbiol54", metadata.get("Investigation Publication DOI")); + assertEquals("Invalid Investigation PubMed ID", "17439666", + r.metadata.get("Investigation PubMed ID")); + assertEquals("Invalid Investigation Publication DOI", "doi:10.1186/jbiol54", + r.metadata.get("Investigation Publication DOI")); // INVESTIGATION CONTACTS - assertEquals("Invalid Investigation Person Last Name", "Oliver", metadata.get("Investigation Person Last Name")); - assertEquals("Invalid Investigation Person First Name", "Stephen", metadata.get("Investigation Person First Name")); + assertEquals("Invalid Investigation Person Last Name", "Oliver", + r.metadata.get("Investigation Person Last Name")); + assertEquals("Invalid Investigation Person First Name", "Stephen", + r.metadata.get("Investigation Person First Name")); } } http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/mat/MatParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/mat/MatParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/mat/MatParserTest.java index 0b31fea..aee5d62 100644 --- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/mat/MatParserTest.java +++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/mat/MatParserTest.java @@ -16,65 +16,39 @@ */ package org.apache.tika.parser.mat; -import static org.apache.tika.TikaTest.assertContains; import static org.junit.Assert.assertEquals; -import java.io.InputStream; - -import org.apache.tika.metadata.Metadata; -import org.apache.tika.parser.AutoDetectParser; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.sax.ToXMLContentHandler; +import 
org.apache.tika.TikaTest; import org.junit.Test; /** * Test cases to exercise the {@link MatParser}. */ -public class MatParserTest { +public class MatParserTest extends TikaTest { @Test public void testParser() throws Exception { - AutoDetectParser parser = new AutoDetectParser(); - ToXMLContentHandler handler = new ToXMLContentHandler(); - Metadata metadata = new Metadata(); - String path = "/test-documents/breidamerkurjokull_radar_profiles_2009.mat"; - - try (InputStream stream = MatParser.class.getResourceAsStream(path)) { - parser.parse(stream, handler, metadata, new ParseContext()); - } + XMLResult r = getXML("breidamerkurjokull_radar_profiles_2009.mat"); // Check Metadata - assertEquals("PCWIN64", metadata.get("platform")); - assertEquals("MATLAB 5.0 MAT-file", metadata.get("fileType")); - assertEquals("IM", metadata.get("endian")); - assertEquals("Thu Feb 21 15:52:49 2013", metadata.get("createdOn")); + assertEquals("PCWIN64", r.metadata.get("platform")); + assertEquals("MATLAB 5.0 MAT-file", r.metadata.get("fileType")); + assertEquals("IM", r.metadata.get("endian")); + assertEquals("Thu Feb 21 15:52:49 2013", r.metadata.get("createdOn")); // Check Content - String content = handler.toString(); - - assertContains("<li>[1x909 double array]</li>", content); - assertContains("<p>c1:[1x1 struct array]</p>", content); - assertContains("<li>[1024x1 double array]</li>", content); - assertContains("<p>b1:[1x1 struct array]</p>", content); - assertContains("<p>a1:[1x1 struct array]</p>", content); - assertContains("<li>[1024x1261 double array]</li>", content); - assertContains("<li>[1x1 double array]</li>", content); - assertContains("</body></html>", content); + assertContains("<li>[1x909 double array]</li>", r.xml); + assertContains("<p>c1:[1x1 struct array]</p>", r.xml); + assertContains("<li>[1024x1 double array]</li>", r.xml); + assertContains("<p>b1:[1x1 struct array]</p>", r.xml); + assertContains("<p>a1:[1x1 struct array]</p>", r.xml); + 
assertContains("<li>[1024x1261 double array]</li>", r.xml); + assertContains("<li>[1x1 double array]</li>", r.xml); + assertContains("</body></html>", r.xml); } @Test public void testParserForText() throws Exception { - Parser parser = new MatParser(); - ToXMLContentHandler handler = new ToXMLContentHandler(); - Metadata metadata = new Metadata(); - String path = "/test-documents/test_mat_text.mat"; - - try (InputStream stream = MatParser.class.getResourceAsStream(path)) { - parser.parse(stream, handler, metadata, new ParseContext()); - } - - // Check Content - String content = handler.toString(); - assertContains("<p>double:[2x2 double array]</p>", content); + XMLResult r = getXML("test_mat_text.mat", new MatParser()); + assertContains("<p>double:[2x2 double array]</p>", r.xml); } } http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java index 3cc1df8..7d0f2e8 100644 --- a/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java +++ b/tika-parser-modules/tika-parser-scientific-module/src/test/java/org/apache/tika/parser/netcdf/NetCDFParserTest.java @@ -17,54 +17,42 @@ package org.apache.tika.parser.netcdf; //JDK imports -import java.io.InputStream; -//TIKA imports +import static org.junit.Assert.assertEquals; + +import org.apache.tika.TikaTest; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; -import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.Parser; -import org.apache.tika.sax.BodyContentHandler; import 
org.junit.Test; -import org.xml.sax.ContentHandler; -import static org.apache.tika.TikaTest.assertContains; -import static org.junit.Assert.assertEquals; +//TIKA imports /** * Test cases to exercise the {@link NetCDFParser}. */ -public class NetCDFParserTest { +public class NetCDFParserTest extends TikaTest { @Test public void testParseGlobalMetadata() throws Exception { - Parser parser = new NetCDFParser(); - ContentHandler handler = new BodyContentHandler(); - Metadata metadata = new Metadata(); - - try (InputStream stream = NetCDFParser.class - .getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc")) { - parser.parse(stream, handler, metadata, new ParseContext()); - } - assertEquals(metadata.get(TikaCoreProperties.TITLE), + XMLResult r = getXML("sresa1b_ncar_ccsm3_0_run1_200001.nc", new NetCDFParser()); + assertEquals(r.metadata.get(TikaCoreProperties.TITLE), "model output prepared for IPCC AR4"); - assertEquals(metadata.get(Metadata.CONTACT), "ccsm@ucar.edu"); - assertEquals(metadata.get(Metadata.PROJECT_ID), + assertEquals(r.metadata.get(Metadata.CONTACT), "ccsm@ucar.edu"); + assertEquals(r.metadata.get(Metadata.PROJECT_ID), "IPCC Fourth Assessment"); - assertEquals(metadata.get(Metadata.CONVENTIONS), "CF-1.0"); - assertEquals(metadata.get(Metadata.REALIZATION), "1"); - assertEquals(metadata.get(Metadata.EXPERIMENT_ID), + assertEquals(r.metadata.get(Metadata.CONVENTIONS), "CF-1.0"); + assertEquals(r.metadata.get(Metadata.REALIZATION), "1"); + assertEquals(r.metadata.get(Metadata.EXPERIMENT_ID), "720 ppm stabilization experiment (SRESA1B)"); - assertEquals(metadata.get("File-Type-Description"), + assertEquals(r.metadata.get("File-Type-Description"), "NetCDF-3/CDM"); - String content = handler.toString(); - assertContains("long_name = \"Surface area\"", content); - assertContains("float area(lat=128, lon=256)", content); - assertContains("float lat(lat=128)", content); - assertContains("double lat_bnds(lat=128, bnds=2)", content); -
assertContains("double lon_bnds(lon=256, bnds=2)", content); + assertContains("long_name = \"Surface area\"", r.xml); + assertContains("float area(lat=128, lon=256)", r.xml); + assertContains("float lat(lat=128)", r.xml); + assertContains("double lat_bnds(lat=128, bnds=2)", r.xml); + assertContains("double lon_bnds(lon=256, bnds=2)", r.xml); http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/strings/StringsParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/strings/StringsParserTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/strings/StringsParserTest.java index 5f197d2..1c5b2db 100644 --- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/strings/StringsParserTest.java +++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/strings/StringsParserTest.java @@ -14,21 +14,19 @@ package org.apache.tika.parser.strings; import static org.apache.tika.parser.strings.StringsParser.getStringsProg; -import static org.junit.Assert.*; +import static org.junit.Assert.assertTrue; import static org.junit.Assume.assumeTrue; -import java.io.InputStream; import java.util.Arrays; +import org.apache.tika.TikaTest; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; import org.apache.tika.parser.Parser; import org.apache.tika.parser.external.ExternalParser; -import org.apache.tika.sax.BodyContentHandler; import org.junit.Test; -import org.xml.sax.ContentHandler; -public class StringsParserTest { +public class StringsParserTest extends TikaTest { public static boolean canRun() { StringsConfig config = new StringsConfig(); String[] checkCmd = {config.getStringsPath() + getStringsProg(), "--version"}; @@ -40,7 +38,7 @@ public class 
StringsParserTest { public void testParse() throws Exception { assumeTrue(canRun()); - String resource = "/test-documents/testOCTET_header.dbase3"; + String resource = "testOCTET_header.dbase3"; String[] content = { "CLASSNO", "TITLE", "ITEMNO", "LISTNO", "LISTDATE" }; @@ -50,22 +48,15 @@ public class StringsParserTest { FileConfig fileConfig = new FileConfig(); Parser parser = new StringsParser(); - ContentHandler handler = new BodyContentHandler(); - Metadata metadata = new Metadata(); - ParseContext context = new ParseContext(); context.set(StringsConfig.class, stringsConfig); context.set(FileConfig.class, fileConfig); - - try (InputStream stream = StringsParserTest.class.getResourceAsStream(resource)) { - parser.parse(stream, handler, metadata, context); - } catch (Exception e) { - e.printStackTrace(); - } + Metadata metadata = new Metadata(); + XMLResult r = getXML(resource, parser, metadata, context); // Content for (String word : content) { - assertTrue(handler.toString().contains(word)); + assertTrue(r.xml.contains(word)); } // Metadata http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java index 050ef15..9064597 100644 --- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java +++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java @@ -23,13 +23,14 @@ import java.io.IOException; import java.io.InputStream; import java.io.Reader; +import org.apache.tika.TikaTest; import org.junit.Test; -public class CharsetDetectorTest { +public 
class CharsetDetectorTest extends TikaTest { @Test public void testTagDropper() throws IOException { - try (InputStream in = CharsetDetectorTest.class.getResourceAsStream("/test-documents/resume.html")) { + try (InputStream in = getTestDocumentAsStream("resume.html")) { CharsetDetector detector = new CharsetDetector(); detector.enableInputFilter(true); detector.setText(in); @@ -52,7 +53,7 @@ public class CharsetDetectorTest { @Test public void testEmptyOrNullDeclaredCharset() throws IOException { - try (InputStream in = CharsetDetectorTest.class.getResourceAsStream("/test-documents/resume.html")) { + try (InputStream in = getTestDocumentAsStream("resume.html")) { CharsetDetector detector = new CharsetDetector(); Reader reader = detector.getReader(in, null); assertTrue(reader.ready()); http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/TXTParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/TXTParserTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/TXTParserTest.java index 3de5eac..6d1c99a 100644 --- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/TXTParserTest.java +++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/TXTParserTest.java @@ -18,13 +18,13 @@ package org.apache.tika.parser.txt; import static java.nio.charset.StandardCharsets.ISO_8859_1; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.apache.tika.TikaTest.assertContains; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNull; import java.io.ByteArrayInputStream; import java.io.StringWriter; +import org.apache.tika.TikaTest; import org.apache.tika.metadata.Metadata; import 
org.apache.tika.metadata.TikaCoreProperties; import org.apache.tika.parser.ParseContext; @@ -35,7 +35,7 @@ import org.junit.Test; import org.xml.sax.ContentHandler; import org.xml.sax.helpers.DefaultHandler; -public class TXTParserTest { +public class TXTParserTest extends TikaTest { private Parser parser = new TXTParser(); @@ -233,39 +233,21 @@ public class TXTParserTest { @Test public void testCP866() throws Exception { - Metadata metadata = new Metadata(); - StringWriter writer = new StringWriter(); - parser.parse( - TXTParserTest.class.getResourceAsStream("/test-documents/russian.cp866.txt"), - new WriteOutContentHandler(writer), - metadata, - new ParseContext()); - - assertEquals("text/plain; charset=IBM866", metadata.get(Metadata.CONTENT_TYPE)); + XMLResult r = getXML("russian.cp866.txt", parser); + assertEquals("text/plain; charset=IBM866", r.metadata.get(Metadata.CONTENT_TYPE)); } @Test public void testEBCDIC_CP500() throws Exception { - Metadata metadata = new Metadata(); - StringWriter writer = new StringWriter(); - parser.parse( - TXTParserTest.class.getResourceAsStream("/test-documents/english.cp500.txt"), - new WriteOutContentHandler(writer), - metadata, - new ParseContext()); - - assertEquals("text/plain; charset=IBM500", metadata.get(Metadata.CONTENT_TYPE)); + XMLResult r = getXML("english.cp500.txt", parser); + assertEquals("text/plain; charset=IBM500", r.metadata.get(Metadata.CONTENT_TYPE)); // Additional check that it isn't too eager on short blocks of text - metadata = new Metadata(); - writer = new StringWriter(); - parser.parse( + r = getXML( new ByteArrayInputStream("<html><body>hello world</body></html>".getBytes(ISO_8859_1)), - new WriteOutContentHandler(writer), - metadata, - new ParseContext()); + parser, new Metadata()); - assertEquals("text/plain; charset=ISO-8859-1", metadata.get(Metadata.CONTENT_TYPE)); + assertEquals("text/plain; charset=ISO-8859-1", r.metadata.get(Metadata.CONTENT_TYPE)); } /** @@ -276,20 +258,17 @@ public class 
TXTParserTest { @Test public void testCharsetDetectionWithShortSnipet() throws Exception { final String text = "Hello, World!"; - - Metadata metadata = new Metadata(); - parser.parse( - new ByteArrayInputStream(text.getBytes(UTF_8)), - new BodyContentHandler(), metadata, new ParseContext()); - assertEquals("text/plain; charset=ISO-8859-1", metadata.get(Metadata.CONTENT_TYPE)); + XMLResult r = getXML( + new ByteArrayInputStream(text.getBytes(UTF_8)), parser, new Metadata()); + assertEquals("text/plain; charset=ISO-8859-1", r.metadata.get(Metadata.CONTENT_TYPE)); // Now verify that if we tell the parser the encoding is UTF-8, that's what // we get back (see TIKA-868) - metadata.set(Metadata.CONTENT_TYPE, "application/binary; charset=UTF-8"); + r.metadata.set(Metadata.CONTENT_TYPE, "application/binary; charset=UTF-8"); parser.parse( new ByteArrayInputStream(text.getBytes(UTF_8)), - new BodyContentHandler(), metadata, new ParseContext()); - assertEquals("text/plain; charset=UTF-8", metadata.get(Metadata.CONTENT_TYPE)); + new BodyContentHandler(), r.metadata, new ParseContext()); + assertEquals("text/plain; charset=UTF-8", r.metadata.get(Metadata.CONTENT_TYPE)); } } http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java index 22094f4..665151d 100644 --- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java +++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/DcXMLParserTest.java @@ -20,26 +20,17 @@ import static org.junit.Assert.assertEquals; import static 
org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; -import java.io.InputStream; - import org.apache.tika.TikaTest; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.TikaCoreProperties; -import org.apache.tika.sax.BodyContentHandler; import org.junit.Test; -import org.xml.sax.ContentHandler; -import org.xml.sax.helpers.DefaultHandler; public class DcXMLParserTest extends TikaTest { @Test public void testXMLParserAsciiChars() throws Exception { - try (InputStream input = DcXMLParserTest.class.getResourceAsStream( - "/test-documents/testXML.xml")) { - Metadata metadata = new Metadata(); - ContentHandler handler = new BodyContentHandler(); - new DcXMLParser().parse(input, handler, metadata); - + XMLResult result = getXML("testXML.xml", new DcXMLParser()); + Metadata metadata = result.metadata; assertEquals( "application/xml", metadata.get(Metadata.CONTENT_TYPE)); @@ -74,22 +65,17 @@ public class DcXMLParserTest extends TikaTest { assertEquals("Fr", metadata.get(TikaCoreProperties.LANGUAGE)); assertTrue(metadata.get(TikaCoreProperties.RIGHTS).contains("testing chars")); - String content = handler.toString(); - assertContains("Tika test document", content); + assertContains("Tika test document", result.xml); assertEquals("2000-12-01T00:00:00.000Z", metadata.get(TikaCoreProperties.CREATED)); - } + } @Test public void testXMLParserNonAsciiChars() throws Exception { - try (InputStream input = DcXMLParserTest.class.getResourceAsStream("/test-documents/testXML.xml")) { - Metadata metadata = new Metadata(); - new DcXMLParser().parse(input, new DefaultHandler(), metadata); - - final String expected = "Archim\u00E8de et Lius \u00E0 Ch\u00E2teauneuf testing chars en \u00E9t\u00E9"; - assertEquals(expected, metadata.get(TikaCoreProperties.RIGHTS)); - } + XMLResult r = getXML("testXML.xml", new DcXMLParser()); + final String expected = "Archim\u00E8de et Lius \u00E0 Ch\u00E2teauneuf testing chars en \u00E9t\u00E9"; + 
assertEquals(expected, r.metadata.get(TikaCoreProperties.RIGHTS)); } // TIKA-1048 http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java index 20227a6..536f9d7 100644 --- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java +++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/EmptyAndDuplicateElementsXMLParserTest.java @@ -18,13 +18,10 @@ package org.apache.tika.parser.xml; import static org.junit.Assert.assertEquals; -import java.io.InputStream; - import org.apache.tika.TikaTest; import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Property; import org.apache.tika.parser.ParseContext; -import org.apache.tika.sax.BodyContentHandler; import org.apache.tika.sax.TeeContentHandler; import org.junit.Test; import org.xml.sax.ContentHandler; @@ -38,52 +35,45 @@ public class EmptyAndDuplicateElementsXMLParserTest extends TikaTest { @Test public void testDefaultBehavior() throws Exception { - try (InputStream input = EmptyAndDuplicateElementsXMLParserTest.class.getResourceAsStream( - "/test-documents/testXML3.xml")) { - Metadata metadata = new Metadata(); - ContentHandler handler = new BodyContentHandler(); - new DefaultCustomXMLTestParser().parse(input, handler, metadata, new ParseContext()); + XMLResult r = getXML("testXML3.xml", new DefaultCustomXMLTestParser()); + Metadata metadata = r.metadata; - assertEquals(4, metadata.getValues(FIRST_NAME).length); - 
assertEquals(2, metadata.getValues(LAST_NAME).length); + assertEquals(4, metadata.getValues(FIRST_NAME).length); + assertEquals(2, metadata.getValues(LAST_NAME).length); - assertEquals("John", metadata.getValues(FIRST_NAME)[0]); - assertEquals("Smith", metadata.getValues(LAST_NAME)[0]); + assertEquals("John", metadata.getValues(FIRST_NAME)[0]); + assertEquals("Smith", metadata.getValues(LAST_NAME)[0]); - assertEquals("Jane", metadata.getValues(FIRST_NAME)[1]); - assertEquals("Doe", metadata.getValues(LAST_NAME)[1]); + assertEquals("Jane", metadata.getValues(FIRST_NAME)[1]); + assertEquals("Doe", metadata.getValues(LAST_NAME)[1]); - // We didn't know Bob's last name, but now we don't know an entry existed - assertEquals("Bob", metadata.getValues(FIRST_NAME)[2]); + // We didn't know Bob's last name, but now we don't know an entry existed + assertEquals("Bob", metadata.getValues(FIRST_NAME)[2]); - // We don't know Kate's last name because it was a duplicate - assertEquals("Kate", metadata.getValues(FIRST_NAME)[3]); - } + // We don't know Kate's last name because it was a duplicate + assertEquals("Kate", metadata.getValues(FIRST_NAME)[3]); } @Test public void testEmptiesAndRepeats() throws Exception { - try (InputStream input = EmptyAndDuplicateElementsXMLParserTest.class.getResourceAsStream( - "/test-documents/testXML3.xml")) { - Metadata metadata = new Metadata(); - ContentHandler handler = new BodyContentHandler(); - new AllowEmptiesAndDuplicatesCustomXMLTestParser().parse(input, handler, metadata, new ParseContext()); + XMLResult r = getXML("testXML3.xml", new AllowEmptiesAndDuplicatesCustomXMLTestParser()); + Metadata metadata = r.metadata; - assertEquals(4, metadata.getValues(FIRST_NAME).length); - assertEquals(4, metadata.getValues(LAST_NAME).length); + assertEquals(4, metadata.getValues(FIRST_NAME).length); + assertEquals(4, metadata.getValues(LAST_NAME).length); - assertEquals("John", metadata.getValues(FIRST_NAME)[0]); - assertEquals("Smith", 
metadata.getValues(LAST_NAME)[0]); + assertEquals("John", metadata.getValues(FIRST_NAME)[0]); + assertEquals("Smith", metadata.getValues(LAST_NAME)[0]); - assertEquals("Jane", metadata.getValues(FIRST_NAME)[1]); - assertEquals("Doe", metadata.getValues(LAST_NAME)[1]); + assertEquals("Jane", metadata.getValues(FIRST_NAME)[1]); + assertEquals("Doe", metadata.getValues(LAST_NAME)[1]); - assertEquals("Bob", metadata.getValues(FIRST_NAME)[2]); - assertEquals("", metadata.getValues(LAST_NAME)[2]); + assertEquals("Bob", metadata.getValues(FIRST_NAME)[2]); + assertEquals("", metadata.getValues(LAST_NAME)[2]); + + assertEquals("Kate", metadata.getValues(FIRST_NAME)[3]); + assertEquals("Smith", metadata.getValues(LAST_NAME)[3]); - assertEquals("Kate", metadata.getValues(FIRST_NAME)[3]); - assertEquals("Smith", metadata.getValues(LAST_NAME)[3]); - } } private class DefaultCustomXMLTestParser extends XMLParser { http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/FictionBookParserTest.java ---------------------------------------------------------------------- diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/FictionBookParserTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/FictionBookParserTest.java index 62454fa..aee7307 100644 --- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/FictionBookParserTest.java +++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/xml/FictionBookParserTest.java @@ -16,38 +16,29 @@ */ package org.apache.tika.parser.xml; -import static org.apache.tika.TikaTest.assertContains; import static org.junit.Assert.assertEquals; import java.io.InputStream; -import org.apache.tika.TikaTest.TrackingHandler; +import org.apache.tika.TikaTest; import org.apache.tika.extractor.ContainerExtractor; import 
org.apache.tika.extractor.ParserContainerExtractor; import org.apache.tika.io.TikaInputStream; import org.apache.tika.metadata.Metadata; import org.apache.tika.parser.ParseContext; -import org.apache.tika.sax.BodyContentHandler; import org.junit.Test; -import org.xml.sax.ContentHandler; -public class FictionBookParserTest { +public class FictionBookParserTest extends TikaTest { @Test public void testFB2() throws Exception { - try (InputStream input = FictionBookParserTest.class.getResourceAsStream("/test-documents/test.fb2")) { - Metadata metadata = new Metadata(); - ContentHandler handler = new BodyContentHandler(); - new FictionBookParser().parse(input, handler, metadata, new ParseContext()); - String content = handler.toString(); - - assertContains("1812", content); - } + XMLResult r = getXML("test.fb2", new FictionBookParser(), new Metadata(), new ParseContext()); + assertContains("1812", r.xml); } @Test public void testEmbedded() throws Exception { - try (InputStream input = FictionBookParserTest.class.getResourceAsStream("/test-documents/test.fb2")) { + try (InputStream input = getTestDocumentAsStream("test.fb2")) { ContainerExtractor extractor = new ParserContainerExtractor(); TikaInputStream stream = TikaInputStream.get(input); http://git-wip-us.apache.org/repos/asf/tika/blob/aa5f60d7/tika-parsers/pom.xml ---------------------------------------------------------------------- diff --git a/tika-parsers/pom.xml b/tika-parsers/pom.xml deleted file mode 100644 index 67207d2..0000000 --- a/tika-parsers/pom.xml +++ /dev/null @@ -1,333 +0,0 @@ -<?xml version="1.0" encoding="UTF-8"?> - -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. 
You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. ---> - -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - <modelVersion>4.0.0</modelVersion> - - <parent> - <groupId>org.apache.tika</groupId> - <artifactId>tika-parent</artifactId> - <version>2.0-SNAPSHOT</version> - <relativePath>../tika-parent/pom.xml</relativePath> - </parent> - - <artifactId>tika-parsers</artifactId> - <packaging>bundle</packaging> - <name>Apache Tika parsers</name> - <url>http://tika.apache.org/</url> - - <properties> - <vorbis.version>0.6</vorbis.version> - </properties> - - <dependencies> - <!-- Optional OSGi dependency, used only when running within OSGi --> - <dependency> - <groupId>org.osgi</groupId> - <artifactId>org.osgi.core</artifactId> - <version>4.0.0</version> - <scope>provided</scope> - <optional>true</optional> - </dependency> - - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-core</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-multimedia-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-advanced-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-cad-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - 
<groupId>${project.groupId}</groupId> - <artifactId>tika-parser-code-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-crypto-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-database-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-ebook-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-journal-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-office-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-package-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-pdf-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-scientific-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-text-module</artifactId> - <version>${project.version}</version> - </dependency> - <dependency> - <groupId>${project.groupId}</groupId> - <artifactId>tika-parser-web-module</artifactId> - <version>${project.version}</version> - </dependency> - - <!-- Optional OSGi dependencies, used only when running within OSGi --> - <dependency> - <groupId>org.apache.felix</groupId> - <artifactId>org.apache.felix.scr.annotations</artifactId> - <scope>provided</scope> 
- </dependency> - - <!-- Externally Maintained Parsers --> - <dependency> - <groupId>org.gagravarr</groupId> - <artifactId>vorbis-java-tika</artifactId> - <version>${vorbis.version}</version> - </dependency> - <dependency> - <groupId>org.gagravarr</groupId> - <artifactId>vorbis-java-core</artifactId> - <version>${vorbis.version}</version> - </dependency> - - <!-- Test dependencies --> - <dependency> - <groupId>junit</groupId> - <artifactId>junit</artifactId> - </dependency> - <dependency> - <groupId>org.slf4j</groupId> - <artifactId>slf4j-log4j12</artifactId> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.tika</groupId> - <artifactId>tika-core</artifactId> - <version>${project.version}</version> - <type>test-jar</type> - <scope>test</scope> - </dependency> - <dependency> - <groupId>org.apache.tika</groupId> - <artifactId>tika-test-resources</artifactId> - <version>${project.version}</version> - <type>test-jar</type> - <scope>test</scope> - </dependency> - - </dependencies> - - <build> - <plugins> - <plugin> - <groupId>org.apache.felix</groupId> - <artifactId>maven-bundle-plugin</artifactId> - <extensions>true</extensions> - <configuration> - <instructions> - <Bundle-DocURL>${project.url}</Bundle-DocURL> - <Bundle-Activator> - org.apache.tika.parser.internal.Activator - </Bundle-Activator> - <Import-Package> - org.w3c.dom, - org.apache.tika.*, - *;resolution:=optional - </Import-Package> - </instructions> - </configuration> - </plugin> - <plugin> - <groupId>org.apache.rat</groupId> - <artifactId>apache-rat-plugin</artifactId> - </plugin> - - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-dependency-plugin</artifactId> - <version>2.10</version> - <executions> - <execution> - <id>unpack</id> - <phase>compile</phase> - <goals> - <goal>unpack</goal> - </goals> - <configuration> - <artifactItems> - <artifactItem> - <groupId>${project.groupId}</groupId> - <artifactId>tika-test-resources</artifactId> - 
<version>${project.version}</version> - <type>test-jar</type> - <overWrite>true</overWrite> - <outputDirectory>${project.build.testOutputDirectory}</outputDirectory> - </artifactItem> - </artifactItems> - </configuration> - </execution> - </executions> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-shade-plugin</artifactId> - <version>2.4.2</version> - <executions> - <execution> - <phase>package</phase> - <goals> - <goal>shade</goal> - </goals> - <configuration> - <createDependencyReducedPom> - false - </createDependencyReducedPom> - <artifactSet> - <includes> - <include>org.apache.tika:tika-parser-multimedia-module</include> - <include>org.apache.tika:tika-parser-advanced-module</include> - <include>org.apache.tika:tika-parser-cad-module</include> - <include>org.apache.tika:tika-parser-code-module</include> - <include>org.apache.tika:tika-parser-crypto-module</include> - <include>org.apache.tika:tika-parser-database-module</include> - <include>org.apache.tika:tika-parser-ebook-module</include> - <include>org.apache.tika:tika-parser-journal-module</include> - <include>org.apache.tika:tika-parser-office-module</include> - <include>org.apache.tika:tika-parser-package-module</include> - <include>org.apache.tika:tika-parser-pdf-module</include> - <include>org.apache.tika:tika-parser-scientific-module</include> - <include>org.apache.tika:tika-parser-text-module</include> - <include>org.apache.tika:tika-parser-web-module</include> - </includes> - </artifactSet> - <transformers> - <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> - <resource>META-INF/services/org.apache.tika.detect.Detector</resource> - </transformer> - <transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> - <resource>META-INF/services/org.apache.tika.detect.EncodingDetector</resource> - </transformer> - <transformer 
implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer"> - <resource>META-INF/services/org.apache.tika.parser.Parser</resource> - </transformer> - </transformers> - </configuration> - </execution> - </executions> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-jar-plugin</artifactId> - <executions> - <execution> - <goals> - <goal>test-jar</goal> - </goals> - </execution> - </executions> - </plugin> - </plugins> - - <pluginManagement> - <plugins> - <!-- This plugin's configuration is used to store Eclipse m2e --> - <!-- settings only. It has no influence on the Maven build itself. --> - <plugin> - <groupId>org.eclipse.m2e</groupId> - <artifactId>lifecycle-mapping</artifactId> - <version>1.0.0</version> - <configuration> - <lifecycleMappingMetadata> - <pluginExecutions> - <pluginExecution> - <pluginExecutionFilter> - <groupId>org.apache.felix</groupId> - <artifactId>maven-scr-plugin</artifactId> - <versionRange>[1.7.2,)</versionRange> - <goals> - <goal>scr</goal> - </goals> - </pluginExecutionFilter> - <action> - <execute /> - </action> - </pluginExecution> - </pluginExecutions> - </lifecycleMappingMetadata> - </configuration> - </plugin> - </plugins> - </pluginManagement> - </build> - - <organization> - <name>The Apache Software Foundation</name> - <url>http://www.apache.org</url> - </organization> - <scm> - <url>http://svn.apache.org/viewvc/tika/trunk/tika-parsers</url> - <connection>scm:svn:http://svn.apache.org/repos/asf/tika/trunk/tika-parsers</connection> - <developerConnection>scm:svn:https://svn.apache.org/repos/asf/tika/trunk/tika-parsers</developerConnection> - </scm> - <issueManagement> - <system>JIRA</system> - <url>https://issues.apache.org/jira/browse/TIKA</url> - </issueManagement> - <ciManagement> - <system>Jenkins</system> - <url>https://builds.apache.org/job/Tika-trunk/</url> - </ciManagement> -</project>
