http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java index 8886e31..ed300af 100644 --- a/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java @@ -21,11 +21,9 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.html.AbstractExtractorTestCase; import org.apache.any23.vocab.CSV; import org.junit.Test; -import org.eclipse.rdf4j.model.impl.LiteralImpl; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.model.vocabulary.RDF; import org.eclipse.rdf4j.model.vocabulary.XMLSchema; -import org.eclipse.rdf4j.repository.RepositoryException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -36,93 +34,93 @@ import org.slf4j.LoggerFactory; */ public class CSVExtractorTest extends AbstractExtractorTestCase { - private static final Logger logger = LoggerFactory - .getLogger(CSVExtractorTest.class); - - @Override - protected ExtractorFactory<?> getExtractorFactory() { - return new CSVExtractorFactory(); - } - - @Test - public void testExtractionCommaSeparated() throws Exception { - CSV csv = CSV.getInstance(); - assertExtract("/org/apache/any23/extractor/csv/test-comma.csv"); - logger.debug(dumpModelToRDFXML()); - - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 28); - assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); - assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", - XMLSchema.INTEGER)); - assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", - XMLSchema.INTEGER)); - } - - @Test - public void testExtractionSemicolonSeparated() throws Exception { - CSV csv = CSV.getInstance(); - assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv"); - logger.debug(dumpModelToRDFXML()); - - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 28); - assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); - assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", - XMLSchema.INTEGER)); - assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", - XMLSchema.INTEGER)); - } - - @Test - public void testExtractionTabSeparated() throws Exception { - CSV csv = CSV.getInstance(); - assertExtract("/org/apache/any23/extractor/csv/test-tab.csv"); - logger.debug(dumpModelToRDFXML()); - - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 28); - assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); - assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", - XMLSchema.INTEGER)); - assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", - XMLSchema.INTEGER)); - } - - @Test - public void testTypeManagement() throws Exception { - CSV csv = CSV.getInstance(); - assertExtract("/org/apache/any23/extractor/csv/test-type.csv"); - logger.debug(dumpModelToRDFXML()); - - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 21); - assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); - assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("2", - XMLSchema.INTEGER)); - assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", - XMLSchema.INTEGER)); - assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("5.2", XMLSchema.FLOAT)); - assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("7.9", XMLSchema.FLOAT)); - assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("10", XMLSchema.INTEGER)); - } - - @Test - public void testExtractionEmptyValue() throws Exception { - CSV csv = CSV.getInstance(); - assertExtract("/org/apache/any23/extractor/csv/test-missing.csv"); - logger.debug(dumpModelToRDFXML()); - - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 25); - assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); - assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", - XMLSchema.INTEGER)); - assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", - XMLSchema.INTEGER)); - assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Michele", XMLSchema.STRING)); - assertContains(null, null, - SimpleValueFactory.getInstance().createLiteral("Giovanni", XMLSchema.STRING)); - } + private static final Logger logger = LoggerFactory + .getLogger(CSVExtractorTest.class); + + @Override + protected ExtractorFactory<?> getExtractorFactory() { + return new CSVExtractorFactory(); + } + + @Test + public void testExtractionCommaSeparated() throws Exception { + CSV csv = CSV.getInstance(); + assertExtract("/org/apache/any23/extractor/csv/test-comma.csv"); + logger.debug(dumpModelToRDFXML()); + + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 28); + assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); + assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", + XMLSchema.INTEGER)); + assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", + XMLSchema.INTEGER)); + } + + @Test + public void testExtractionSemicolonSeparated() throws Exception { + CSV csv = CSV.getInstance(); + assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv"); + logger.debug(dumpModelToRDFXML()); + + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 28); + assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); + assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", + XMLSchema.INTEGER)); + assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", + XMLSchema.INTEGER)); + } + + @Test + public void testExtractionTabSeparated() throws Exception { + CSV csv = CSV.getInstance(); + assertExtract("/org/apache/any23/extractor/csv/test-tab.csv"); + logger.debug(dumpModelToRDFXML()); + + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 28); + assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); + assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", + XMLSchema.INTEGER)); + assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", + XMLSchema.INTEGER)); + } + + @Test + public void testTypeManagement() throws Exception { + CSV csv = CSV.getInstance(); + assertExtract("/org/apache/any23/extractor/csv/test-type.csv"); + logger.debug(dumpModelToRDFXML()); + + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 21); + assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); + assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("2", + XMLSchema.INTEGER)); + assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", + XMLSchema.INTEGER)); + assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("5.2", XMLSchema.FLOAT)); + assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("7.9", XMLSchema.FLOAT)); + assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("10", XMLSchema.INTEGER)); + } + + @Test + public void testExtractionEmptyValue() throws Exception { + CSV csv = CSV.getInstance(); + assertExtract("/org/apache/any23/extractor/csv/test-missing.csv"); + logger.debug(dumpModelToRDFXML()); + + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 25); + assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); + assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4", + XMLSchema.INTEGER)); + assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3", + XMLSchema.INTEGER)); + assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Michele", XMLSchema.STRING)); + assertContains(null, null, + SimpleValueFactory.getInstance().createLiteral("Giovanni", XMLSchema.STRING)); + } }
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java b/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java index 855a88c..5354924 100644 --- a/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java +++ b/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java @@ -31,6 +31,7 @@ import org.apache.any23.writer.RepositoryWriter; import org.junit.After; import org.junit.Assert; import org.junit.Before; +import org.eclipse.rdf4j.common.iteration.Iterations; import org.eclipse.rdf4j.model.BNode; import org.eclipse.rdf4j.model.Literal; import org.eclipse.rdf4j.model.Resource; @@ -62,802 +63,799 @@ import java.util.Map; */ public abstract class AbstractExtractorTestCase extends AbstractAny23TestBase { - /** - * Base test document. - */ - protected static IRI baseIRI = RDFUtils.iri("http://bob.example.com/"); // TODO: - // change - // base - // IRI - // string. - - /** - * Internal connection used to collect extraction results. - */ - protected RepositoryConnection conn; - - /** - * The latest generated report. - */ - private SingleDocumentExtractionReport report; - - private Sail store; - - private SailRepository repository; - - /** - * Constructor. - */ - public AbstractExtractorTestCase() { - super(); - } - - /** - * @return the factory of the extractor to be tested. - */ - protected abstract ExtractorFactory<?> getExtractorFactory(); - - /** - * Test case initialization. - * - * @throws Exception - */ - @Before - public void setUp() throws Exception { - super.setUp(); - store = new MemoryStore(); - repository = new SailRepository(store); - repository.initialize(); - conn = repository.getConnection(); - } - - /** - * Test case resources release. - * - * @throws RepositoryException - */ - @After - public void tearDown() throws RepositoryException { - try { - conn.close(); - } finally { - repository.shutDown(); - } - conn = null; - report = null; - store = null; - repository = null; - } - - /** - * @return the connection to the memory repository. - */ - protected RepositoryConnection getConnection() { - return conn; - } - - /** - * @return the last generated report. - */ - protected SingleDocumentExtractionReport getReport() { - return report; - } - - /** - * Returns the list of issues raised by a given extractor. - * - * @param extractorName - * name of the extractor. - * @return collection of issues. - */ - protected Collection<IssueReport.Issue> getIssues(String extractorName) { - for (Map.Entry<String, Collection<IssueReport.Issue>> issueEntry : report - .getExtractorToIssues().entrySet()) { - if (issueEntry.getKey().equals(extractorName)) { - return issueEntry.getValue(); - } - } - return Collections.emptyList(); - } - - /** - * Returns the list of issues raised by the extractor under testing. - * - * @return collection of issues. - */ - protected Collection<IssueReport.Issue> getIssues() { - return getIssues(getExtractorFactory().getExtractorName()); - } - - /** - * Applies the extractor provided by the {@link #getExtractorFactory()} to - * the specified resource. - * - * @param resource - * resource name. - * @throws org.apache.any23.extractor.ExtractionException - * @throws IOException - */ - // TODO: MimeType detector to null forces the execution of all extractors, - // but extraction - // tests should be based on mimetype detection. - protected void extract(String resource) throws ExtractionException, - IOException { - SingleDocumentExtraction ex = new SingleDocumentExtraction( - new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseIRI - .toString()), getExtractorFactory(), - new RepositoryWriter(conn)); - ex.setMIMETypeDetector(null); - report = ex.run(); - } - - /** - * Performs data extraction over the content of a resource and assert that - * the extraction was fine. - * - * @param resource - * resource name. - * @param assertNoIssues - * if <code>true</code>invokes {@link #assertNoIssues()} after - * the extraction. - */ - protected void assertExtract(String resource, boolean assertNoIssues) { - try { - extract(resource); - if (assertNoIssues) - assertNoIssues(); - } catch (ExtractionException ex) { - throw new RuntimeException(ex); - } catch (IOException ex) { - throw new RuntimeException(ex); - } - } - - /** - * Performs data extraction over the content of a resource and assert that - * the extraction was fine and raised no issues. - * - * @param resource - */ - protected void assertExtract(String resource) { - assertExtract(resource, true); - } - - /** - * Asserts that the extracted triples contain the pattern - * <code>(_ p o)</code>. - * - * @param p - * predicate - * @param o - * object. - * @throws RepositoryException - */ - protected void assertContains(IRI p, Resource o) throws RepositoryException { - assertContains(null, p, o); - } - - /** - * Asserts that the extracted triples contain the pattern - * <code>(_ p o)</code>. - * - * @param p - * predicate - * @param o - * object. - * @throws RepositoryException - */ - protected void assertContains(IRI p, String o) throws RepositoryException { - assertContains(null, p, RDFUtils.literal(o)); - } - - /** - * Asserts that the extracted triples contain the pattern - * <code>(_ p o)</code>. - * - * @param p - * predicate - * @param o - * object. - * @throws RepositoryException - */ - protected void assertNotContains(IRI p, Resource o) - throws RepositoryException { - assertNotContains(null, p, o); - } - - /** - * Asserts that the extracted triples contain the pattern - * <code>(s p o)</code>. - * - * @param s - * subject. - * @param p - * predicate. - * @param o - * object. - * @throws RepositoryException - */ - protected void assertContains(Resource s, IRI p, Value o) - throws RepositoryException { - Assert.assertTrue( - getFailedExtractionMessage() - + String.format("Cannot find triple (%s %s %s)", s, p, - o), conn.hasStatement(s, p, o, false)); - } - - /** - * Asserts that the extracted triples contain the pattern - * <code>(s p o)</code>. - * - * @param s - * subject. - * @param p - * predicate. - * @param o - * object. - * @throws RepositoryException - */ - protected void assertNotContains(Resource s, IRI p, String o) - throws RepositoryException { - Assert.assertFalse(getFailedExtractionMessage(), - conn.hasStatement(s, p, RDFUtils.literal(o), false)); - } - - /** - * Asserts that the extracted triples contain the pattern - * <code>(s p o)</code>. - * - * @param s - * subject. - * @param p - * predicate. - * @param o - * object. - * @throws RepositoryException - */ - protected void assertNotContains(Resource s, IRI p, Resource o) - throws RepositoryException { - Assert.assertFalse(getFailedExtractionMessage(), - conn.hasStatement(s, p, o, false)); - } - - /** - * Asserts that the model contains at least a statement. - * - * @throws RepositoryException - */ - protected void assertModelNotEmpty() throws RepositoryException { - Assert.assertFalse("The model is expected to not be empty." - + getFailedExtractionMessage(), conn.isEmpty()); - } - - /** - * Asserts that the model doesn't contain the pattern <code>(s p o)</code> - * - * @param s - * subject. - * @param p - * predicate. - * @param o - * object. - * @throws RepositoryException - */ - protected void assertNotContains(Resource s, IRI p, Literal o) - throws RepositoryException { - Assert.assertFalse(getFailedExtractionMessage(), - conn.hasStatement(s, p, o, false)); - } - - /** - * Asserts that the model is expected to contains no statements. - * - * @throws RepositoryException - */ - protected void assertModelEmpty() throws RepositoryException { - Assert.assertTrue(getFailedExtractionMessage(), conn.isEmpty()); - } - - /** - * Asserts that the extraction generated no issues. - */ - protected void assertNoIssues() { - for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report - .getExtractorToIssues().entrySet()) { - if (entry.getValue().size() > 0) { - System.out.println("Unexpected issue for extractor " + entry.getKey() - + " : " + entry.getValue()); - } - for(Issue nextIssue : entry.getValue()) { - if(nextIssue.getLevel() == IssueLevel.ERROR || nextIssue.getLevel() == IssueLevel.FATAL) { - Assert.fail("Unexpected issue for extractor " + entry.getKey() - + " : " + entry.getValue()); - } - } - } - } - - /** - * Asserts that an issue has been produced by the processed - * {@link org.apache.any23.extractor.Extractor}. - * - * @param level - * expected issue level - * @param issueRegex - * regex matching the expected human readable issue message. - */ - protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) { - final Collection<IssueReport.Issue> issues = getIssues(getExtractorFactory() - .getExtractorName()); - boolean found = false; - for (IssueReport.Issue issue : issues) { - if (issue.getLevel() == level - && issue.getMessage().matches(issueRegex)) { - found = true; - break; - } - } - Assert.assertTrue(String.format( - "Cannot find issue with level %s matching expression '%s'", - level, issueRegex), found); - } - - /** - * Verifies that the current model contains all the given statements. - * - * @param statements - * list of statements to be verified. - * @throws RepositoryException - */ - public void assertContainsModel(Statement[] statements) - throws RepositoryException { - for (Statement statement : statements) { - assertContains(statement); - } - } - - /** - * Verifies that the current model contains all the statements declared in - * the specified <code>modelFile</code>. - * - * @param modelResource - * the resource containing the model. - * @throws RDFHandlerException - * @throws IOException - * @throws RDFParseException - * @throws RepositoryException - */ - public void assertContainsModel(String modelResource) - throws RDFHandlerException, IOException, RDFParseException, - RepositoryException { - getConnection().remove(null, SINDICE.getInstance().date, (Value) null, - (Resource) null); - getConnection().remove(null, SINDICE.getInstance().size, (Value) null, - (Resource) null); - assertContainsModel(RDFUtils.parseRDF(modelResource)); - } - - /** - * Asserts that the given pattern <code>(s p o)</code> satisfies the - * expected number of statements. - * - * @param s - * subject. - * @param p - * predicate. - * @param o - * object. - * @param expected - * expected matches. - * @throws RepositoryException - */ - protected void assertStatementsSize(Resource s, IRI p, Value o, int expected) - throws RDFHandlerException, RepositoryException { - int statementsSize = getStatementsSize(s, p, o); - if (statementsSize != expected) { - getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, System.out)); - } - - Assert.assertEquals("Unexpected number of matching statements.", - expected, statementsSize); - } - - /** - * Asserts that the given pattern <code>(_ p o)</code> satisfies the - * expected number of statements. - * - * @param p - * predicate. - * @param o - * object. - * @param expected - * expected matches. - * @throws RepositoryException - */ - protected void assertStatementsSize(IRI p, Value o, int expected) - throws RDFHandlerException, RepositoryException { - assertStatementsSize(null, p, o, expected); - } - - /** - * Asserts that the given pattern <code>(_ p o)</code> satisfies the - * expected number of statements. - * - * @param p - * predicate. - * @param o - * object. - * @param expected - * expected matches. - * @throws RepositoryException - */ - protected void assertStatementsSize(IRI p, String o, int expected) - throws RDFHandlerException, RepositoryException { - assertStatementsSize(p, o == null ? null : RDFUtils.literal(o), - expected); - } - - /** - * Asserts that the given pattern <code>(s p _)</code> is not present. - * - * @param s - * subject. - * @param p - * predicate. - * @throws RepositoryException - */ - protected void assertNotFound(Resource s, IRI p) throws RepositoryException { - RepositoryResult<Statement> statements = conn.getStatements(s, p, null, - true); - try { - Assert.assertFalse("Expected no statements.", statements.hasNext()); - } finally { - statements.close(); - } - } - - /** - * Returns the blank subject matching the pattern <code>(_:b p o)</code>, it - * is expected to exists and be just one. - * - * @param p - * predicate. - * @param o - * object. - * @return the matching blank subject. - * @throws RepositoryException - */ - protected Resource findExactlyOneBlankSubject(IRI p, Value o) - throws RepositoryException { - RepositoryResult<Statement> it = conn.getStatements(null, p, o, false); - try { - Assert.assertTrue(getFailedExtractionMessage(), it.hasNext()); - Statement stmt = it.next(); - Resource result = stmt.getSubject(); - Assert.assertTrue(getFailedExtractionMessage(), - result instanceof BNode); - Assert.assertFalse(getFailedExtractionMessage(), it.hasNext()); - return result; - } finally { - it.close(); - } - } - - /** - * Returns the object matching the pattern <code>(s p o)</code>, it is - * expected to exists and be just one. - * - * @param s - * subject. - * @param p - * predicate. - * @return the matching object. - * @throws RepositoryException - */ - protected Value findExactlyOneObject(Resource s, IRI p) - throws RepositoryException { - RepositoryResult<Statement> it = conn.getStatements(s, p, null, false); - try { - Assert.assertTrue(getFailedExtractionMessage(), it.hasNext()); - return it.next().getObject(); - } finally { - it.close(); - } - } - - /** - * Returns all the subjects matching the pattern <code>(s? p o)</code>. - * - * @param p - * predicate. - * @param o - * object. - * @return list of matching subjects. - * @throws RepositoryException - */ - protected List<Resource> findSubjects(IRI p, Value o) - throws RepositoryException { - RepositoryResult<Statement> it = conn.getStatements(null, p, o, false); - List<Resource> subjects = new ArrayList<Resource>(); - try { - Statement statement; - while (it.hasNext()) { - statement = it.next(); - subjects.add(statement.getSubject()); - } - } finally { - it.close(); - } - return subjects; - } - - /** - * Returns all the objects matching the pattern <code>(s p _)</code>. - * - * @param s - * predicate. - * @param p - * predicate. - * @return list of matching objects. - * @throws RepositoryException - */ - protected List<Value> findObjects(Resource s, IRI p) - throws RepositoryException { - RepositoryResult<Statement> it = conn.getStatements(s, p, null, false); - List<Value> objects = new ArrayList<Value>(); - try { - Statement statement; - while (it.hasNext()) { - statement = it.next(); - objects.add(statement.getObject()); - } - } finally { - it.close(); - } - return objects; - } - - /** - * Finds the object matching the pattern <code>(s p _)</code>, asserts to - * find exactly one result. - * - * @param s - * subject. - * @param p - * predicate - * @return matching object. - * @throws org.openrdf.repository.RepositoryException - */ - protected Value findObject(Resource s, IRI p) throws RepositoryException { - RepositoryResult<Statement> statements = conn.getStatements(s, p, null, - true); - try { - Assert.assertTrue("Expected at least a statement.", - statements.hasNext()); - return (statements.next().getObject()); - } finally { - statements.close(); - } - } - - /** - * Finds the resource object matching the pattern <code>(s p _)</code>, - * asserts to find exactly one result. - * - * @param s - * subject. - * @param p - * predicate. - * @return matching object. - * @throws RepositoryException - */ - protected Resource findObjectAsResource(Resource s, IRI p) - throws RepositoryException { - final Value v = findObject(s, p); - try { - return (Resource) v; - } catch (ClassCastException cce) { - Assert.fail("Expected resource object, found: " - + v.getClass().getSimpleName()); - throw new IllegalStateException(); - } - } - - /** - * Finds the literal object matching the pattern <code>(s p _)</code>, - * asserts to find exactly one result. - * - * @param s - * subject. - * @param p - * predicate. - * @return matching object. - * @throws RepositoryException - */ - protected String findObjectAsLiteral(Resource s, IRI p) - throws RepositoryException { - return findObject(s, p).stringValue(); - } - - /** - * Dumps the extracted model in <i>Turtle</i> format. - * - * @return a string containing the model in Turtle. - * @throws RepositoryException - */ - protected String dumpModelToTurtle() throws RepositoryException { - StringWriter w = new StringWriter(); - try { - conn.export(Rio.createWriter(RDFFormat.TURTLE, w)); - return w.toString(); - } catch (RDFHandlerException ex) { - throw new RuntimeException(ex); - } - } - - /** - * Dumps the extracted model in <i>NQuads</i> format. - * - * @return a string containing the model in NQuads. - * @throws RepositoryException - */ - protected String dumpModelToNQuads() throws RepositoryException { - StringWriter w = new StringWriter(); - try { - conn.export(Rio.createWriter(RDFFormat.NQUADS, w)); - return w.toString(); - } catch (RDFHandlerException ex) { - throw new RuntimeException(ex); - } - } - - /** - * Dumps the extracted model in <i>RDFXML</i> format. - * - * @return a string containing the model in RDFXML. - * @throws RepositoryException - */ - protected String dumpModelToRDFXML() throws RepositoryException { - StringWriter w = new StringWriter(); - try { - conn.export(Rio.createWriter(RDFFormat.RDFXML, w)); - return w.toString(); - } catch (RDFHandlerException ex) { - throw new RuntimeException(ex); - } - } - - /** - * Dumps the list of statements contained in the extracted model. - * - * @return list of extracted statements. - * @throws RepositoryException - */ - protected List<Statement> dumpAsListOfStatements() - throws RepositoryException { - return conn.getStatements(null, null, null, false).asList(); - } - - /** - * @return string containing human readable statements. - * @throws RepositoryException - */ - protected String dumpHumanReadableTriples() throws RepositoryException { - StringBuilder sb = new StringBuilder(); - RepositoryResult<Statement> result = conn.getStatements(null, null, - null, false); - while (result.hasNext()) { - Statement statement = result.next(); - sb.append(String.format("%s %s %s %s\n", statement.getSubject(), - statement.getPredicate(), statement.getObject(), - statement.getContext())); - - } - return sb.toString(); - } - - /** - * Checks that a statement is contained in the extracted model. If the - * statement declares bnodes, they are replaced with <code>_</code> - * patterns. - * - * @param statement - * @throws RepositoryException - */ - // TODO: bnode check is too weak, introduce graph omomorphism check. - protected void assertContains(Statement statement) - throws RepositoryException { - Assert.assertTrue("Cannot find statement " + statement + " in model.", - conn.hasStatement( - statement.getSubject() instanceof BNode ? null - : statement.getSubject(), statement - .getPredicate(), - statement.getObject() instanceof BNode ? null - : statement.getObject(), false)); - } - - /** - * Assert that the model contains the statement <code>(s p l)</code> where - * <code>l</code> is a literal. - * - * @param s - * subject. - * @param p - * predicate. - * @param l - * literal content. - * @throws RepositoryException - */ - protected void assertContains(Resource s, IRI p, String l) - throws RepositoryException { - assertContains(s, p, RDFUtils.literal(l)); - } - - /** - * Assert that the model contains the statement <code>(s p l)</code> where - * <code>l</code> is a language literal. - * - * @param s - * subject. - * @param p - * predicate. - * @param l - * literal content. - * @param lang - * literal language. - * @throws RepositoryException - */ - protected void assertContains(Resource s, IRI p, String l, String lang) - throws RepositoryException { - assertContains(s, p, RDFUtils.literal(l, lang)); - } - - /** - * Returns all statements matching the pattern <code>(s p o)</code>. - * - * @param s - * subject. - * @param p - * predicate. - * @param o - * object. - * @return list of statements. - * @throws RepositoryException - */ - protected RepositoryResult<Statement> getStatements(Resource s, IRI p, - Value o) throws RepositoryException { - return conn.getStatements(s, p, o, false); - } - - /** - * Counts all statements matching the pattern <code>(s p o)</code>. - * - * @param s - * subject. - * @param p - * predicate. - * @param o - * object. - * @return number of matches. - * @throws RepositoryException - */ - protected int getStatementsSize(Resource s, IRI p, Value o) - throws RepositoryException { - RepositoryResult<Statement> result = getStatements(s, p, o); - int count = 0; - try { - while (result.hasNext()) { - result.next(); - count++; - } - } finally { - result.close(); - } - return count; - } - - private String getFailedExtractionMessage() throws RepositoryException { - return "Assertion failed! Extracted triples:\n" + dumpModelToNQuads(); - } + /** + * Base test document. + */ + //TODO: change base IRI string. + protected static IRI baseIRI = RDFUtils.iri("http://bob.example.com/"); + + /** + * Internal connection used to collect extraction results. + */ + protected RepositoryConnection conn; + + /** + * The latest generated report. + */ + private SingleDocumentExtractionReport report; + + private Sail store; + + private SailRepository repository; + + /** + * Constructor. + */ + public AbstractExtractorTestCase() { + super(); + } + + /** + * @return the factory of the extractor to be tested. + */ + protected abstract ExtractorFactory<?> getExtractorFactory(); + + /** + * Test case initialization. + * + * @throws Exception + */ + @Before + public void setUp() throws Exception { + super.setUp(); + store = new MemoryStore(); + repository = new SailRepository(store); + repository.initialize(); + conn = repository.getConnection(); + } + + /** + * Test case resources release. + * + * @throws RepositoryException + */ + @After + public void tearDown() throws RepositoryException { + try { + conn.close(); + } finally { + repository.shutDown(); + } + conn = null; + report = null; + store = null; + repository = null; + } + + /** + * @return the connection to the memory repository. + */ + protected RepositoryConnection getConnection() { + return conn; + } + + /** + * @return the last generated report. + */ + protected SingleDocumentExtractionReport getReport() { + return report; + } + + /** + * Returns the list of issues raised by a given extractor. + * + * @param extractorName + * name of the extractor. + * @return collection of issues. + */ + protected Collection<IssueReport.Issue> getIssues(String extractorName) { + for (Map.Entry<String, Collection<IssueReport.Issue>> issueEntry : report + .getExtractorToIssues().entrySet()) { + if (issueEntry.getKey().equals(extractorName)) { + return issueEntry.getValue(); + } + } + return Collections.emptyList(); + } + + /** + * Returns the list of issues raised by the extractor under testing. + * + * @return collection of issues. + */ + protected Collection<IssueReport.Issue> getIssues() { + return getIssues(getExtractorFactory().getExtractorName()); + } + + /** + * Applies the extractor provided by the {@link #getExtractorFactory()} to + * the specified resource. + * + * @param resource + * resource name. + * @throws org.apache.any23.extractor.ExtractionException + * @throws IOException + */ + // TODO: MimeType detector to null forces the execution of all extractors, + // but extraction + // tests should be based on mimetype detection. + protected void extract(String resource) throws ExtractionException, + IOException { + SingleDocumentExtraction ex = new SingleDocumentExtraction( + new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseIRI + .toString()), getExtractorFactory(), + new RepositoryWriter(conn)); + ex.setMIMETypeDetector(null); + report = ex.run(); + } + + /** + * Performs data extraction over the content of a resource and assert that + * the extraction was fine. + * + * @param resource + * resource name. + * @param assertNoIssues + * if <code>true</code>invokes {@link #assertNoIssues()} after + * the extraction. + */ + protected void assertExtract(String resource, boolean assertNoIssues) { + try { + extract(resource); + if (assertNoIssues) + assertNoIssues(); + } catch (ExtractionException ex) { + throw new RuntimeException(ex); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + } + + /** + * Performs data extraction over the content of a resource and assert that + * the extraction was fine and raised no issues. + * + * @param resource + */ + protected void assertExtract(String resource) { + assertExtract(resource, true); + } + + /** + * Asserts that the extracted triples contain the pattern + * <code>(_ p o)</code>. + * + * @param p + * predicate + * @param o + * object. + * @throws RepositoryException + */ + protected void assertContains(IRI p, Resource o) throws RepositoryException { + assertContains(null, p, o); + } + + /** + * Asserts that the extracted triples contain the pattern + * <code>(_ p o)</code>. + * + * @param p + * predicate + * @param o + * object. + * @throws RepositoryException + */ + protected void assertContains(IRI p, String o) throws RepositoryException { + assertContains(null, p, RDFUtils.literal(o)); + } + + /** + * Asserts that the extracted triples contain the pattern + * <code>(_ p o)</code>. + * + * @param p + * predicate + * @param o + * object. + * @throws RepositoryException + */ + protected void assertNotContains(IRI p, Resource o) + throws RepositoryException { + assertNotContains(null, p, o); + } + + /** + * Asserts that the extracted triples contain the pattern + * <code>(s p o)</code>. + * + * @param s + * subject. + * @param p + * predicate. + * @param o + * object. + * @throws RepositoryException + */ + protected void assertContains(Resource s, IRI p, Value o) + throws RepositoryException { + Assert.assertTrue( + getFailedExtractionMessage() + + String.format("Cannot find triple (%s %s %s)", s, p, + o), conn.hasStatement(s, p, o, false)); + } + + /** + * Asserts that the extracted triples contain the pattern + * <code>(s p o)</code>. + * + * @param s + * subject. + * @param p + * predicate. + * @param o + * object. + * @throws RepositoryException + */ + protected void assertNotContains(Resource s, IRI p, String o) + throws RepositoryException { + Assert.assertFalse(getFailedExtractionMessage(), + conn.hasStatement(s, p, RDFUtils.literal(o), false)); + } + + /** + * Asserts that the extracted triples contain the pattern + * <code>(s p o)</code>. + * + * @param s + * subject. + * @param p + * predicate. + * @param o + * object. + * @throws RepositoryException + */ + protected void assertNotContains(Resource s, IRI p, Resource o) + throws RepositoryException { + Assert.assertFalse(getFailedExtractionMessage(), + conn.hasStatement(s, p, o, false)); + } + + /** + * Asserts that the model contains at least a statement. + * + * @throws RepositoryException + */ + protected void assertModelNotEmpty() throws RepositoryException { + Assert.assertFalse("The model is expected to not be empty." + + getFailedExtractionMessage(), conn.isEmpty()); + } + + /** + * Asserts that the model doesn't contain the pattern <code>(s p o)</code> + * + * @param s + * subject. + * @param p + * predicate. + * @param o + * object. + * @throws RepositoryException + */ + protected void assertNotContains(Resource s, IRI p, Literal o) + throws RepositoryException { + Assert.assertFalse(getFailedExtractionMessage(), + conn.hasStatement(s, p, o, false)); + } + + /** + * Asserts that the model is expected to contains no statements. + * + * @throws RepositoryException + */ + protected void assertModelEmpty() throws RepositoryException { + Assert.assertTrue(getFailedExtractionMessage(), conn.isEmpty()); + } + + /** + * Asserts that the extraction generated no issues. + */ + protected void assertNoIssues() { + for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report + .getExtractorToIssues().entrySet()) { + if (entry.getValue().size() > 0) { + System.out.println("Unexpected issue for extractor " + entry.getKey() + + " : " + entry.getValue()); + } + for(Issue nextIssue : entry.getValue()) { + if(nextIssue.getLevel() == IssueLevel.ERROR || nextIssue.getLevel() == IssueLevel.FATAL) { + Assert.fail("Unexpected issue for extractor " + entry.getKey() + + " : " + entry.getValue()); + } + } + } + } + + /** + * Asserts that an issue has been produced by the processed + * {@link org.apache.any23.extractor.Extractor}. + * + * @param level + * expected issue level + * @param issueRegex + * regex matching the expected human readable issue message. + */ + protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) { + final Collection<IssueReport.Issue> issues = getIssues(getExtractorFactory() + .getExtractorName()); + boolean found = false; + for (IssueReport.Issue issue : issues) { + if (issue.getLevel() == level + && issue.getMessage().matches(issueRegex)) { + found = true; + break; + } + } + Assert.assertTrue(String.format( + "Cannot find issue with level %s matching expression '%s'", + level, issueRegex), found); + } + + /** + * Verifies that the current model contains all the given statements. + * + * @param statements + * list of statements to be verified. + * @throws RepositoryException + */ + public void assertContainsModel(Statement[] statements) + throws RepositoryException { + for (Statement statement : statements) { + assertContains(statement); + } + } + + /** + * Verifies that the current model contains all the statements declared in + * the specified <code>modelFile</code>. + * + * @param modelResource + * the resource containing the model. + * @throws RDFHandlerException + * @throws IOException + * @throws RDFParseException + * @throws RepositoryException + */ + public void assertContainsModel(String modelResource) + throws RDFHandlerException, IOException, RDFParseException, + RepositoryException { + getConnection().remove(null, SINDICE.getInstance().date, (Value) null, + (Resource) null); + getConnection().remove(null, SINDICE.getInstance().size, (Value) null, + (Resource) null); + assertContainsModel(RDFUtils.parseRDF(modelResource)); + } + + /** + * Asserts that the given pattern <code>(s p o)</code> satisfies the + * expected number of statements. + * + * @param s + * subject. + * @param p + * predicate. + * @param o + * object. + * @param expected + * expected matches. + * @throws RepositoryException + */ + protected void assertStatementsSize(Resource s, IRI p, Value o, int expected) + throws RDFHandlerException, RepositoryException { + int statementsSize = getStatementsSize(s, p, o); + if (statementsSize != expected) { + getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, System.out)); + } + + Assert.assertEquals("Unexpected number of matching statements.", + expected, statementsSize); + } + + /** + * Asserts that the given pattern <code>(_ p o)</code> satisfies the + * expected number of statements. + * + * @param p + * predicate. + * @param o + * object. + * @param expected + * expected matches. + * @throws RepositoryException + */ + protected void assertStatementsSize(IRI p, Value o, int expected) + throws RDFHandlerException, RepositoryException { + assertStatementsSize(null, p, o, expected); + } + + /** + * Asserts that the given pattern <code>(_ p o)</code> satisfies the + * expected number of statements. + * + * @param p + * predicate. + * @param o + * object. + * @param expected + * expected matches. + * @throws RepositoryException + */ + protected void assertStatementsSize(IRI p, String o, int expected) + throws RDFHandlerException, RepositoryException { + assertStatementsSize(p, o == null ? null : RDFUtils.literal(o), + expected); + } + + /** + * Asserts that the given pattern <code>(s p _)</code> is not present. + * + * @param s + * subject. + * @param p + * predicate. + * @throws RepositoryException + */ + protected void assertNotFound(Resource s, IRI p) throws RepositoryException { + RepositoryResult<Statement> statements = conn.getStatements(s, p, null, + true); + try { + Assert.assertFalse("Expected no statements.", statements.hasNext()); + } finally { + statements.close(); + } + } + + /** + * Returns the blank subject matching the pattern <code>(_:b p o)</code>, it + * is expected to exists and be just one. + * + * @param p + * predicate. + * @param o + * object. + * @return the matching blank subject. + * @throws RepositoryException + */ + protected Resource findExactlyOneBlankSubject(IRI p, Value o) + throws RepositoryException { + RepositoryResult<Statement> it = conn.getStatements(null, p, o, false); + try { + Assert.assertTrue(getFailedExtractionMessage(), it.hasNext()); + Statement stmt = it.next(); + Resource result = stmt.getSubject(); + Assert.assertTrue(getFailedExtractionMessage(), + result instanceof BNode); + Assert.assertFalse(getFailedExtractionMessage(), it.hasNext()); + return result; + } finally { + it.close(); + } + } + + /** + * Returns the object matching the pattern <code>(s p o)</code>, it is + * expected to exists and be just one. + * + * @param s + * subject. + * @param p + * predicate. + * @return the matching object. + * @throws RepositoryException + */ + protected Value findExactlyOneObject(Resource s, IRI p) + throws RepositoryException { + RepositoryResult<Statement> it = conn.getStatements(s, p, null, false); + try { + Assert.assertTrue(getFailedExtractionMessage(), it.hasNext()); + return it.next().getObject(); + } finally { + it.close(); + } + } + + /** + * Returns all the subjects matching the pattern <code>(s? p o)</code>. + * + * @param p + * predicate. + * @param o + * object. + * @return list of matching subjects. + * @throws RepositoryException + */ + protected List<Resource> findSubjects(IRI p, Value o) + throws RepositoryException { + RepositoryResult<Statement> it = conn.getStatements(null, p, o, false); + List<Resource> subjects = new ArrayList<Resource>(); + try { + Statement statement; + while (it.hasNext()) { + statement = it.next(); + subjects.add(statement.getSubject()); + } + } finally { + it.close(); + } + return subjects; + } + + /** + * Returns all the objects matching the pattern <code>(s p _)</code>. + * + * @param s + * predicate. + * @param p + * predicate. + * @return list of matching objects. + * @throws RepositoryException + */ + protected List<Value> findObjects(Resource s, IRI p) + throws RepositoryException { + RepositoryResult<Statement> it = conn.getStatements(s, p, null, false); + List<Value> objects = new ArrayList<Value>(); + try { + Statement statement; + while (it.hasNext()) { + statement = it.next(); + objects.add(statement.getObject()); + } + } finally { + it.close(); + } + return objects; + } + + /** + * Finds the object matching the pattern <code>(s p _)</code>, asserts to + * find exactly one result. + * + * @param s + * subject. + * @param p + * predicate + * @return matching object. + * @throws org.openrdf.repository.RepositoryException + */ + protected Value findObject(Resource s, IRI p) throws RepositoryException { + RepositoryResult<Statement> statements = conn.getStatements(s, p, null, + true); + try { + Assert.assertTrue("Expected at least a statement.", + statements.hasNext()); + return (statements.next().getObject()); + } finally { + statements.close(); + } + } + + /** + * Finds the resource object matching the pattern <code>(s p _)</code>, + * asserts to find exactly one result. + * + * @param s + * subject. + * @param p + * predicate. + * @return matching object. + * @throws RepositoryException + */ + protected Resource findObjectAsResource(Resource s, IRI p) + throws RepositoryException { + final Value v = findObject(s, p); + try { + return (Resource) v; + } catch (ClassCastException cce) { + Assert.fail("Expected resource object, found: " + + v.getClass().getSimpleName()); + throw new IllegalStateException(); + } + } + + /** + * Finds the literal object matching the pattern <code>(s p _)</code>, + * asserts to find exactly one result. + * + * @param s + * subject. + * @param p + * predicate. + * @return matching object. + * @throws RepositoryException + */ + protected String findObjectAsLiteral(Resource s, IRI p) + throws RepositoryException { + return findObject(s, p).stringValue(); + } + + /** + * Dumps the extracted model in <i>Turtle</i> format. + * + * @return a string containing the model in Turtle. + * @throws RepositoryException + */ + protected String dumpModelToTurtle() throws RepositoryException { + StringWriter w = new StringWriter(); + try { + conn.export(Rio.createWriter(RDFFormat.TURTLE, w)); + return w.toString(); + } catch (RDFHandlerException ex) { + throw new RuntimeException(ex); + } + } + + /** + * Dumps the extracted model in <i>NQuads</i> format. + * + * @return a string containing the model in NQuads. + * @throws RepositoryException + */ + protected String dumpModelToNQuads() throws RepositoryException { + StringWriter w = new StringWriter(); + try { + conn.export(Rio.createWriter(RDFFormat.NQUADS, w)); + return w.toString(); + } catch (RDFHandlerException ex) { + throw new RuntimeException(ex); + } + } + + /** + * Dumps the extracted model in <i>RDFXML</i> format. + * + * @return a string containing the model in RDFXML. + * @throws RepositoryException + */ + protected String dumpModelToRDFXML() throws RepositoryException { + StringWriter w = new StringWriter(); + try { + conn.export(Rio.createWriter(RDFFormat.RDFXML, w)); + return w.toString(); + } catch (RDFHandlerException ex) { + throw new RuntimeException(ex); + } + } + + /** + * Dumps the list of statements contained in the extracted model. + * + * @return list of extracted statements. + * @throws RepositoryException + */ + protected List<Statement> dumpAsListOfStatements() + throws RepositoryException { + return Iterations.asList(conn.getStatements(null, null, null, false)); + } + + /** + * @return string containing human readable statements. + * @throws RepositoryException + */ + protected String dumpHumanReadableTriples() throws RepositoryException { + StringBuilder sb = new StringBuilder(); + RepositoryResult<Statement> result = conn.getStatements(null, null, + null, false); + while (result.hasNext()) { + Statement statement = result.next(); + sb.append(String.format("%s %s %s %s\n", statement.getSubject(), + statement.getPredicate(), statement.getObject(), + statement.getContext())); + + } + return sb.toString(); + } + + /** + * Checks that a statement is contained in the extracted model. If the + * statement declares bnodes, they are replaced with <code>_</code> + * patterns. + * + * @param statement + * @throws RepositoryException + */ + // TODO: bnode check is too weak, introduce graph omomorphism check. + protected void assertContains(Statement statement) + throws RepositoryException { + Assert.assertTrue("Cannot find statement " + statement + " in model.", + conn.hasStatement( + statement.getSubject() instanceof BNode ? null + : statement.getSubject(), statement + .getPredicate(), + statement.getObject() instanceof BNode ? null + : statement.getObject(), false)); + } + + /** + * Assert that the model contains the statement <code>(s p l)</code> where + * <code>l</code> is a literal. + * + * @param s + * subject. + * @param p + * predicate. + * @param l + * literal content. + * @throws RepositoryException + */ + protected void assertContains(Resource s, IRI p, String l) + throws RepositoryException { + assertContains(s, p, RDFUtils.literal(l)); + } + + /** + * Assert that the model contains the statement <code>(s p l)</code> where + * <code>l</code> is a language literal. + * + * @param s + * subject. + * @param p + * predicate. + * @param l + * literal content. + * @param lang + * literal language. + * @throws RepositoryException + */ + protected void assertContains(Resource s, IRI p, String l, String lang) + throws RepositoryException { + assertContains(s, p, RDFUtils.literal(l, lang)); + } + + /** + * Returns all statements matching the pattern <code>(s p o)</code>. + * + * @param s + * subject. + * @param p + * predicate. + * @param o + * object. + * @return list of statements. + * @throws RepositoryException + */ + protected RepositoryResult<Statement> getStatements(Resource s, IRI p, + Value o) throws RepositoryException { + return conn.getStatements(s, p, o, false); + } + + /** + * Counts all statements matching the pattern <code>(s p o)</code>. + * + * @param s + * subject. + * @param p + * predicate. + * @param o + * object. + * @return number of matches. + * @throws RepositoryException + */ + protected int getStatementsSize(Resource s, IRI p, Value o) + throws RepositoryException { + RepositoryResult<Statement> result = getStatements(s, p, o); + int count = 0; + try { + while (result.hasNext()) { + result.next(); + count++; + } + } finally { + result.close(); + } + return count; + } + + private String getFailedExtractionMessage() throws RepositoryException { + return "Assertion failed! Extracted triples:\n" + dumpModelToNQuads(); + } } \ No newline at end of file
