http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java b/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java index 6384ab4..e528171 100644 --- a/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java +++ b/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java @@ -60,673 +60,796 @@ import java.util.Map; */ public abstract class AbstractExtractorTestCase extends AbstractAny23TestBase { - /** - * Base test document. - */ - protected static URI baseURI = RDFUtils.uri("http://bob.example.com/"); // TODO: change base URI string. - - /** - * Internal connection used to collect extraction results. - */ - protected RepositoryConnection conn; - - /** - * The latest generated report. - */ - private SingleDocumentExtractionReport report; - - /** - * Constructor. - */ - public AbstractExtractorTestCase() { - super(); - } - - /** - * @return the factory of the extractor to be tested. - */ - protected abstract ExtractorFactory<?> getExtractorFactory(); - - /** - * Test case initialization. - * @throws Exception - */ - @Before - public void setUp() throws Exception { - super.setUp(); - Sail store = new MemoryStore(); - store.initialize(); - conn = new SailRepository(store).getConnection(); - } - - /** - * Test case resources release. - * - * @throws RepositoryException - */ - @After - public void tearDown() throws RepositoryException { - conn.close(); - conn = null; - report = null; - } - - /** - * @return the connection to the memory repository. - */ - protected RepositoryConnection getConnection() { - return conn; - } - - /** - * @return the last generated report. 
- */ - protected SingleDocumentExtractionReport getReport() { - return report; - } - - /** - * Returns the list of issues raised by a given extractor. - * - * @param extractorName name of the extractor. - * @return collection of issues. - */ - protected Collection<IssueReport.Issue> getIssues(String extractorName) { - for( - Map.Entry<String, Collection<IssueReport.Issue>> issueEntry - : - report.getExtractorToIssues().entrySet() - ) { - if(issueEntry.getKey().equals(extractorName)) { - return issueEntry.getValue(); - } - } - return Collections.emptyList(); - } - - /** - * Returns the list of issues raised by the extractor under testing. - * - * @return collection of issues. - */ - protected Collection<IssueReport.Issue> getIssues() { - return getIssues(getExtractorFactory().getExtractorName()); - } - - /** - * Applies the extractor provided by the {@link #getExtractorFactory()} to the specified resource. - * - * @param resource resource name. - * @throws org.apache.any23.extractor.ExtractionException - * @throws IOException - */ - // TODO: MimeType detector to null forces the execution of all extractors, but extraction - // tests should be based on mimetype detection. - protected void extract(String resource) throws ExtractionException, IOException { - SingleDocumentExtraction ex = new SingleDocumentExtraction( - new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseURI.toString()), - getExtractorFactory(), new RepositoryWriter(conn) - ); - ex.setMIMETypeDetector(null); - report = ex.run(); - } - - /** - * Performs data extraction over the content of a resource - * and assert that the extraction was fine. - * - * @param resource resource name. - * @param assertNoIssues if <code>true</code>invokes {@link #assertNoIssues()} after the extraction. 
- */ - protected void assertExtract(String resource, boolean assertNoIssues) { - try { - extract(resource); - if(assertNoIssues) assertNoIssues(); - } catch (ExtractionException ex) { - throw new RuntimeException(ex); - } catch (IOException ex) { - throw new RuntimeException(ex); - } - } - - /** - * Performs data extraction over the content of a resource - * and assert that the extraction was fine and raised no issues. - * - * @param resource - */ - protected void assertExtract(String resource) { - assertExtract(resource, true); - } - - /** - * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>. - * - * @param p predicate - * @param o object. - * @throws RepositoryException - */ - protected void assertContains(URI p, Resource o) throws RepositoryException { - assertContains(null, p, o); - } - - /** - * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>. - * - * @param p predicate - * @param o object. - * @throws RepositoryException - */ - protected void assertContains(URI p, String o) throws RepositoryException { - assertContains(null, p, RDFUtils.literal(o)); - } - - /** - * Asserts that the extracted triples contain the pattern <code>(_ p o)</code>. - * - * @param p predicate - * @param o object. - * @throws RepositoryException - */ - protected void assertNotContains(URI p, Resource o) throws RepositoryException { - assertNotContains(null, p, o); - } - - /** - * Asserts that the extracted triples contain the pattern <code>(s p o)</code>. - * - * @param s subject. - * @param p predicate. - * @param o object. - * @throws RepositoryException - */ - protected void assertContains(Resource s, URI p, Value o) throws RepositoryException { - Assert.assertTrue( - getFailedExtractionMessage() + - String.format("Cannot find triple (%s %s %s)", s, p, o), - conn.hasStatement(s, p, o, false)); - } - - /** - * Asserts that the extracted triples contain the pattern <code>(s p o)</code>. - * - * @param s subject. 
- * @param p predicate. - * @param o object. - * @throws RepositoryException - */ - protected void assertNotContains(Resource s, URI p, String o) throws RepositoryException { - Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, RDFUtils.literal(o), false)); - } - - /** - * Asserts that the extracted triples contain the pattern <code>(s p o)</code>. - * - * @param s subject. - * @param p predicate. - * @param o object. - * @throws RepositoryException - */ - protected void assertNotContains(Resource s, URI p, Resource o) throws RepositoryException { - Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, o, false)); - } - - /** - * Asserts that the model contains at least a statement. - * - * @throws RepositoryException - */ - protected void assertModelNotEmpty() throws RepositoryException { - Assert.assertFalse( - "The model is expected to not be empty." + getFailedExtractionMessage(), - conn.isEmpty() - ); - } - - /** - * Asserts that the model doesn't contain the pattern <code>(s p o)</code> - * - * @param s subject. - * @param p predicate. - * @param o object. - * @throws RepositoryException - */ - protected void assertNotContains(Resource s, URI p, Literal o) throws RepositoryException { - Assert.assertFalse(getFailedExtractionMessage(), conn.hasStatement(s, p, o, false)); - } - - /** - * Asserts that the model is expected to contains no statements. - * - * @throws RepositoryException - */ - protected void assertModelEmpty() throws RepositoryException { - Assert.assertTrue(getFailedExtractionMessage(), conn.isEmpty()); - } - - /** - * Asserts that the extraction generated no issues. 
- */ - protected void assertNoIssues() { - for( Map.Entry<String, Collection<IssueReport.Issue>> entry : report.getExtractorToIssues().entrySet() ) { - if(entry.getValue().size() > 0) { - Assert.fail("Unexpected issue for extractor " + entry.getKey() + " : " + entry.getValue()); - } - } - } - - /** - * Asserts that an issue has been produced by the processed {@link org.apache.any23.extractor.Extractor}. - * - * @param level expected issue level - * @param issueRegex regex matching the expected human readable issue message. - */ - protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) { - final Collection<IssueReport.Issue> issues = getIssues(getExtractorFactory().getExtractorName()); - boolean found = false; - for(IssueReport.Issue issue : issues) { - if(issue.getLevel() == level && issue.getMessage().matches(issueRegex)) { - found = true; - break; - } - } - Assert.assertTrue( - String.format("Cannot find issue with level %s matching expression '%s'", level, issueRegex), - found - ); - } - - /** - * Verifies that the current model contains all the given statements. - * - * @param statements list of statements to be verified. - * @throws RepositoryException - */ - public void assertContainsModel(Statement[] statements) throws RepositoryException { - for(Statement statement : statements) { - assertContains(statement); - } - } - - /** - * Verifies that the current model contains all the statements declared in the - * specified <code>modelFile</code>. - * - * @param modelResource the resource containing the model. 
- * @throws RDFHandlerException - * @throws IOException - * @throws RDFParseException - * @throws RepositoryException - */ - public void assertContainsModel(String modelResource) - throws RDFHandlerException, IOException, RDFParseException, RepositoryException { - getConnection().remove(null, SINDICE.getInstance().date, (Value) null, (Resource) null); - getConnection().remove(null, SINDICE.getInstance().size, (Value) null, (Resource) null); - assertContainsModel(RDFUtils.parseRDF(modelResource)); - } - - /** - * Asserts that the given pattern <code>(s p o)</code> satisfies the expected number of statements. - * - * @param s subject. - * @param p predicate. - * @param o object. - * @param expected expected matches. - * @throws RepositoryException - */ - protected void assertStatementsSize(Resource s, URI p, Value o, int expected) - throws RepositoryException { - Assert.assertEquals( - "Unexpected number of matching statements.", - expected, - getStatementsSize(s, p, o) - ); - } - - /** - * Asserts that the given pattern <code>(_ p o)</code> satisfies the expected number of statements. - * - * @param p predicate. - * @param o object. - * @param expected expected matches. - * @throws RepositoryException - */ - protected void assertStatementsSize(URI p, Value o, int expected) throws RepositoryException { - assertStatementsSize(null, p, o, expected); - } - - /** - * Asserts that the given pattern <code>(_ p o)</code> satisfies the expected number of statements. - * - * @param p predicate. - * @param o object. - * @param expected expected matches. - * @throws RepositoryException - */ - protected void assertStatementsSize(URI p, String o, int expected) throws RepositoryException { - assertStatementsSize(p, o == null ? null : RDFUtils.literal(o), expected); - } - - /** - * Asserts that the given pattern <code>(s p _)</code> is not present. - * - * @param s subject. - * @param p predicate. 
- * @throws RepositoryException - */ - protected void assertNotFound(Resource s, URI p) throws RepositoryException { - RepositoryResult<Statement> statements = conn.getStatements(s, p, null, true); - try { - Assert.assertFalse("Expected no statements.", statements.hasNext()); - } finally { - statements.close(); - } - } - - /** - * Returns the blank subject matching the pattern <code>(_:b p o)</code>, - * it is expected to exists and be just one. - * - * @param p predicate. - * @param o object. - * @return the matching blank subject. - * @throws RepositoryException - */ - protected Resource findExactlyOneBlankSubject(URI p, Value o) throws RepositoryException { - RepositoryResult<Statement> it = conn.getStatements(null, p, o, false); - try { - Assert.assertTrue(getFailedExtractionMessage(), it.hasNext()); - Statement stmt = it.next(); - Resource result = stmt.getSubject(); - Assert.assertTrue(getFailedExtractionMessage(), result instanceof BNode); - Assert.assertFalse(getFailedExtractionMessage(), it.hasNext()); - return result; - } finally { - it.close(); - } - } - - /** - * Returns the object matching the pattern <code>(s p o)</code>, - * it is expected to exists and be just one. - * - * @param s subject. - * @param p predicate. - * @return the matching object. - * @throws RepositoryException - */ - protected Value findExactlyOneObject(Resource s, URI p) throws RepositoryException { - RepositoryResult<Statement> it = conn.getStatements(s, p, null, false); - try { - Assert.assertTrue(getFailedExtractionMessage(), it.hasNext()); - return it.next().getObject(); - } finally { - it.close(); - } - } - - /** - * Returns all the subjects matching the pattern <code>(s? p o)</code>. - * - * @param p predicate. - * @param o object. - * @return list of matching subjects. 
- * @throws RepositoryException - */ - protected List<Resource> findSubjects(URI p, Value o) throws RepositoryException { - RepositoryResult<Statement> it = conn.getStatements(null, p, o, false); - List<Resource> subjects = new ArrayList<Resource>(); - try { - Statement statement; - while( it.hasNext() ) { - statement = it.next(); - subjects.add( statement.getSubject() ); - } - } finally { - it.close(); - } - return subjects; - } - - /** - * Returns all the objects matching the pattern <code>(s p _)</code>. - * - * @param s predicate. - * @param p predicate. - * @return list of matching objects. - * @throws RepositoryException - */ - protected List<Value> findObjects(Resource s, URI p) throws RepositoryException { - RepositoryResult<Statement> it = conn.getStatements(s, p, null, false); - List<Value> objects = new ArrayList<Value>(); - try { - Statement statement; - while( it.hasNext() ) { - statement = it.next(); - objects.add( statement.getObject() ); - } - } finally { - it.close(); - } - return objects; - } - - /** - * Finds the object matching the pattern <code>(s p _)</code>, asserts to find - * exactly one result. - * - * @param s subject. - * @param p predicate - * @return matching object. - * @throws org.openrdf.repository.RepositoryException - */ - protected Value findObject(Resource s, URI p) throws RepositoryException { - RepositoryResult<Statement> statements = conn.getStatements(s, p, null, true); - try { - Assert.assertTrue("Expected at least a statement.", statements.hasNext()); - return (statements.next().getObject()); - } finally { - statements.close(); - } - } - - /** - * Finds the resource object matching the pattern <code>(s p _)</code>, asserts to find - * exactly one result. - * - * @param s subject. - * @param p predicate. - * @return matching object. 
- * @throws RepositoryException - */ - protected Resource findObjectAsResource(Resource s, URI p) throws RepositoryException { - final Value v = findObject(s, p); - try { - return (Resource) v; - } catch (ClassCastException cce) { - Assert.fail("Expected resource object, found: " + v.getClass().getSimpleName()); - throw new IllegalStateException(); - } - } - - /** - * Finds the literal object matching the pattern <code>(s p _)</code>, asserts to find - * exactly one result. - * - * @param s subject. - * @param p predicate. - * @return matching object. - * @throws RepositoryException - */ - protected String findObjectAsLiteral(Resource s, URI p) throws RepositoryException { - return findObject(s, p).stringValue(); - } - - /** - * Dumps the extracted model in <i>Turtle</i> format. - * - * @return a string containing the model in Turtle. - * @throws RepositoryException - */ - protected String dumpModelToTurtle() throws RepositoryException { - StringWriter w = new StringWriter(); - try { - conn.export(Rio.createWriter(RDFFormat.TURTLE, w)); - return w.toString(); - } catch (RDFHandlerException ex) { - throw new RuntimeException(ex); - } - } - - /** - * Dumps the extracted model in <i>NQuads</i> format. - * - * @return a string containing the model in NQuads. - * @throws RepositoryException - */ - protected String dumpModelToNQuads() throws RepositoryException { - StringWriter w = new StringWriter(); - try { - conn.export(Rio.createWriter(RDFFormat.NQUADS, w)); - return w.toString(); - } catch (RDFHandlerException ex) { - throw new RuntimeException(ex); - } - } - - /** - * Dumps the extracted model in <i>RDFXML</i> format. - * - * @return a string containing the model in RDFXML. 
- * @throws RepositoryException - */ - protected String dumpModelToRDFXML() throws RepositoryException { - StringWriter w = new StringWriter(); - try { - conn.export(Rio.createWriter(RDFFormat.RDFXML, w)); - return w.toString(); - } catch (RDFHandlerException ex) { - throw new RuntimeException(ex); - } - } - - /** - * Dumps the list of statements contained in the extracted model. - * - * @return list of extracted statements. - * @throws RepositoryException - */ - protected List<Statement> dumpAsListOfStatements() throws RepositoryException { - return conn.getStatements(null, null, null, false).asList(); - } - - /** - * @return string containing human readable statements. - * @throws RepositoryException - */ - protected String dumpHumanReadableTriples() throws RepositoryException { - StringBuilder sb = new StringBuilder(); - RepositoryResult<Statement> result = conn.getStatements(null, null, null, false); - while(result.hasNext()) { - Statement statement = result.next(); - sb.append(String.format("%s %s %s %s\n", - statement.getSubject(), - statement.getPredicate(), - statement.getObject(), - statement.getContext() - ) - ); - - } - return sb.toString(); - } - - /** - * Checks that a statement is contained in the extracted model. - * If the statement declares bnodes, they are replaced with <code>_</code> patterns. - * - * @param statement - * @throws RepositoryException - */ - // TODO: bnode check is too weak, introduce graph omomorphism check. - protected void assertContains(Statement statement) throws RepositoryException { - Assert.assertTrue( - "Cannot find statement " + statement + " in model.", - conn.hasStatement( - statement.getSubject() instanceof BNode ? null : statement.getSubject(), - statement.getPredicate(), - statement.getObject() instanceof BNode ? null : statement.getObject(), - false - ) - ); - } - - /** - * Assert that the model contains the statement <code>(s p l)</code> where <code>l</code> is a literal. - * - * @param s subject. 
- * @param p predicate. - * @param l literal content. - * @throws RepositoryException - */ - protected void assertContains(Resource s, URI p, String l) throws RepositoryException { - assertContains(s, p, RDFUtils.literal(l)); - } - - /** - * Assert that the model contains the statement <code>(s p l)</code> where <code>l</code> - * is a language literal. - * - * @param s subject. - * @param p predicate. - * @param l literal content. - * @param lang literal language. - * @throws RepositoryException - */ - protected void assertContains(Resource s, URI p, String l, String lang) throws RepositoryException { - assertContains(s, p, RDFUtils.literal(l, lang)); - } - - /** - * Returns all statements matching the pattern <code>(s p o)</code>. - * - * @param s subject. - * @param p predicate. - * @param o object. - * @return list of statements. - * @throws RepositoryException - */ - protected RepositoryResult<Statement> getStatements(Resource s, URI p, Value o) - throws RepositoryException { - return conn.getStatements(s, p, o, false); - } - - /** - * Counts all statements matching the pattern <code>(s p o)</code>. - * - * @param s subject. - * @param p predicate. - * @param o object. - * @return number of matches. - * @throws RepositoryException - */ - protected int getStatementsSize(Resource s, URI p, Value o) - throws RepositoryException { - RepositoryResult<Statement> result = getStatements(s, p, o); - int count = 0; - try { - while (result.hasNext()) { - result.next(); - count++; - } - } finally { - result.close(); - } - return count; - } - - private String getFailedExtractionMessage() throws RepositoryException { - return "Assertion failed! Extracted triples:\n" + dumpModelToTurtle(); - } + /** + * Base test document. + */ + protected static URI baseURI = RDFUtils.uri("http://bob.example.com/"); // TODO: + // change + // base + // URI + // string. + + /** + * Internal connection used to collect extraction results. 
+ */ + protected RepositoryConnection conn; + + /** + * The latest generated report. + */ + private SingleDocumentExtractionReport report; + + private Sail store; + + private SailRepository repository; + + /** + * Constructor. + */ + public AbstractExtractorTestCase() { + super(); + } + + /** + * @return the factory of the extractor to be tested. + */ + protected abstract ExtractorFactory<?> getExtractorFactory(); + + /** + * Test case initialization. + * + * @throws Exception + */ + @Before + public void setUp() throws Exception { + super.setUp(); + store = new MemoryStore(); + repository = new SailRepository(store); + repository.initialize(); + conn = repository.getConnection(); + } + + /** + * Test case resources release. + * + * @throws RepositoryException + */ + @After + public void tearDown() throws RepositoryException { + try { + conn.close(); + } finally { + repository.shutDown(); + } + conn = null; + report = null; + store = null; + repository = null; + } + + /** + * @return the connection to the memory repository. + */ + protected RepositoryConnection getConnection() { + return conn; + } + + /** + * @return the last generated report. + */ + protected SingleDocumentExtractionReport getReport() { + return report; + } + + /** + * Returns the list of issues raised by a given extractor. + * + * @param extractorName + * name of the extractor. + * @return collection of issues. + */ + protected Collection<IssueReport.Issue> getIssues(String extractorName) { + for (Map.Entry<String, Collection<IssueReport.Issue>> issueEntry : report + .getExtractorToIssues().entrySet()) { + if (issueEntry.getKey().equals(extractorName)) { + return issueEntry.getValue(); + } + } + return Collections.emptyList(); + } + + /** + * Returns the list of issues raised by the extractor under testing. + * + * @return collection of issues. 
+ */ + protected Collection<IssueReport.Issue> getIssues() { + return getIssues(getExtractorFactory().getExtractorName()); + } + + /** + * Applies the extractor provided by the {@link #getExtractorFactory()} to + * the specified resource. + * + * @param resource + * resource name. + * @throws org.apache.any23.extractor.ExtractionException + * @throws IOException + */ + // TODO: MimeType detector to null forces the execution of all extractors, + // but extraction + // tests should be based on mimetype detection. + protected void extract(String resource) throws ExtractionException, + IOException { + SingleDocumentExtraction ex = new SingleDocumentExtraction( + new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseURI + .toString()), getExtractorFactory(), + new RepositoryWriter(conn)); + ex.setMIMETypeDetector(null); + report = ex.run(); + } + + /** + * Performs data extraction over the content of a resource and assert that + * the extraction was fine. + * + * @param resource + * resource name. + * @param assertNoIssues + * if <code>true</code>invokes {@link #assertNoIssues()} after + * the extraction. + */ + protected void assertExtract(String resource, boolean assertNoIssues) { + try { + extract(resource); + if (assertNoIssues) + assertNoIssues(); + } catch (ExtractionException ex) { + throw new RuntimeException(ex); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + } + + /** + * Performs data extraction over the content of a resource and assert that + * the extraction was fine and raised no issues. + * + * @param resource + */ + protected void assertExtract(String resource) { + assertExtract(resource, true); + } + + /** + * Asserts that the extracted triples contain the pattern + * <code>(_ p o)</code>. + * + * @param p + * predicate + * @param o + * object. 
+ * @throws RepositoryException + */ + protected void assertContains(URI p, Resource o) throws RepositoryException { + assertContains(null, p, o); + } + + /** + * Asserts that the extracted triples contain the pattern + * <code>(_ p o)</code>. + * + * @param p + * predicate + * @param o + * object. + * @throws RepositoryException + */ + protected void assertContains(URI p, String o) throws RepositoryException { + assertContains(null, p, RDFUtils.literal(o)); + } + + /** + * Asserts that the extracted triples contain the pattern + * <code>(_ p o)</code>. + * + * @param p + * predicate + * @param o + * object. + * @throws RepositoryException + */ + protected void assertNotContains(URI p, Resource o) + throws RepositoryException { + assertNotContains(null, p, o); + } + + /** + * Asserts that the extracted triples contain the pattern + * <code>(s p o)</code>. + * + * @param s + * subject. + * @param p + * predicate. + * @param o + * object. + * @throws RepositoryException + */ + protected void assertContains(Resource s, URI p, Value o) + throws RepositoryException { + Assert.assertTrue( + getFailedExtractionMessage() + + String.format("Cannot find triple (%s %s %s)", s, p, + o), conn.hasStatement(s, p, o, false)); + } + + /** + * Asserts that the extracted triples contain the pattern + * <code>(s p o)</code>. + * + * @param s + * subject. + * @param p + * predicate. + * @param o + * object. + * @throws RepositoryException + */ + protected void assertNotContains(Resource s, URI p, String o) + throws RepositoryException { + Assert.assertFalse(getFailedExtractionMessage(), + conn.hasStatement(s, p, RDFUtils.literal(o), false)); + } + + /** + * Asserts that the extracted triples contain the pattern + * <code>(s p o)</code>. + * + * @param s + * subject. + * @param p + * predicate. + * @param o + * object. 
+ * @throws RepositoryException + */ + protected void assertNotContains(Resource s, URI p, Resource o) + throws RepositoryException { + Assert.assertFalse(getFailedExtractionMessage(), + conn.hasStatement(s, p, o, false)); + } + + /** + * Asserts that the model contains at least a statement. + * + * @throws RepositoryException + */ + protected void assertModelNotEmpty() throws RepositoryException { + Assert.assertFalse("The model is expected to not be empty." + + getFailedExtractionMessage(), conn.isEmpty()); + } + + /** + * Asserts that the model doesn't contain the pattern <code>(s p o)</code> + * + * @param s + * subject. + * @param p + * predicate. + * @param o + * object. + * @throws RepositoryException + */ + protected void assertNotContains(Resource s, URI p, Literal o) + throws RepositoryException { + Assert.assertFalse(getFailedExtractionMessage(), + conn.hasStatement(s, p, o, false)); + } + + /** + * Asserts that the model is expected to contains no statements. + * + * @throws RepositoryException + */ + protected void assertModelEmpty() throws RepositoryException { + Assert.assertTrue(getFailedExtractionMessage(), conn.isEmpty()); + } + + /** + * Asserts that the extraction generated no issues. + */ + protected void assertNoIssues() { + for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report + .getExtractorToIssues().entrySet()) { + if (entry.getValue().size() > 0) { + Assert.fail("Unexpected issue for extractor " + entry.getKey() + + " : " + entry.getValue()); + } + } + } + + /** + * Asserts that an issue has been produced by the processed + * {@link org.apache.any23.extractor.Extractor}. + * + * @param level + * expected issue level + * @param issueRegex + * regex matching the expected human readable issue message. 
+ */ + protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) { + final Collection<IssueReport.Issue> issues = getIssues(getExtractorFactory() + .getExtractorName()); + boolean found = false; + for (IssueReport.Issue issue : issues) { + if (issue.getLevel() == level + && issue.getMessage().matches(issueRegex)) { + found = true; + break; + } + } + Assert.assertTrue(String.format( + "Cannot find issue with level %s matching expression '%s'", + level, issueRegex), found); + } + + /** + * Verifies that the current model contains all the given statements. + * + * @param statements + * list of statements to be verified. + * @throws RepositoryException + */ + public void assertContainsModel(Statement[] statements) + throws RepositoryException { + for (Statement statement : statements) { + assertContains(statement); + } + } + + /** + * Verifies that the current model contains all the statements declared in + * the specified <code>modelFile</code>. + * + * @param modelResource + * the resource containing the model. + * @throws RDFHandlerException + * @throws IOException + * @throws RDFParseException + * @throws RepositoryException + */ + public void assertContainsModel(String modelResource) + throws RDFHandlerException, IOException, RDFParseException, + RepositoryException { + getConnection().remove(null, SINDICE.getInstance().date, (Value) null, + (Resource) null); + getConnection().remove(null, SINDICE.getInstance().size, (Value) null, + (Resource) null); + assertContainsModel(RDFUtils.parseRDF(modelResource)); + } + + /** + * Asserts that the given pattern <code>(s p o)</code> satisfies the + * expected number of statements. + * + * @param s + * subject. + * @param p + * predicate. + * @param o + * object. + * @param expected + * expected matches. 
+ * @throws RepositoryException + */ + protected void assertStatementsSize(Resource s, URI p, Value o, int expected) + throws RDFHandlerException, RepositoryException { + int statementsSize = getStatementsSize(s, p, o); + if (statementsSize != expected) { + getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, System.out)); + } + + Assert.assertEquals("Unexpected number of matching statements.", + expected, statementsSize); + } + + /** + * Asserts that the given pattern <code>(_ p o)</code> satisfies the + * expected number of statements. + * + * @param p + * predicate. + * @param o + * object. + * @param expected + * expected matches. + * @throws RepositoryException + */ + protected void assertStatementsSize(URI p, Value o, int expected) + throws RDFHandlerException, RepositoryException { + assertStatementsSize(null, p, o, expected); + } + + /** + * Asserts that the given pattern <code>(_ p o)</code> satisfies the + * expected number of statements. + * + * @param p + * predicate. + * @param o + * object. + * @param expected + * expected matches. + * @throws RepositoryException + */ + protected void assertStatementsSize(URI p, String o, int expected) + throws RDFHandlerException, RepositoryException { + assertStatementsSize(p, o == null ? null : RDFUtils.literal(o), + expected); + } + + /** + * Asserts that the given pattern <code>(s p _)</code> is not present. + * + * @param s + * subject. + * @param p + * predicate. + * @throws RepositoryException + */ + protected void assertNotFound(Resource s, URI p) throws RepositoryException { + RepositoryResult<Statement> statements = conn.getStatements(s, p, null, + true); + try { + Assert.assertFalse("Expected no statements.", statements.hasNext()); + } finally { + statements.close(); + } + } + + /** + * Returns the blank subject matching the pattern <code>(_:b p o)</code>, it + * is expected to exists and be just one. + * + * @param p + * predicate. + * @param o + * object. 
+ * @return the matching blank subject. + * @throws RepositoryException + */ + protected Resource findExactlyOneBlankSubject(URI p, Value o) + throws RepositoryException { + RepositoryResult<Statement> it = conn.getStatements(null, p, o, false); + try { + Assert.assertTrue(getFailedExtractionMessage(), it.hasNext()); + Statement stmt = it.next(); + Resource result = stmt.getSubject(); + Assert.assertTrue(getFailedExtractionMessage(), + result instanceof BNode); + Assert.assertFalse(getFailedExtractionMessage(), it.hasNext()); + return result; + } finally { + it.close(); + } + } + + /** + * Returns the object matching the pattern <code>(s p o)</code>, it is + * expected to exists and be just one. + * + * @param s + * subject. + * @param p + * predicate. + * @return the matching object. + * @throws RepositoryException + */ + protected Value findExactlyOneObject(Resource s, URI p) + throws RepositoryException { + RepositoryResult<Statement> it = conn.getStatements(s, p, null, false); + try { + Assert.assertTrue(getFailedExtractionMessage(), it.hasNext()); + return it.next().getObject(); + } finally { + it.close(); + } + } + + /** + * Returns all the subjects matching the pattern <code>(s? p o)</code>. + * + * @param p + * predicate. + * @param o + * object. + * @return list of matching subjects. + * @throws RepositoryException + */ + protected List<Resource> findSubjects(URI p, Value o) + throws RepositoryException { + RepositoryResult<Statement> it = conn.getStatements(null, p, o, false); + List<Resource> subjects = new ArrayList<Resource>(); + try { + Statement statement; + while (it.hasNext()) { + statement = it.next(); + subjects.add(statement.getSubject()); + } + } finally { + it.close(); + } + return subjects; + } + + /** + * Returns all the objects matching the pattern <code>(s p _)</code>. + * + * @param s + * predicate. + * @param p + * predicate. + * @return list of matching objects. 
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected List<Value> findObjects(Resource s, URI p)
+            throws RepositoryException {
+        final RepositoryResult<Statement> matches = conn.getStatements(s, p,
+                null, false);
+        final List<Value> objects = new ArrayList<Value>();
+        try {
+            while (matches.hasNext()) {
+                objects.add(matches.next().getObject());
+            }
+        } finally {
+            matches.close();
+        }
+        return objects;
+    }
+
+    /**
+     * Finds the object of the first statement matching the pattern
+     * <code>(s p _)</code>. Only the presence of at least one match is
+     * asserted; additional matches are ignored.
+     *
+     * @param s
+     *            subject.
+     * @param p
+     *            predicate
+     * @return object of the first matching statement.
+     * @throws org.openrdf.repository.RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected Value findObject(Resource s, URI p) throws RepositoryException {
+        final RepositoryResult<Statement> statements = conn.getStatements(s, p,
+                null, true);
+        try {
+            Assert.assertTrue("Expected at least a statement.",
+                    statements.hasNext());
+            return statements.next().getObject();
+        } finally {
+            statements.close();
+        }
+    }
+
+    /**
+     * Finds the object of the first statement matching the pattern
+     * <code>(s p _)</code> (see {@link #findObject(Resource, URI)}) and
+     * returns it as a {@link Resource}. The test fails if the object is not
+     * a resource.
+     *
+     * @param s
+     *            subject.
+     * @param p
+     *            predicate.
+     * @return matching object.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected Resource findObjectAsResource(Resource s, URI p)
+            throws RepositoryException {
+        final Value v = findObject(s, p);
+        // Explicit type check instead of catching ClassCastException; the null
+        // case keeps the behaviour of the former plain cast.
+        if (v == null || v instanceof Resource) {
+            return (Resource) v;
+        }
+        Assert.fail("Expected resource object, found: "
+                + v.getClass().getSimpleName());
+        // Unreachable: Assert.fail always throws. Kept for the compiler only.
+        throw new IllegalStateException();
+    }
+
+    /**
+     * Finds the object of the first statement matching the pattern
+     * <code>(s p _)</code> and returns its string value; at least one match
+     * is asserted.
+     *
+     * @param s
+     *            subject.
+     * @param p
+     *            predicate.
+     * @return string value of the matching object.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected String findObjectAsLiteral(Resource s, URI p)
+            throws RepositoryException {
+        final Value object = findObject(s, p);
+        return object.stringValue();
+    }
+
+    /**
+     * Serializes the whole extracted model in <i>Turtle</i> format.
+     *
+     * @return a string containing the model in Turtle.
+     * @throws RepositoryException
+     *             if the repository cannot be read.
+     */
+    protected String dumpModelToTurtle() throws RepositoryException {
+        final StringWriter out = new StringWriter();
+        try {
+            conn.export(Rio.createWriter(RDFFormat.TURTLE, out));
+        } catch (RDFHandlerException rhe) {
+            // Serialization problems are surfaced unchecked, cause preserved.
+            throw new RuntimeException(rhe);
+        }
+        return out.toString();
+    }
+
+    /**
+     * Serializes the whole extracted model in <i>NQuads</i> format.
+     *
+     * @return a string containing the model in NQuads.
+     * @throws RepositoryException
+     *             if the repository cannot be read.
+     */
+    protected String dumpModelToNQuads() throws RepositoryException {
+        final StringWriter out = new StringWriter();
+        try {
+            conn.export(Rio.createWriter(RDFFormat.NQUADS, out));
+        } catch (RDFHandlerException rhe) {
+            // Serialization problems are surfaced unchecked, cause preserved.
+            throw new RuntimeException(rhe);
+        }
+        return out.toString();
+    }
+
+    /**
+     * Serializes the whole extracted model in <i>RDFXML</i> format.
+     *
+     * @return a string containing the model in RDFXML.
+     * @throws RepositoryException
+     *             if the repository cannot be read.
+     */
+    protected String dumpModelToRDFXML() throws RepositoryException {
+        final StringWriter out = new StringWriter();
+        try {
+            conn.export(Rio.createWriter(RDFFormat.RDFXML, out));
+        } catch (RDFHandlerException rhe) {
+            // Serialization problems are surfaced unchecked, cause preserved.
+            throw new RuntimeException(rhe);
+        }
+        return out.toString();
+    }
+
+    /**
+     * Returns every statement of the extracted model as a list.
+     *
+     * @return list of extracted statements.
+     * @throws RepositoryException
+     *             if the repository cannot be read.
+     */
+    protected List<Statement> dumpAsListOfStatements()
+            throws RepositoryException {
+        final RepositoryResult<Statement> all = conn.getStatements(null, null,
+                null, false);
+        return all.asList();
+    }
+
+    /**
+     * Dumps the extracted model as text, one
+     * <code>subject predicate object context</code> line per statement.
+     *
+     * @return string containing human readable statements.
+     * @throws RepositoryException
+     *             if the repository cannot be read.
+     */
+    protected String dumpHumanReadableTriples() throws RepositoryException {
+        final StringBuilder sb = new StringBuilder();
+        final RepositoryResult<Statement> result = conn.getStatements(null,
+                null, null, false);
+        try {
+            while (result.hasNext()) {
+                final Statement statement = result.next();
+                sb.append(String.format("%s %s %s %s\n", statement.getSubject(),
+                        statement.getPredicate(), statement.getObject(),
+                        statement.getContext()));
+            }
+        } finally {
+            // The result used to be left open; release it even when the
+            // iteration fails, as every other query helper of this class does.
+            result.close();
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Checks that a statement is contained in the extracted model. If the
+     * subject or the object of the statement is a bnode, it is replaced by a
+     * <code>null</code> pattern, so that any value matches in its place.
+     *
+     * @param statement
+     *            statement to look for.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    // TODO: bnode check is too weak, introduce graph homomorphism check.
+    protected void assertContains(Statement statement)
+            throws RepositoryException {
+        final Resource subjectPattern = statement.getSubject() instanceof BNode
+                ? null
+                : statement.getSubject();
+        final Value objectPattern = statement.getObject() instanceof BNode
+                ? null
+                : statement.getObject();
+        Assert.assertTrue("Cannot find statement " + statement + " in model.",
+                conn.hasStatement(subjectPattern, statement.getPredicate(),
+                        objectPattern, false));
+    }
+
+    /**
+     * Assert that the model contains the statement <code>(s p l)</code> where
+     * <code>l</code> is a literal.
+     *
+     * @param s
+     *            subject.
+     * @param p
+     *            predicate.
+     * @param l
+     *            literal content.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected void assertContains(Resource s, URI p, String l)
+            throws RepositoryException {
+        assertContains(s, p, RDFUtils.literal(l));
+    }
+
+    /**
+     * Assert that the model contains the statement <code>(s p l)</code> where
+     * <code>l</code> is a language literal.
+     *
+     * @param s
+     *            subject.
+     * @param p
+     *            predicate.
+     * @param l
+     *            literal content.
+     * @param lang
+     *            literal language.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected void assertContains(Resource s, URI p, String l, String lang)
+            throws RepositoryException {
+        final Value languageLiteral = RDFUtils.literal(l, lang);
+        assertContains(s, p, languageLiteral);
+    }
+
+    /**
+     * Returns all statements matching the pattern <code>(s p o)</code>. The
+     * returned result is still open; the caller is responsible for closing
+     * it.
+     *
+     * @param s
+     *            subject.
+     * @param p
+     *            predicate.
+     * @param o
+     *            object.
+     * @return the matching statements.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected RepositoryResult<Statement> getStatements(Resource s, URI p,
+            Value o) throws RepositoryException {
+        final RepositoryResult<Statement> matches = conn.getStatements(s, p, o,
+                false);
+        return matches;
+    }
+
+    /**
+     * Counts all statements matching the pattern <code>(s p o)</code> by
+     * walking the whole result, which is closed afterwards.
+     *
+     * @param s
+     *            subject.
+     * @param p
+     *            predicate.
+     * @param o
+     *            object.
+     * @return number of matches.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected int getStatementsSize(Resource s, URI p, Value o)
+            throws RepositoryException {
+        final RepositoryResult<Statement> matches = getStatements(s, p, o);
+        int total = 0;
+        try {
+            for (; matches.hasNext(); total++) {
+                matches.next();
+            }
+        } finally {
+            matches.close();
+        }
+        return total;
+    }
+
+    /**
+     * Builds the message attached to failed extraction assertions: a fixed
+     * header followed by the Turtle dump of the current model.
+     */
+    private String getFailedExtractionMessage() throws RepositoryException {
+        final String turtle = dumpModelToTurtle();
+        return "Assertion failed! Extracted triples:\n" + turtle;
+    }
 }
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java index b30840c..a49d680 100644 --- a/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorTest.java @@ -28,23 +28,23 @@ import org.openrdf.repository.RepositoryException; */ public class EmbeddedJSONLDExtractorTest extends AbstractExtractorTestCase { - @Test - public void testEmbeddedJSONLDInHead() throws RepositoryException { - assertExtract("/html/html-embedded-jsonld-extractor.html"); - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 7); - } - - @Test - public void testSeveralEmbeddedJSONLDInHead() throws RepositoryException { - assertExtract("/html/html-embedded-jsonld-extractor.html"); - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 7); - } + /** + * Extracts <code>/html/html-embedded-jsonld-extractor.html</code> and + * expects a non-empty model holding exactly 3 statements overall. + */ + @Test + public void testEmbeddedJSONLDInHead() throws Exception { + assertExtract("/html/html-embedded-jsonld-extractor.html"); + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 3); + } - @Override - protected ExtractorFactory<?> getExtractorFactory() { - return new EmbeddedJSONLDExtractorFactory(); - } + /** + * Extracts <code>/html/html-embedded-jsonld-extractor-multiple.html</code> + * and expects a non-empty model holding exactly 7 statements overall. + */ + @Test + public void testSeveralEmbeddedJSONLDInHead() throws Exception { + assertExtract("/html/html-embedded-jsonld-extractor-multiple.html"); + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 7); + } + + /** + * @return a new {@link EmbeddedJSONLDExtractorFactory}, the factory of the + * extractor under test. + */ + @Override + protected ExtractorFactory<?> getExtractorFactory() { + return new EmbeddedJSONLDExtractorFactory(); + } } http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/test/java/org/apache/any23/extractor/html/HCalendarExtractorTest.java
---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/html/HCalendarExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/html/HCalendarExtractorTest.java index 2666821..56d4fa1 100644 --- a/core/src/test/java/org/apache/any23/extractor/html/HCalendarExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/html/HCalendarExtractorTest.java @@ -43,351 +43,389 @@ import java.io.IOException; */ public class HCalendarExtractorTest extends AbstractExtractorTestCase { - private static final ICAL vICAL = ICAL.getInstance(); - private static final SINDICE vSINDICE = SINDICE.getInstance(); - - private final static URI vcal = vICAL.Vcalendar; - private final static URI vevent = vICAL.Vevent; - private final static URI vjournal = vICAL.Vjournal; - private final static URI vtodo = vICAL.Vtodo; - - protected ExtractorFactory<?> getExtractorFactory() { - return new HCalendarExtractorFactory(); - } - - @Test - public void testOneVEvent() throws RepositoryException { - assertExtract("/microformats/hcalendar/example1.html"); - assertModelNotEmpty(); - assertContains(baseURI, RDF.TYPE, vcal); - assertContains(null, RDF.TYPE, vevent); - RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, vevent); - try { - while (result.hasNext()) { - Statement statement = result.next(); - final Resource subject = statement.getSubject(); - assertContains(null, vICAL.component, subject); - assertContains(subject, RDF.TYPE, vevent); - assertContains(subject, vICAL.dtstart, "1997-09-05T18:00:00.000Z"); - assertContains(subject, vICAL.dtstamp, "1997-09-01T13:00:00.000Z"); - assertContains(subject, vICAL.dtend, "1997-09-03T19:00:00.000Z"); - assertContains(subject, vICAL.uid, "[email protected]"); - assertContains(subject, vICAL.summary, "Annual Employee Review"); - assertContains(subject, vICAL.class_, "private"); - assertContains(subject, vICAL.categories, "Business"); - 
assertContains(subject, vICAL.categories, "Human Resources"); - } - } finally { - result.close(); - } - } - - @Test - public void testTransparentEvent() throws RepositoryException { - assertExtract("/microformats/hcalendar/example2.html"); - assertModelNotEmpty(); - assertContains(baseURI, RDF.TYPE, vcal); - assertContains(null, RDF.TYPE, vevent); - RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, vevent); - try { - while (result.hasNext()) { - Statement statement = result.next(); - final Resource subject = statement.getSubject(); - assertContains(null, vICAL.component, subject); - assertContains(subject, RDF.TYPE, vevent); - assertContains(subject, vICAL.dtstart, "1997-04-03T18:00:00.000Z"); - assertContains(subject, vICAL.dtstamp, "1997-09-01T13:00:00.000Z"); - assertContains(subject, vICAL.dtend, "1997-04-02T01:00:00.000Z"); - assertContains(subject, vICAL.uid, "[email protected]"); - assertContains(subject, vICAL.summary, "Laurel is in sensitivity awareness class."); - assertContains(subject, vICAL.class_, "public"); - assertContains(subject, vICAL.transp, "transparent"); - assertContains(subject, vICAL.categories, "Business"); - assertContains(subject, vICAL.categories, "Human Resources"); - } - } finally { - result.close(); - } - } - - @Test - public void testRepetitiveEvent() throws RepositoryException { - assertExtract("/microformats/hcalendar/example3.html"); - assertModelNotEmpty(); - assertContains(baseURI, RDF.TYPE, vcal); - assertContains(null, RDF.TYPE, vevent); - RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, vevent); - try { - while (result.hasNext()) { - Statement statement = result.next(); - final Resource subject = statement.getSubject(); - assertContains(null, vICAL.component, subject); - assertContains(subject, RDF.TYPE, vevent); - assertContains(subject, vICAL.dtstart, "19971102"); - assertContains(subject, vICAL.dtstamp, "1997-09-01T13:00:00.000Z"); - assertContains(subject, vICAL.uid, "[email 
protected]"); - assertContains(subject, vICAL.summary, "Our Blissful Anniversary"); - assertContains(subject, vICAL.class_, "confidential"); - assertContains(subject, vICAL.categories, "Anniversary"); - assertContains(subject, vICAL.categories, "Personal"); - assertContains(subject, vICAL.categories, "Special Occassion"); - assertContains(subject, vICAL.rrule, (Value) null); - } - } finally { - result.close(); - } - } - - @Test - public void testThreeDayEvent() throws RepositoryException { - assertExtract("/microformats/hcalendar/example5.html"); - assertModelNotEmpty(); - assertContains(baseURI, RDF.TYPE, vcal); - assertContains(null, RDF.TYPE, vevent); - RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, vevent); - try { - while (result.hasNext()) { - Statement statement = result.next(); - final Resource subject = statement.getSubject(); - assertContains(null, vICAL.component, subject); - assertContains(subject, RDF.TYPE, vevent); - assertContains(subject, vICAL.dtstart, "1996-09-20T16:00:00.000Z"); - assertContains(subject, vICAL.dtstamp, "1996-07-04T12:00:00.000Z"); - assertContains(subject, vICAL.dtend, "1996-09-20T22:00:00.000Z"); - assertContains(subject, vICAL.uid, "[email protected]"); - assertContains(subject, vICAL.summary, "Networld+Interop Conference"); - assertContains(subject, vICAL.description, "Networld+Interop Conference and Exhibit Atlanta World Congress\n" + - " Center Atlanta, Georgia"); - assertContains(subject, vICAL.categories, "Conference"); - assertContains(subject, vICAL.status, "CONFIRMED"); - assertContains(subject, vICAL.organizer, (Value) null); - } - } finally { - result.close(); - } - } - - @Test - public void testHCalendarWithBudyInfo() throws RepositoryException { - assertExtract("/microformats/hcalendar/example5.5.html"); - assertModelNotEmpty(); - assertContains(baseURI, RDF.TYPE, vcal); - assertContains(null, RDF.TYPE, vjournal); - } - - @Test - public void test01() throws RepositoryException { - 
assertDefault("/microformats/hcalendar/01-component-vevent-dtstart-date.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.dtstart, "19970903"); - } - - @Test - public void test02() throws RepositoryException { - assertDefault("/microformats/hcalendar/02-component-vevent-dtstart-datetime.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.dtstart, "1997-09-05T18:00:00.000Z"); - } - - @Test - public void test03() throws RepositoryException { - assertDefault("/microformats/hcalendar/03-component-vevent-dtend-date.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.dtstart, "19970903"); - assertContains(event, vICAL.dtend, "19970904"); - } - - @Test - public void test04() throws RepositoryException { - assertDefault("/microformats/hcalendar/04-component-vevent-dtend-datetime.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.dtstart, "1997-09-03T16:00:00.000Z"); - assertContains(event, vICAL.dtend, "1997-09-03T18:00:00.000Z"); - } - - @Test - public void test05() throws RepositoryException { - assertDefault("/microformats/hcalendar/05-calendar-simple.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.dtstart, "2005-10-05"); - assertContains(event, vICAL.dtend, "2005-10-08"); - assertContains(event, vICAL.summary, "Web 2.0 Conference"); - assertContains(event, vICAL.url, RDFUtils.uri("http://www.web2con.com/")); - assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA"); - } - - @Test - public void test06() throws RepositoryException { - assertDefault("/microformats/hcalendar/06-component-vevent-uri-relative.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.dtstart, "20060115T000000"); - assertContains(event, vICAL.summary, "Bad Movie Night - Gigli (blame mike spiegelman)"); - assertContains(event, vICAL.url, RDFUtils.uri(baseURI 
+ "squidlist/calendar/12279/2006/1/15")); - } - - @Test - public void test07() throws RepositoryException { - assertDefault("/microformats/hcalendar/07-component-vevent-description-simple.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.description, "Project xyz Review Meeting Minutes"); - assertNotContains(event, vICAL.url, (Resource) null); - } - - @Test - public void test08() throws RepositoryException { - assertDefault("/microformats/hcalendar/08-component-vevent-multiple-classes.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.dtstart, "2005-10-05"); - assertContains(event, vICAL.dtend, "2005-10-08"); - assertContains(event, vICAL.summary, "Web 2.0 Conference"); - assertContains(event, vICAL.url, RDFUtils.uri("http://www.web2con.com/")); - assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA"); - } - - @Test - public void test09() throws RepositoryException { - assertDefault("/microformats/hcalendar/09-component-vevent-summary-in-img-alt.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.dtend, "20060310"); - assertContains(event, vICAL.dtstart, "20060306"); - assertContains(event, vICAL.summary, "O'Reilly Emerging Technology Conference"); - assertContains(event, vICAL.url, RDFUtils.uri("http://conferences.oreillynet.com/et2006/")); - assertContains(event, vICAL.location, "Manchester Grand Hyatt in San Diego, CA"); - } - - @Test - public void test10() throws RepositoryException { - assertDefault("/microformats/hcalendar/10-component-vevent-entity.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.summary, "Cricket & Tennis Centre"); - assertContains(event, vICAL.description, "Melbourne's Cricket & Tennis Centres are in the heart of the city"); - } - - @Test - public void test11() throws RepositoryException { - 
assertDefault("/microformats/hcalendar/11-component-vevent-summary-in-subelements.html"); - Resource event = getExactlyOneComponent(vevent); - - assertContains(event, vICAL.dtstart, "20051005T1630-0700"); - assertContains(event, vICAL.dtend, "20051005T1645-0700"); - assertContains(event, vICAL.summary, "Welcome!\n John Battelle,\n Tim O'Reilly"); - } - - @Test - public void test12() throws RepositoryException { - assertDefault("/microformats/hcalendar/12-component-vevent-summary-url-in-same-class.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.dtstart, "20060125T000000"); - assertContains(event, vICAL.url, RDFUtils.uri("http://www.laughingsquid.com/squidlist/calendar/12377/2006/1/25")); - assertContains(event, vICAL.summary, "Art Reception for Tom Schultz and Felix Macnee"); - } - - @Test - public void test13() throws RepositoryException { - assertDefault("/microformats/hcalendar/13-component-vevent-summary-url-property.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.url, RDFUtils.uri( - "http://dps1.travelocity.com/dparcobrand.ctl?smls=Y&Service=YHOE&.intl=us&aln_name=AA&flt_num=" + - "1655&dep_arp_name=&arr_arp_name=&dep_dt_dy_1=23&dep_dt_mn_1=Jan&dep_dt_yr_1=2006&dep_tm_1=9:00am") - ); - assertContains(event, vICAL.summary, "ORD-SFO/AA 1655"); - } - - @Test - public void test15() throws RepositoryException { - assertDefault("/microformats/hcalendar/15-calendar-xml-lang.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.dtstart, "2005-10-05"); - assertContains(event, vICAL.dtend, "2005-10-08"); - assertContains(event, vICAL.summary, "Web 2.0 Conference"); - assertContains(event, vICAL.url, RDFUtils.uri("http://www.web2con.com/")); - assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA"); - } - - @Test - public void test16() throws RepositoryException { - assertDefault("/microformats/hcalendar/16-calendar-force-outlook.html"); - 
Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.dtstart, "2005-10-05"); - assertContains(event, vICAL.dtend, "2005-10-08"); - assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA"); - } - - @Test - public void test17() throws RepositoryException { - assertDefault("/microformats/hcalendar/17-component-vevent-description-value-in-subelements.html"); - Resource event = getExactlyOneComponent(vevent); - assertContains(event, vICAL.dtstart, "2006-01-18"); - assertContains(event, vICAL.dtend, "2006-01-20"); - assertContains(event, vICAL.location, "Maryland"); - assertContains(event, vICAL.summary, "3rd PAW ftf meeting"); - assertContains(event, vICAL.description, - "RESOLUTION: to have a\n 3rd PAW ftf meeting \n" + - " 18-19 Jan in \n Maryland; location contingent" + - " on confirmation from timbl" - ); - } - - @Test - public void test18() throws RepositoryException { - assertDefault("/microformats/hcalendar/18-component-vevent-uid.html"); - assertStatementsSize(RDF.TYPE, vevent, 5); - assertStatementsSize(vICAL.uid, RDFUtils.literal("http://example.com/foo.html"), 5); - } - - @Test - public void testNoMicroformats() throws RepositoryException, IOException, ExtractionException { - extract("/html/html-without-uf.html"); - assertModelEmpty(); - } - - @Test - public void testNoMicroformatsInStatCvsPage() throws RepositoryException, IOException, ExtractionException { - extract("/microformats/hcalendar/empty-statcvs.html"); - assertModelEmpty(); - } - - @Test - public void testFullHCalendarClass() throws RepositoryException { - assertExtract("/microformats/hcalendar/example5.3.html"); - assertModelNotEmpty(); - assertContains(baseURI, RDF.TYPE, vcal); - assertContains(null, RDF.TYPE, vevent); - } - - @Test - public void testHCalendarClassWithTodo() throws RepositoryException { - assertExtract("/microformats/hcalendar/example5.4.html"); - assertModelNotEmpty(); - assertContains(baseURI, RDF.TYPE, vcal); - assertContains(null, 
RDF.TYPE, vtodo); - } - - @Test - public void testHCalendarClassWithJournal() throws RepositoryException { - assertExtract("/microformats/hcalendar/example5.5.html"); - assertModelNotEmpty(); - assertContains(baseURI, RDF.TYPE, vcal); - assertContains(null, RDF.TYPE, vjournal); - } - - private Resource getExactlyOneComponent(Resource r) throws RepositoryException { - RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, r); - try { - Assert.assertTrue(result.hasNext()); - Resource sub = result.next().getSubject(); - Assert.assertFalse(result.hasNext()); - return sub; - } finally { - result.close(); - } - } - - private void assertDefault(String name) throws RepositoryException { - assertExtract(name); - assertModelNotEmpty(); - assertContains(baseURI, RDF.TYPE, vcal); - assertStatementsSize(RDF.TYPE, vcal, 1); - } + private static final ICAL vICAL = ICAL.getInstance(); + private static final SINDICE vSINDICE = SINDICE.getInstance(); + + private final static URI vcal = vICAL.Vcalendar; + private final static URI vevent = vICAL.Vevent; + private final static URI vjournal = vICAL.Vjournal; + private final static URI vtodo = vICAL.Vtodo; + + protected ExtractorFactory<?> getExtractorFactory() { + return new HCalendarExtractorFactory(); + } + + @Test + public void testOneVEvent() throws Exception { + assertExtract("/microformats/hcalendar/example1.html"); + assertModelNotEmpty(); + assertContains(baseURI, RDF.TYPE, vcal); + assertContains(null, RDF.TYPE, vevent); + RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, + vevent); + try { + while (result.hasNext()) { + Statement statement = result.next(); + final Resource subject = statement.getSubject(); + assertContains(null, vICAL.component, subject); + assertContains(subject, RDF.TYPE, vevent); + assertContains(subject, vICAL.dtstart, + "1997-09-05T18:00:00.000Z"); + assertContains(subject, vICAL.dtstamp, + "1997-09-01T13:00:00.000Z"); + assertContains(subject, vICAL.dtend, 
"1997-09-03T19:00:00.000Z"); + assertContains(subject, vICAL.uid, + "[email protected]"); + assertContains(subject, vICAL.summary, "Annual Employee Review"); + assertContains(subject, vICAL.class_, "private"); + assertContains(subject, vICAL.categories, "Business"); + assertContains(subject, vICAL.categories, "Human Resources"); + } + } finally { + result.close(); + } + } + + @Test + public void testTransparentEvent() throws Exception { + assertExtract("/microformats/hcalendar/example2.html"); + assertModelNotEmpty(); + assertContains(baseURI, RDF.TYPE, vcal); + assertContains(null, RDF.TYPE, vevent); + RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, + vevent); + try { + while (result.hasNext()) { + Statement statement = result.next(); + final Resource subject = statement.getSubject(); + assertContains(null, vICAL.component, subject); + assertContains(subject, RDF.TYPE, vevent); + assertContains(subject, vICAL.dtstart, + "1997-04-03T18:00:00.000Z"); + assertContains(subject, vICAL.dtstamp, + "1997-09-01T13:00:00.000Z"); + assertContains(subject, vICAL.dtend, "1997-04-02T01:00:00.000Z"); + assertContains(subject, vICAL.uid, + "[email protected]"); + assertContains(subject, vICAL.summary, + "Laurel is in sensitivity awareness class."); + assertContains(subject, vICAL.class_, "public"); + assertContains(subject, vICAL.transp, "transparent"); + assertContains(subject, vICAL.categories, "Business"); + assertContains(subject, vICAL.categories, "Human Resources"); + } + } finally { + result.close(); + } + } + + @Test + public void testRepetitiveEvent() throws Exception { + assertExtract("/microformats/hcalendar/example3.html"); + assertModelNotEmpty(); + assertContains(baseURI, RDF.TYPE, vcal); + assertContains(null, RDF.TYPE, vevent); + RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, + vevent); + try { + while (result.hasNext()) { + Statement statement = result.next(); + final Resource subject = statement.getSubject(); + 
assertContains(null, vICAL.component, subject); + assertContains(subject, RDF.TYPE, vevent); + assertContains(subject, vICAL.dtstart, "19971102"); + assertContains(subject, vICAL.dtstamp, + "1997-09-01T13:00:00.000Z"); + assertContains(subject, vICAL.uid, + "[email protected]"); + assertContains(subject, vICAL.summary, + "Our Blissful Anniversary"); + assertContains(subject, vICAL.class_, "confidential"); + assertContains(subject, vICAL.categories, "Anniversary"); + assertContains(subject, vICAL.categories, "Personal"); + assertContains(subject, vICAL.categories, "Special Occassion"); + assertContains(subject, vICAL.rrule, (Value) null); + } + } finally { + result.close(); + } + } + + @Test + public void testThreeDayEvent() throws Exception { + assertExtract("/microformats/hcalendar/example5.html"); + assertModelNotEmpty(); + assertContains(baseURI, RDF.TYPE, vcal); + assertContains(null, RDF.TYPE, vevent); + RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, + vevent); + try { + while (result.hasNext()) { + Statement statement = result.next(); + final Resource subject = statement.getSubject(); + assertContains(null, vICAL.component, subject); + assertContains(subject, RDF.TYPE, vevent); + assertContains(subject, vICAL.dtstart, + "1996-09-20T16:00:00.000Z"); + assertContains(subject, vICAL.dtstamp, + "1996-07-04T12:00:00.000Z"); + assertContains(subject, vICAL.dtend, "1996-09-20T22:00:00.000Z"); + assertContains(subject, vICAL.uid, "[email protected]"); + assertContains(subject, vICAL.summary, + "Networld+Interop Conference"); + assertContains(subject, vICAL.description, + "Networld+Interop Conference and Exhibit Atlanta World Congress\n" + + " Center Atlanta, Georgia"); + assertContains(subject, vICAL.categories, "Conference"); + assertContains(subject, vICAL.status, "CONFIRMED"); + assertContains(subject, vICAL.organizer, (Value) null); + } + } finally { + result.close(); + } + } + + @Test + public void testHCalendarWithBudyInfo() throws Exception 
{ + assertExtract("/microformats/hcalendar/example5.5.html"); + assertModelNotEmpty(); + assertContains(baseURI, RDF.TYPE, vcal); + assertContains(null, RDF.TYPE, vjournal); + } + + @Test + public void test01() throws Exception { + assertDefault("/microformats/hcalendar/01-component-vevent-dtstart-date.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.dtstart, "19970903"); + } + + @Test + public void test02() throws Exception { + assertDefault("/microformats/hcalendar/02-component-vevent-dtstart-datetime.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.dtstart, "1997-09-05T18:00:00.000Z"); + } + + @Test + public void test03() throws Exception { + assertDefault("/microformats/hcalendar/03-component-vevent-dtend-date.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.dtstart, "19970903"); + assertContains(event, vICAL.dtend, "19970904"); + } + + @Test + public void test04() throws Exception { + assertDefault("/microformats/hcalendar/04-component-vevent-dtend-datetime.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.dtstart, "1997-09-03T16:00:00.000Z"); + assertContains(event, vICAL.dtend, "1997-09-03T18:00:00.000Z"); + } + + @Test + public void test05() throws Exception { + assertDefault("/microformats/hcalendar/05-calendar-simple.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.dtstart, "2005-10-05"); + assertContains(event, vICAL.dtend, "2005-10-08"); + assertContains(event, vICAL.summary, "Web 2.0 Conference"); + assertContains(event, vICAL.url, + RDFUtils.uri("http://www.web2con.com/")); + assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA"); + } + + @Test + public void test06() throws Exception { + assertDefault("/microformats/hcalendar/06-component-vevent-uri-relative.html"); + Resource event = getExactlyOneComponent(vevent); + 
assertContains(event, vICAL.dtstart, "20060115T000000"); + assertContains(event, vICAL.summary, + "Bad Movie Night - Gigli (blame mike spiegelman)"); + assertContains(event, vICAL.url, + RDFUtils.uri(baseURI + "squidlist/calendar/12279/2006/1/15")); + } + + /** Extracts fixture 07 (vevent description, simple) and checks the description of the single vevent. NOTE(review): the null object passed to assertNotContains presumably acts as a wildcard, i.e. no url statement at all; confirm against the helper. */ @Test + public void test07() throws Exception { + assertDefault("/microformats/hcalendar/07-component-vevent-description-simple.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.description, + "Project xyz Review Meeting Minutes"); + assertNotContains(event, vICAL.url, (Resource) null); + } + + /** Extracts fixture 08 (vevent with multiple classes) and checks dtstart, dtend, summary, url and location of the single vevent. */ @Test + public void test08() throws Exception { + assertDefault("/microformats/hcalendar/08-component-vevent-multiple-classes.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.dtstart, "2005-10-05"); + assertContains(event, vICAL.dtend, "2005-10-08"); + assertContains(event, vICAL.summary, "Web 2.0 Conference"); + assertContains(event, vICAL.url, + RDFUtils.uri("http://www.web2con.com/")); + assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA"); + } + + /** Extracts fixture 09 (summary in img alt, going by the file name) and checks dtend, dtstart, summary, url and location of the single vevent. */ @Test + public void test09() throws Exception { + assertDefault("/microformats/hcalendar/09-component-vevent-summary-in-img-alt.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.dtend, "20060310"); + assertContains(event, vICAL.dtstart, "20060306"); + assertContains(event, vICAL.summary, + "O'Reilly Emerging Technology Conference"); + assertContains(event, vICAL.url, + RDFUtils.uri("http://conferences.oreillynet.com/et2006/")); + assertContains(event, vICAL.location, + "Manchester Grand Hyatt in San Diego, CA"); + } + + /** Extracts fixture 10 (vevent entity) and checks that summary and description of the single vevent are matched with a literal ampersand character. */ @Test + public void test10() throws Exception { + assertDefault("/microformats/hcalendar/10-component-vevent-entity.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.summary, "Cricket & Tennis Centre"); + assertContains(event, vICAL.description, + "Melbourne's Cricket & Tennis Centres 
are in the heart of the city"); + } + + /** Extracts fixture 11 (summary in subelements) and checks dtstart, dtend and a summary value that contains embedded newlines. */ @Test + public void test11() throws Exception { + assertDefault("/microformats/hcalendar/11-component-vevent-summary-in-subelements.html"); + Resource event = getExactlyOneComponent(vevent); + + assertContains(event, vICAL.dtstart, "20051005T1630-0700"); + assertContains(event, vICAL.dtend, "20051005T1645-0700"); + assertContains(event, vICAL.summary, + "Welcome!\n John Battelle,\n Tim O'Reilly"); + } + + /** Extracts fixture 12 (summary and url in the same class) and checks dtstart, url and summary of the single vevent. */ @Test + public void test12() throws Exception { + assertDefault("/microformats/hcalendar/12-component-vevent-summary-url-in-same-class.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.dtstart, "20060125T000000"); + assertContains( + event, + vICAL.url, + RDFUtils.uri("http://www.laughingsquid.com/squidlist/calendar/12377/2006/1/25")); + assertContains(event, vICAL.summary, + "Art Reception for Tom Schultz and Felix Macnee"); + } + + /** Extracts fixture 13 (summary url property) and checks the url, whose expected value is built from two concatenated string parts, and the summary. */ @Test + public void test13() throws Exception { + assertDefault("/microformats/hcalendar/13-component-vevent-summary-url-property.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains( + event, + vICAL.url, + RDFUtils.uri("http://dps1.travelocity.com/dparcobrand.ctl?smls=Y&Service=YHOE&.intl=us&aln_name=AA&flt_num=" + + "1655&dep_arp_name=&arr_arp_name=&dep_dt_dy_1=23&dep_dt_mn_1=Jan&dep_dt_yr_1=2006&dep_tm_1=9:00am")); + assertContains(event, vICAL.summary, "ORD-SFO/AA 1655"); + } + + /** Extracts fixture 15 (calendar xml lang) and checks dtstart, dtend, summary, url and location of the single vevent. No test14 appears in the visible part of this file. */ @Test + public void test15() throws Exception { + assertDefault("/microformats/hcalendar/15-calendar-xml-lang.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.dtstart, "2005-10-05"); + assertContains(event, vICAL.dtend, "2005-10-08"); + assertContains(event, vICAL.summary, "Web 2.0 Conference"); + assertContains(event, vICAL.url, + RDFUtils.uri("http://www.web2con.com/")); + assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA"); + } + + /** Extracts fixture 16 (calendar force outlook) and checks dtstart, dtend and location; summary and url are not asserted in this test. */ @Test + public void test16() throws Exception { + 
assertDefault("/microformats/hcalendar/16-calendar-force-outlook.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.dtstart, "2005-10-05"); + assertContains(event, vICAL.dtend, "2005-10-08"); + assertContains(event, vICAL.location, "Argent Hotel, San Francisco, CA"); + } + + /** Extracts fixture 17 (description value in subelements) and checks dtstart, dtend, location, summary and a description containing embedded newlines. */ @Test + public void test17() throws Exception { + assertDefault("/microformats/hcalendar/17-component-vevent-description-value-in-subelements.html"); + Resource event = getExactlyOneComponent(vevent); + assertContains(event, vICAL.dtstart, "2006-01-18"); + assertContains(event, vICAL.dtend, "2006-01-20"); + assertContains(event, vICAL.location, "Maryland"); + assertContains(event, vICAL.summary, "3rd PAW ftf meeting"); + assertContains( + event, + vICAL.description, + "RESOLUTION: to have a\n 3rd PAW ftf meeting \n" + + " 18-19 Jan in \n Maryland; location contingent" + + " on confirmation from timbl"); + } + + /** Extracts fixture 18 (vevent uid) and expects a size of 5 both for the vevent type and for the given uid literal. NOTE(review): presumably assertStatementsSize counts statements matching the predicate and object; confirm against the helper. */ @Test + public void test18() throws Exception { + assertDefault("/microformats/hcalendar/18-component-vevent-uid.html"); + assertStatementsSize(RDF.TYPE, vevent, 5); + assertStatementsSize(vICAL.uid, + RDFUtils.literal("http://example.com/foo.html"), 5); + } + + /** Runs extract on an HTML page without microformats and asserts the model is empty. NOTE(review): IOException is already covered by Exception in the throws clause, and presumably ExtractionException is too. */ @Test + public void testNoMicroformats() throws Exception, IOException, + ExtractionException { + extract("/html/html-without-uf.html"); + assertModelEmpty(); + } + + /** Runs extract on the empty-statcvs.html fixture and asserts the model is empty. NOTE(review): same redundant throws clause as testNoMicroformats. */ @Test + public void testNoMicroformatsInStatCvsPage() throws Exception, + IOException, ExtractionException { + extract("/microformats/hcalendar/empty-statcvs.html"); + assertModelEmpty(); + } + + /** Extracts example5.3.html and asserts a non-empty model in which baseURI is typed vcal and a vevent type statement exists (null subject, presumably a wildcard). */ @Test + public void testFullHCalendarClass() throws Exception { + assertExtract("/microformats/hcalendar/example5.3.html"); + assertModelNotEmpty(); + assertContains(baseURI, RDF.TYPE, vcal); + assertContains(null, RDF.TYPE, vevent); + } + + /** Extracts example5.4.html and asserts a non-empty model in which baseURI is typed vcal and a vtodo type statement exists (null subject, presumably a wildcard). */ @Test + public void testHCalendarClassWithTodo() throws Exception { + assertExtract("/microformats/hcalendar/example5.4.html"); + assertModelNotEmpty(); + assertContains(baseURI, RDF.TYPE, vcal); + 
assertContains(null, RDF.TYPE, vtodo); + } + + /** Extracts example5.5.html and asserts a non-empty model in which baseURI is typed vcal and a vjournal type statement exists (null subject, presumably a wildcard). */ @Test + public void testHCalendarClassWithJournal() throws Exception { + assertExtract("/microformats/hcalendar/example5.5.html"); + assertModelNotEmpty(); + assertContains(baseURI, RDF.TYPE, vcal); + assertContains(null, RDF.TYPE, vjournal); + } + + /** Fetches the statements with predicate RDF.TYPE and object r, asserts that there is exactly one, and returns its subject; the result is closed in the finally block whether or not the assertions pass. */ private Resource getExactlyOneComponent(Resource r) throws Exception { + RepositoryResult<Statement> result = getStatements(null, RDF.TYPE, r); + try { + Assert.assertTrue(result.hasNext()); + Resource sub = result.next().getSubject(); + Assert.assertFalse(result.hasNext()); + return sub; + } finally { + result.close(); + } + } + + /** Shared preamble of the numbered tests: calls assertExtract on the named resource, then asserts the model is not empty, that baseURI is typed vcal, and assertStatementsSize(RDF.TYPE, vcal, 1). */ private void assertDefault(String name) throws Exception { + assertExtract(name); + assertModelNotEmpty(); + assertContains(baseURI, RDF.TYPE, vcal); + assertStatementsSize(RDF.TYPE, vcal, 1); + } }
