ANY23-226 : Make JSONLD extraction work. Also make services work in Eclipse using M2E.
@MetaInfServices output isn't recognised by M2E, and M2E insist it isn't their issue. Project: http://git-wip-us.apache.org/repos/asf/any23/repo Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/fd822849 Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/fd822849 Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/fd822849 Branch: refs/heads/master Commit: fd822849190240b8cf981ecc7abd0b4f592381d5 Parents: 1e3eb9c Author: Peter Ansell <[email protected]> Authored: Sat Mar 21 15:49:33 2015 +1100 Committer: Peter Ansell <[email protected]> Committed: Sat Mar 21 15:54:15 2015 +1100 ---------------------------------------------------------------------- core/pom.xml | 9 - .../any23/cli/ExtractorDocumentation.java | 2 - .../org/apache/any23/cli/MicrodataParser.java | 2 - .../java/org/apache/any23/cli/MimeDetector.java | 2 - .../org/apache/any23/cli/PluginVerifier.java | 2 - .../main/java/org/apache/any23/cli/Rover.java | 2 - .../java/org/apache/any23/cli/VocabPrinter.java | 2 - .../extractor/csv/CSVExtractorFactory.java | 2 - .../extractor/html/AdrExtractorFactory.java | 2 - .../extractor/html/EmbeddedJSONLDExtractor.java | 398 ++-- .../html/EmbeddedJSONLDExtractorFactory.java | 2 - .../extractor/html/GeoExtractorFactory.java | 2 - .../html/HCalendarExtractorFactory.java | 2 - .../extractor/html/HCardExtractorFactory.java | 2 - .../html/HListingExtractorFactory.java | 2 - .../extractor/html/HRecipeExtractorFactory.java | 2 - .../extractor/html/HResumeExtractorFactory.java | 2 - .../html/HReviewAggregateExtractorFactory.java | 2 - .../extractor/html/HReviewExtractorFactory.java | 2 - .../html/HTMLMetaExtractorFactory.java | 2 - .../html/HeadLinkExtractorFactory.java | 2 - .../extractor/html/ICBMExtractorFactory.java | 2 - .../extractor/html/LicenseExtractorFactory.java | 2 - .../extractor/html/SpeciesExtractorFactory.java | 2 - .../extractor/html/TitleExtractorFactory.java | 2 - .../html/TurtleHTMLExtractorFactory.java | 2 - 
.../extractor/html/XFNExtractorFactory.java | 2 - .../microdata/MicrodataExtractorFactory.java | 2 - .../any23/extractor/rdf/BaseRDFExtractor.java | 2 +- .../extractor/rdf/JSONLDExtractorFactory.java | 2 - .../extractor/rdf/NQuadsExtractorFactory.java | 2 - .../extractor/rdf/NTriplesExtractorFactory.java | 2 - .../extractor/rdf/RDFXMLExtractorFactory.java | 2 - .../extractor/rdf/TriXExtractorFactory.java | 2 - .../extractor/rdf/TurtleExtractorFactory.java | 2 - .../extractor/rdfa/RDFa11ExtractorFactory.java | 2 - .../extractor/rdfa/RDFaExtractorFactory.java | 2 - .../extractor/xpath/XPathExtractorFactory.java | 2 - .../apache/any23/writer/JSONWriterFactory.java | 2 - .../any23/writer/NQuadsWriterFactory.java | 2 - .../any23/writer/NTriplesWriterFactory.java | 2 - .../any23/writer/RDFXMLWriterFactory.java | 2 - .../apache/any23/writer/TriXWriterFactory.java | 2 - .../any23/writer/TurtleWriterFactory.java | 2 - .../any23/writer/URIListWriterFactory.java | 2 - .../META-INF/services/org.apache.any23.cli.Tool | 6 + .../org.apache.any23.extractor.ExtractorFactory | 28 + .../org.apache.any23.writer.WriterFactory | 7 + .../any23/extractor/csv/CSVExtractorTest.java | 164 +- .../example/ExampleExtractorFactory.java | 2 - .../html/AbstractExtractorTestCase.java | 1459 +++++++------- .../html/EmbeddedJSONLDExtractorTest.java | 34 +- .../extractor/html/HCalendarExtractorTest.java | 730 +++---- .../extractor/html/HCardExtractorTest.java | 1872 +++++++++--------- .../extractor/html/HListingExtractorTest.java | 600 +++--- .../extractor/html/HRecipeExtractorTest.java | 56 +- .../extractor/html/HResumeExtractorTest.java | 244 ++- .../extractor/html/HReviewExtractorTest.java | 539 ++--- .../extractor/html/HTMLMetaExtractorTest.java | 95 +- .../any23/extractor/html/RDFMergerTest.java | 920 ++++----- .../extractor/html/SpeciesExtractorTest.java | 2 +- .../extractor/html/TurtleHTMLExtractorTest.java | 2 +- .../extractor/rdf/JSONLDExtractorTest.java | 8 - 
.../rdfa/AbstractRDFaExtractorTestCase.java | 243 ++- .../any23/io/nquads/NQuadsParserFactory.java | 2 - .../any23/io/nquads/NQuadsWriterFactory.java | 2 - .../main/java/org/apache/any23/cli/Crawler.java | 2 - .../META-INF/services/org.apache.any23.cli.Tool | 1 + .../htmlscraper/HTMLScraperExtractor.java | 2 - .../HTMLScraperExtractorFactory.java | 2 - .../org.apache.any23.extractor.ExtractorFactory | 1 + .../plugin/officescraper/ExcelExtractor.java | 2 - .../officescraper/ExcelExtractorFactory.java | 2 - .../org.apache.any23.extractor.ExtractorFactory | 1 + src/site/apt/any23-plugins.apt | 1 - ...html-embedded-jsonld-extractor-multiple.html | 45 + .../html/html-embedded-jsonld-extractor.html | 4 +- 77 files changed, 3833 insertions(+), 3738 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/pom.xml ---------------------------------------------------------------------- diff --git a/core/pom.xml b/core/pom.xml index 8e38051..b6a0427 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -153,15 +153,6 @@ </dependency> <!-- END: Apache Commons CSV --> - <!-- BEGIN: plugins --> - <dependency> - <groupId>org.kohsuke.metainf-services</groupId> - <artifactId>metainf-services</artifactId> - <scope>compile</scope> - <optional>true</optional> - </dependency> - <!-- END: plugins --> - <!-- BEGIN: Test Dependencies --> <dependency> <groupId>junit</groupId> http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java b/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java index 16d7b4f..eb5dd7e 100644 --- a/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java +++ b/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java @@ -28,7 +28,6 
@@ import org.apache.any23.extractor.Extractor.ContentExtractor; import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor; import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.ExtractorRegistry; -import org.kohsuke.MetaInfServices; import java.io.IOException; import java.util.LinkedList; @@ -38,7 +37,6 @@ import java.util.List; * This class provides some command-line documentation * about available extractors and their usage. */ -@MetaInfServices @Parameters( commandNames = { "extractor" }, commandDescription= "Utility for obtaining documentation about metadata extractors.") public class ExtractorDocumentation implements Tool { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/cli/MicrodataParser.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/cli/MicrodataParser.java b/core/src/main/java/org/apache/any23/cli/MicrodataParser.java index 3fa6348..9a593e4 100644 --- a/core/src/main/java/org/apache/any23/cli/MicrodataParser.java +++ b/core/src/main/java/org/apache/any23/cli/MicrodataParser.java @@ -27,7 +27,6 @@ import org.apache.any23.source.DocumentSource; import org.apache.any23.source.FileDocumentSource; import org.apache.any23.source.HTTPDocumentSource; import org.apache.any23.util.StreamUtils; -import org.kohsuke.MetaInfServices; import java.io.File; import java.io.InputStream; @@ -44,7 +43,6 @@ import java.util.regex.Pattern; * * @author Michele Mostarda ([email protected]) */ -@MetaInfServices @Parameters( commandNames = { "microdata" }, commandDescription = "Commandline Tool for extracting Microdata from file/HTTP source.") public class MicrodataParser implements Tool { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/cli/MimeDetector.java ---------------------------------------------------------------------- diff --git 
a/core/src/main/java/org/apache/any23/cli/MimeDetector.java b/core/src/main/java/org/apache/any23/cli/MimeDetector.java index 5684473..87148ca 100644 --- a/core/src/main/java/org/apache/any23/cli/MimeDetector.java +++ b/core/src/main/java/org/apache/any23/cli/MimeDetector.java @@ -30,7 +30,6 @@ import org.apache.any23.source.DocumentSource; import org.apache.any23.source.FileDocumentSource; import org.apache.any23.source.HTTPDocumentSource; import org.apache.any23.source.StringDocumentSource; -import org.kohsuke.MetaInfServices; import java.io.File; import java.net.URISyntaxException; @@ -44,7 +43,6 @@ import java.util.List; * * @author Michele Mostarda ([email protected]) */ -@MetaInfServices @Parameters(commandNames = { "mimes" }, commandDescription = "MIME Type Detector Tool.") public class MimeDetector implements Tool{ http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/cli/PluginVerifier.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/cli/PluginVerifier.java b/core/src/main/java/org/apache/any23/cli/PluginVerifier.java index 295c86f..70b72f5 100644 --- a/core/src/main/java/org/apache/any23/cli/PluginVerifier.java +++ b/core/src/main/java/org/apache/any23/cli/PluginVerifier.java @@ -25,7 +25,6 @@ import org.apache.any23.mime.MIMEType; import org.apache.any23.plugin.Any23PluginManager; import org.apache.any23.plugin.Author; import org.apache.any23.plugin.ExtractorPlugin; -import org.kohsuke.MetaInfServices; import java.io.File; import java.io.PrintStream; @@ -40,7 +39,6 @@ import java.util.List; * * @author Michele Mostarda ([email protected]) */ -@MetaInfServices @Parameters(commandNames = { "verify" }, commandDescription = "Utility for plugin management verification.") public class PluginVerifier implements Tool { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/cli/Rover.java 
---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/cli/Rover.java b/core/src/main/java/org/apache/any23/cli/Rover.java index 6324872..1c77860 100644 --- a/core/src/main/java/org/apache/any23/cli/Rover.java +++ b/core/src/main/java/org/apache/any23/cli/Rover.java @@ -36,7 +36,6 @@ import org.apache.any23.writer.ReportingTripleHandler; import org.apache.any23.writer.TripleHandler; import org.apache.any23.writer.TripleHandlerException; import org.apache.any23.writer.WriterFactoryRegistry; -import org.kohsuke.MetaInfServices; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -59,7 +58,6 @@ import static java.lang.String.format; * @author Richard Cyganiak ([email protected]) * @author Gabriele Renzi */ -@MetaInfServices @Parameters(commandNames = { "rover" }, commandDescription = "Any23 Command Line Tool.") public class Rover implements Tool { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/cli/VocabPrinter.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/cli/VocabPrinter.java b/core/src/main/java/org/apache/any23/cli/VocabPrinter.java index 411b67a..70bf16f 100644 --- a/core/src/main/java/org/apache/any23/cli/VocabPrinter.java +++ b/core/src/main/java/org/apache/any23/cli/VocabPrinter.java @@ -18,7 +18,6 @@ package org.apache.any23.cli; import org.apache.any23.vocab.RDFSchemaUtils; -import org.kohsuke.MetaInfServices; import org.openrdf.rio.RDFFormat; import com.beust.jcommander.IStringConverter; @@ -30,7 +29,6 @@ import com.beust.jcommander.Parameters; * * @author Michele Mostarda ([email protected]) */ -@MetaInfServices @Parameters(commandNames = { "vocab" }, commandDescription = "Prints out the RDF Schema of the vocabularies used by Any23.") public class VocabPrinter implements Tool { 
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java index 5f1dc8f..822cfd2 100644 --- a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java @@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class CSVExtractorFactory extends SimpleExtractorFactory<CSVExtractor> implements ExtractorFactory<CSVExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java index be7bb7a..6584e0c 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class AdrExtractorFactory extends 
SimpleExtractorFactory<AdrExtractor> implements ExtractorFactory<AdrExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java index 5506a10..fbf2832 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java @@ -42,201 +42,215 @@ import java.util.Map; import java.util.Set; /** - * This extractor represents the HTML script tags used to embed blocks of data in documents. - * This way, JSON-LD content can be easily embedded in HTML by placing it in a script element - * with the type attribute set to application/ld+json - * according the <a href="http://www.w3.org/TR/json-ld/#embedding-json-ld-in-html-documents">JSON-LD specification</a>. + * This extractor represents the HTML script tags used to embed blocks of data + * in documents. This way, JSON-LD content can be easily embedded in HTML by + * placing it in a script element with the type attribute set to + * application/ld+json according the <a + * href="http://www.w3.org/TR/json-ld/#embedding-json-ld-in-html-documents" + * >JSON-LD specification</a>. 
* */ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor { - private static final SINDICE vSINDICE = SINDICE.getInstance(); - - private URI profile; - - private Map<String, URI> prefixes = new HashMap<String, URI>(); - - private String documentLang; - - private JSONLDExtractor extractor; - - /** - * {@inheritDoc} - */ - @Override - public void run( - ExtractionParameters extractionParameters, - ExtractionContext extractionContext, - Document in, - ExtractionResult out - ) throws IOException, ExtractionException { - profile = extractProfile(in); - documentLang = getDocumentLanguage(in); - extractLinkDefinedPrefixes(in); - - String baseProfile = vSINDICE.NS; - if(profile != null) { - baseProfile = profile.toString(); - } - - final URI documentURI = extractionContext.getDocumentURI(); - Set<JSONLDScript> jsonldScripts = extractJSONLDScript(in, baseProfile, extractionParameters, extractionContext, out); - for(JSONLDScript jsonldScript : jsonldScripts) { - String lang = documentLang; - if(jsonldScript.getLang() != null) { - lang = jsonldScript.getLang(); - } - out.writeTriple( - documentURI, - jsonldScript.getName(), - new LiteralImpl(jsonldScript.getContent(), lang) - ); - } - } - - /** - * Returns the {@link Document} language if declared, <code>null</code> otherwise. - * - * @param in a instance of {@link Document}. - * @return the language declared, could be <code>null</code>. - */ - private String getDocumentLanguage(Document in) { - String lang = DomUtils.find(in, "string(/HTML/@lang)"); - if (lang.equals("")) { - return null; - } - return lang; - } - - private URI extractProfile(Document in) { - String profile = DomUtils.find(in, "string(/HTML/@profile)"); - if (profile.equals("")) { - return null; - } - return new URIImpl(profile); - } - - /** - * It extracts prefixes defined in the <i>LINK</i> meta tags. 
- * - * @param in - */ - private void extractLinkDefinedPrefixes(Document in) { - List<Node> linkNodes = DomUtils.findAll(in, "/HTML/HEAD/LINK"); - for(Node linkNode : linkNodes) { - NamedNodeMap attributes = linkNode.getAttributes(); - String rel = attributes.getNamedItem("rel").getTextContent(); - String href = attributes.getNamedItem("href").getTextContent(); - if(rel != null && href !=null && RDFUtils.isAbsoluteURI(href)) { - prefixes.put(rel, new URIImpl(href)); - } - } - } - - private Set<JSONLDScript> extractJSONLDScript(Document in, String baseProfile, ExtractionParameters extractionParameters, - ExtractionContext extractionContext, ExtractionResult out) throws IOException, ExtractionException { - List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT"); - Set<JSONLDScript> result = new HashSet<JSONLDScript>(); - extractor = new JSONLDExtractorFactory().createExtractor(); - for (Node jsonldNode : scriptNodes) { - NamedNodeMap attributes = jsonldNode.getAttributes(); - for (int i = 0; i < attributes.getLength(); i++) { - if (attributes.item(i).getTextContent().equalsIgnoreCase("application/ld+json")) { - extractor.run(extractionParameters, extractionContext, DomUtils.nodeToInputStream(jsonldNode), out); - } - } - Node nameAttribute = attributes.getNamedItem("name"); - Node contentAttribute = attributes.getNamedItem("content"); - if (nameAttribute == null || contentAttribute == null) { - continue; - } - String name = nameAttribute.getTextContent(); - String content = contentAttribute.getTextContent(); - String xpath = DomUtils.getXPathForNode(jsonldNode); - URI nameAsURI = getPrefixIfExists(name); - if (nameAsURI == null) { - nameAsURI = new URIImpl(baseProfile + name); - } - JSONLDScript jsonldScript = new JSONLDScript(xpath, nameAsURI, content); - result.add(jsonldScript); - } - return result; - } - - private URI getPrefixIfExists(String name) { - String[] split = name.split("\\."); - if(split.length == 2 && prefixes.containsKey(split[0])) { - 
return new URIImpl(prefixes.get(split[0]) + split[1]); - } - return null; - } - - @Override - public ExtractorDescription getDescription() { - return HTMLMetaExtractorFactory.getDescriptionInstance(); - } - - private class JSONLDScript { - - private String xpath; - - private URI name; - - private String lang; - - private String content; - - public JSONLDScript(String xpath, URI name, String content) { - this.xpath = xpath; - this.name = name; - this.content = content; - } - - public JSONLDScript(String xpath, URI name, String content, String lang) { - this(xpath, name, content); - this.lang = lang; - } - - public URI getName() { - return name; - } - - public void setName(URI name) { - this.name = name; - } - - public String getLang() { - return lang; - } - - public void setLang(String lang) { - this.lang = lang; - } - - public String getContent() { - return content; - } - - public void setContent(String content) { - this.content = content; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - JSONLDScript meta = (JSONLDScript) o; - - if (xpath != null ? !xpath.equals(meta.xpath) : meta.xpath != null) return false; - - return true; - } - - @Override - public int hashCode() { - return xpath != null ? 
xpath.hashCode() : 0; - } - } + private static final SINDICE vSINDICE = SINDICE.getInstance(); + + private URI profile; + + private Map<String, URI> prefixes = new HashMap<String, URI>(); + + private String documentLang; + + private JSONLDExtractor extractor; + + /** + * {@inheritDoc} + */ + @Override + public void run(ExtractionParameters extractionParameters, + ExtractionContext extractionContext, Document in, + ExtractionResult out) throws IOException, ExtractionException { + profile = extractProfile(in); + documentLang = getDocumentLanguage(in); + extractLinkDefinedPrefixes(in); + + String baseProfile = vSINDICE.NS; + if (profile != null) { + baseProfile = profile.toString(); + } + + final URI documentURI = extractionContext.getDocumentURI(); + Set<JSONLDScript> jsonldScripts = extractJSONLDScript(in, baseProfile, + extractionParameters, extractionContext, out); + for (JSONLDScript jsonldScript : jsonldScripts) { + //String lang = documentLang; + //if (jsonldScript.getLang() != null) { + // lang = jsonldScript.getLang(); + //} + //out.writeTriple(documentURI, jsonldScript.getName(), + // new LiteralImpl(jsonldScript.getContent(), lang)); + } + } + + /** + * Returns the {@link Document} language if declared, <code>null</code> + * otherwise. + * + * @param in + * a instance of {@link Document}. + * @return the language declared, could be <code>null</code>. + */ + private String getDocumentLanguage(Document in) { + String lang = DomUtils.find(in, "string(/HTML/@lang)"); + if (lang.equals("")) { + return null; + } + return lang; + } + + private URI extractProfile(Document in) { + String profile = DomUtils.find(in, "string(/HTML/@profile)"); + if (profile.equals("")) { + return null; + } + return new URIImpl(profile); + } + + /** + * It extracts prefixes defined in the <i>LINK</i> meta tags. 
+ * + * @param in + */ + private void extractLinkDefinedPrefixes(Document in) { + List<Node> linkNodes = DomUtils.findAll(in, "/HTML/HEAD/LINK"); + for (Node linkNode : linkNodes) { + NamedNodeMap attributes = linkNode.getAttributes(); + String rel = attributes.getNamedItem("rel").getTextContent(); + String href = attributes.getNamedItem("href").getTextContent(); + if (rel != null && href != null && RDFUtils.isAbsoluteURI(href)) { + prefixes.put(rel, new URIImpl(href)); + } + } + } + + private Set<JSONLDScript> extractJSONLDScript(Document in, + String baseProfile, ExtractionParameters extractionParameters, + ExtractionContext extractionContext, ExtractionResult out) + throws IOException, ExtractionException { + List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT"); + Set<JSONLDScript> result = new HashSet<JSONLDScript>(); + extractor = new JSONLDExtractorFactory().createExtractor(); + for (Node jsonldNode : scriptNodes) { + NamedNodeMap attributes = jsonldNode.getAttributes(); + for (int i = 0; i < attributes.getLength(); i++) { + if (attributes.item(i).getTextContent() + .equalsIgnoreCase("application/ld+json")) { + extractor.run(extractionParameters, extractionContext, + DomUtils.nodeToInputStream(jsonldNode + .getFirstChild()), out); + } + } + Node nameAttribute = attributes.getNamedItem("name"); + Node contentAttribute = attributes.getNamedItem("content"); + if (nameAttribute == null || contentAttribute == null) { + continue; + } + String name = nameAttribute.getTextContent(); + String content = contentAttribute.getTextContent(); + String xpath = DomUtils.getXPathForNode(jsonldNode); + URI nameAsURI = getPrefixIfExists(name); + if (nameAsURI == null) { + nameAsURI = new URIImpl(baseProfile + name); + } + JSONLDScript jsonldScript = new JSONLDScript(xpath, nameAsURI, + content); + result.add(jsonldScript); + } + return result; + } + + private URI getPrefixIfExists(String name) { + String[] split = name.split("\\."); + if (split.length == 2 && 
prefixes.containsKey(split[0])) { + return new URIImpl(prefixes.get(split[0]) + split[1]); + } + return null; + } + + @Override + public ExtractorDescription getDescription() { + return HTMLMetaExtractorFactory.getDescriptionInstance(); + } + + private class JSONLDScript { + + private String xpath; + + private URI name; + + private String lang; + + private String content; + + public JSONLDScript(String xpath, URI name, String content) { + this.xpath = xpath; + this.name = name; + this.content = content; + } + + public JSONLDScript(String xpath, URI name, String content, String lang) { + this(xpath, name, content); + this.lang = lang; + } + + public URI getName() { + return name; + } + + public void setName(URI name) { + this.name = name; + } + + public String getLang() { + return lang; + } + + public void setLang(String lang) { + this.lang = lang; + } + + public String getContent() { + return content; + } + + public void setContent(String content) { + this.content = content; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null) { + return false; + } + if (!(o instanceof JSONLDScript)) { + return false; + } + + JSONLDScript meta = (JSONLDScript) o; + + if (xpath != null ? !xpath.equals(meta.xpath) : meta.xpath != null) { + return false; + } + + return true; + } + + @Override + public int hashCode() { + return xpath != null ? 
xpath.hashCode() : 0; + } + } } http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorFactory.java index 2e7810f..714a227 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorFactory.java @@ -24,12 +24,10 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * */ -@MetaInfServices(ExtractorFactory.class) public class EmbeddedJSONLDExtractorFactory extends SimpleExtractorFactory<EmbeddedJSONLDExtractor> implements ExtractorFactory<EmbeddedJSONLDExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java index eefe764..3fe1204 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ 
-@MetaInfServices(ExtractorFactory.class) public class GeoExtractorFactory extends SimpleExtractorFactory<GeoExtractor> implements ExtractorFactory<GeoExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java index d0bce6a..239edf7 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class HCalendarExtractorFactory extends SimpleExtractorFactory<HCalendarExtractor> implements ExtractorFactory<HCalendarExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java index e28c83f..70c8480 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; 
import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class HCardExtractorFactory extends SimpleExtractorFactory<HCardExtractor> implements ExtractorFactory<HCardExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java index 0a8d1bc..b060290 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class HListingExtractorFactory extends SimpleExtractorFactory<HListingExtractor> implements ExtractorFactory<HListingExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java index 3c1376d..9a64b49 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java @@ -24,13 +24,11 @@ import 
org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class HRecipeExtractorFactory extends SimpleExtractorFactory<HRecipeExtractor> implements ExtractorFactory<HRecipeExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java index 876ee1e..d8f5b48 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class HResumeExtractorFactory extends SimpleExtractorFactory<HResumeExtractor> implements ExtractorFactory<HResumeExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java index a37ce7e..8022a66 100644 --- 
a/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * * @author Peter Ansell [email protected] */ -@MetaInfServices(ExtractorFactory.class) public class HReviewAggregateExtractorFactory extends SimpleExtractorFactory<HReviewAggregateExtractor> implements ExtractorFactory<HReviewAggregateExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java index 7fcf4c7..c943800 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class HReviewExtractorFactory extends SimpleExtractorFactory<HReviewExtractor> implements ExtractorFactory<HReviewExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java ---------------------------------------------------------------------- diff --git 
a/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java index 3a010ba..0d24ebd 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class HTMLMetaExtractorFactory extends SimpleExtractorFactory<HTMLMetaExtractor> implements ExtractorFactory<HTMLMetaExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java index 3586049..b9dc280 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class HeadLinkExtractorFactory extends SimpleExtractorFactory<HeadLinkExtractor> implements ExtractorFactory<HeadLinkExtractor> { 
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java index 45066f1..7f4dd80 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class ICBMExtractorFactory extends SimpleExtractorFactory<ICBMExtractor> implements ExtractorFactory<ICBMExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java index a83aace..d8ed293 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class 
LicenseExtractorFactory extends SimpleExtractorFactory<LicenseExtractor> implements ExtractorFactory<LicenseExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java index ebbe45c..14b7e08 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class SpeciesExtractorFactory extends SimpleExtractorFactory<SpeciesExtractor> implements ExtractorFactory<SpeciesExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java index 1585659..ca96dd1 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import 
org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class TitleExtractorFactory extends SimpleExtractorFactory<TitleExtractor> implements ExtractorFactory<TitleExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java index 0445941..0810eb9 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class TurtleHTMLExtractorFactory extends SimpleExtractorFactory<TurtleHTMLExtractor> implements ExtractorFactory<TurtleHTMLExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java index 7a7c5b1..33de0b3 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import 
org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class XFNExtractorFactory extends SimpleExtractorFactory<XFNExtractor> implements ExtractorFactory<XFNExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java index 95cf208..1dca82e 100644 --- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java @@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class MicrodataExtractorFactory extends SimpleExtractorFactory<MicrodataExtractor> implements ExtractorFactory<MicrodataExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java index 052bfa9..e32ec51 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java +++ 
b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java @@ -106,7 +106,7 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor { } catch (RDFHandlerException ex) { throw new IllegalStateException("Unexpected exception.", ex); } catch (RDFParseException ex) { -// throw new ExtractionException("Error while parsing RDF document.", ex, extractionResult); + throw new ExtractionException("Error while parsing RDF document.", ex, extractionResult); } } http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java index bedd200..0c19919 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java @@ -23,12 +23,10 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * */ -@MetaInfServices(ExtractorFactory.class) public class JSONLDExtractorFactory extends SimpleExtractorFactory<JSONLDExtractor> implements ExtractorFactory<JSONLDExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java index 3231b0d..4a6c6ab 100644 --- 
a/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java @@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class NQuadsExtractorFactory extends SimpleExtractorFactory<NQuadsExtractor> implements ExtractorFactory<NQuadsExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java index 40fa269..8a886f5 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java @@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class NTriplesExtractorFactory extends SimpleExtractorFactory<NTriplesExtractor> implements ExtractorFactory<NTriplesExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java ---------------------------------------------------------------------- diff --git 
a/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java index 8dc1b1f..eed9cc4 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java @@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class RDFXMLExtractorFactory extends SimpleExtractorFactory<RDFXMLExtractor> implements ExtractorFactory<RDFXMLExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java index 2bfc908..cbdb45a 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java @@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class TriXExtractorFactory extends SimpleExtractorFactory<TriXExtractor> implements ExtractorFactory<TriXExtractor> { 
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java index e31bba1..204c2f1 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java @@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class TurtleExtractorFactory extends SimpleExtractorFactory<TurtleExtractor> implements ExtractorFactory<TurtleExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java index 9f51864..4c2ffe4 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java @@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) 
public class RDFa11ExtractorFactory extends SimpleExtractorFactory<RDFa11Extractor> implements ExtractorFactory<RDFa11Extractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java index 4523bc9..75caafb 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java @@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class RDFaExtractorFactory extends SimpleExtractorFactory<RDFaExtractor> implements ExtractorFactory<RDFaExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java index b4e20e2..e240d30 100644 --- a/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java +++ b/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java @@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import org.apache.any23.rdf.Prefixes; -import 
org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices(ExtractorFactory.class) public class XPathExtractorFactory extends SimpleExtractorFactory<XPathExtractor> implements ExtractorFactory<XPathExtractor> { http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java b/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java index d5a6d03..2b08552 100644 --- a/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java +++ b/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java @@ -19,14 +19,12 @@ package org.apache.any23.writer; import java.io.OutputStream; -import org.kohsuke.MetaInfServices; import org.openrdf.rio.RDFFormat; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices public class JSONWriterFactory implements WriterFactory { public static final String MIME_TYPE = "text/json"; http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/NQuadsWriterFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/writer/NQuadsWriterFactory.java b/core/src/main/java/org/apache/any23/writer/NQuadsWriterFactory.java index 85adc82..8bd0b53 100644 --- a/core/src/main/java/org/apache/any23/writer/NQuadsWriterFactory.java +++ b/core/src/main/java/org/apache/any23/writer/NQuadsWriterFactory.java @@ -19,14 +19,12 @@ package org.apache.any23.writer; import java.io.OutputStream; -import org.kohsuke.MetaInfServices; import org.openrdf.rio.RDFFormat; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices public class NQuadsWriterFactory implements WriterFactory { public static final String MIME_TYPE = RDFFormat.NQUADS.getDefaultMIMEType(); 
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/NTriplesWriterFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/writer/NTriplesWriterFactory.java b/core/src/main/java/org/apache/any23/writer/NTriplesWriterFactory.java index 8032174..059c91e 100644 --- a/core/src/main/java/org/apache/any23/writer/NTriplesWriterFactory.java +++ b/core/src/main/java/org/apache/any23/writer/NTriplesWriterFactory.java @@ -19,14 +19,12 @@ package org.apache.any23.writer; import java.io.OutputStream; -import org.kohsuke.MetaInfServices; import org.openrdf.rio.RDFFormat; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices public class NTriplesWriterFactory implements WriterFactory { public static final String MIME_TYPE = RDFFormat.NTRIPLES.getDefaultMIMEType(); http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/RDFXMLWriterFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/writer/RDFXMLWriterFactory.java b/core/src/main/java/org/apache/any23/writer/RDFXMLWriterFactory.java index 39c3b27..f7c2340 100644 --- a/core/src/main/java/org/apache/any23/writer/RDFXMLWriterFactory.java +++ b/core/src/main/java/org/apache/any23/writer/RDFXMLWriterFactory.java @@ -19,14 +19,12 @@ package org.apache.any23.writer; import java.io.OutputStream; -import org.kohsuke.MetaInfServices; import org.openrdf.rio.RDFFormat; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices public class RDFXMLWriterFactory implements WriterFactory { public static final String MIME_TYPE = RDFFormat.RDFXML.getDefaultMIMEType(); http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/TriXWriterFactory.java ---------------------------------------------------------------------- diff --git 
a/core/src/main/java/org/apache/any23/writer/TriXWriterFactory.java b/core/src/main/java/org/apache/any23/writer/TriXWriterFactory.java index 29de9cf..5b34869 100644 --- a/core/src/main/java/org/apache/any23/writer/TriXWriterFactory.java +++ b/core/src/main/java/org/apache/any23/writer/TriXWriterFactory.java @@ -19,14 +19,12 @@ package org.apache.any23.writer; import java.io.OutputStream; -import org.kohsuke.MetaInfServices; import org.openrdf.rio.RDFFormat; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices public class TriXWriterFactory implements WriterFactory { public static final String MIME_TYPE = RDFFormat.TRIX.getDefaultMIMEType(); http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/TurtleWriterFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/writer/TurtleWriterFactory.java b/core/src/main/java/org/apache/any23/writer/TurtleWriterFactory.java index aa4b7b5..df6f9fa 100644 --- a/core/src/main/java/org/apache/any23/writer/TurtleWriterFactory.java +++ b/core/src/main/java/org/apache/any23/writer/TurtleWriterFactory.java @@ -19,14 +19,12 @@ package org.apache.any23.writer; import java.io.OutputStream; -import org.kohsuke.MetaInfServices; import org.openrdf.rio.RDFFormat; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices public class TurtleWriterFactory implements WriterFactory { public static final String MIME_TYPE = RDFFormat.TURTLE.getDefaultMIMEType(); http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/URIListWriterFactory.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/writer/URIListWriterFactory.java b/core/src/main/java/org/apache/any23/writer/URIListWriterFactory.java index ebdc494..9fd0c75 100644 --- 
a/core/src/main/java/org/apache/any23/writer/URIListWriterFactory.java +++ b/core/src/main/java/org/apache/any23/writer/URIListWriterFactory.java @@ -19,14 +19,12 @@ package org.apache.any23.writer; import java.io.OutputStream; -import org.kohsuke.MetaInfServices; import org.openrdf.rio.RDFFormat; /** * @author Peter Ansell [email protected] * */ -@MetaInfServices public class URIListWriterFactory implements WriterFactory { public static final String MIME_TYPE = "text/plain"; http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/resources/META-INF/services/org.apache.any23.cli.Tool ---------------------------------------------------------------------- diff --git a/core/src/main/resources/META-INF/services/org.apache.any23.cli.Tool b/core/src/main/resources/META-INF/services/org.apache.any23.cli.Tool new file mode 100644 index 0000000..6e5a533 --- /dev/null +++ b/core/src/main/resources/META-INF/services/org.apache.any23.cli.Tool @@ -0,0 +1,6 @@ +org.apache.any23.cli.ExtractorDocumentation +org.apache.any23.cli.MicrodataParser +org.apache.any23.cli.MimeDetector +org.apache.any23.cli.PluginVerifier +org.apache.any23.cli.Rover +org.apache.any23.cli.VocabPrinter http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory ---------------------------------------------------------------------- diff --git a/core/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory b/core/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory new file mode 100644 index 0000000..cb350f2 --- /dev/null +++ b/core/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory @@ -0,0 +1,28 @@ +org.apache.any23.extractor.csv.CSVExtractorFactory +org.apache.any23.extractor.html.AdrExtractorFactory +org.apache.any23.extractor.html.EmbeddedJSONLDExtractorFactory +org.apache.any23.extractor.html.GeoExtractorFactory 
+org.apache.any23.extractor.html.HCalendarExtractorFactory +org.apache.any23.extractor.html.HCardExtractorFactory +org.apache.any23.extractor.html.HeadLinkExtractorFactory +org.apache.any23.extractor.html.HListingExtractorFactory +org.apache.any23.extractor.html.HRecipeExtractorFactory +org.apache.any23.extractor.html.HResumeExtractorFactory +org.apache.any23.extractor.html.HReviewAggregateExtractorFactory +org.apache.any23.extractor.html.HReviewExtractorFactory +org.apache.any23.extractor.html.HTMLMetaExtractorFactory +org.apache.any23.extractor.html.ICBMExtractorFactory +org.apache.any23.extractor.html.LicenseExtractorFactory +org.apache.any23.extractor.html.SpeciesExtractorFactory +org.apache.any23.extractor.html.TitleExtractorFactory +org.apache.any23.extractor.html.XFNExtractorFactory +org.apache.any23.extractor.microdata.MicrodataExtractorFactory +org.apache.any23.extractor.rdf.JSONLDExtractorFactory +org.apache.any23.extractor.rdf.NQuadsExtractorFactory +org.apache.any23.extractor.rdf.NTriplesExtractorFactory +org.apache.any23.extractor.rdf.RDFXMLExtractorFactory +org.apache.any23.extractor.rdf.TriXExtractorFactory +org.apache.any23.extractor.rdf.TurtleExtractorFactory +org.apache.any23.extractor.rdfa.RDFa11ExtractorFactory +org.apache.any23.extractor.rdfa.RDFaExtractorFactory +org.apache.any23.extractor.xpath.XPathExtractorFactory http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/resources/META-INF/services/org.apache.any23.writer.WriterFactory ---------------------------------------------------------------------- diff --git a/core/src/main/resources/META-INF/services/org.apache.any23.writer.WriterFactory b/core/src/main/resources/META-INF/services/org.apache.any23.writer.WriterFactory new file mode 100644 index 0000000..03f32cd --- /dev/null +++ b/core/src/main/resources/META-INF/services/org.apache.any23.writer.WriterFactory @@ -0,0 +1,7 @@ +org.apache.any23.writer.JSONWriterFactory +org.apache.any23.writer.NQuadsWriterFactory 
+org.apache.any23.writer.NTriplesWriterFactory +org.apache.any23.writer.RDFXMLWriterFactory +org.apache.any23.writer.TriXWriterFactory +org.apache.any23.writer.TurtleWriterFactory +org.apache.any23.writer.URIListWriterFactory http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java index 271b498..b985b90 100644 --- a/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java +++ b/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java @@ -35,81 +35,93 @@ import org.slf4j.LoggerFactory; */ public class CSVExtractorTest extends AbstractExtractorTestCase { - private static final Logger logger = LoggerFactory.getLogger(CSVExtractorTest.class); - - @Override - protected ExtractorFactory<?> getExtractorFactory() { - return new CSVExtractorFactory(); - } - - @Test - public void testExtractionCommaSeparated() throws RepositoryException { - CSV csv = CSV.getInstance(); - assertExtract("/org/apache/any23/extractor/csv/test-comma.csv"); - logger.debug(dumpModelToRDFXML()); - - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 28); - assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); - assertContains(null, csv.numberOfColumns, new LiteralImpl("4", XMLSchema.INTEGER)); - assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER)); - } - - @Test - public void testExtractionSemicolonSeparated() throws RepositoryException { - CSV csv = CSV.getInstance(); - assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv"); - logger.debug(dumpModelToRDFXML()); - - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 28); - assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); - assertContains(null, 
csv.numberOfColumns, new LiteralImpl("4", XMLSchema.INTEGER)); - assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER)); - } - - @Test - public void testExtractionTabSeparated() throws RepositoryException { - CSV csv = CSV.getInstance(); - assertExtract("/org/apache/any23/extractor/csv/test-tab.csv"); - logger.debug(dumpModelToRDFXML()); - - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 28); - assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); - assertContains(null, csv.numberOfColumns, new LiteralImpl("4", XMLSchema.INTEGER)); - assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER)); - } - - @Test - public void testTypeManagement() throws RepositoryException { - CSV csv = CSV.getInstance(); - assertExtract("/org/apache/any23/extractor/csv/test-type.csv"); - logger.debug(dumpModelToRDFXML()); - - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 21); - assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); - assertContains(null, csv.numberOfColumns, new LiteralImpl("2", XMLSchema.INTEGER)); - assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER)); - assertContains(null, null, new LiteralImpl("5.2", XMLSchema.FLOAT)); - assertContains(null, null, new LiteralImpl("7.9", XMLSchema.FLOAT)); - assertContains(null, null, new LiteralImpl("10" , XMLSchema.INTEGER)); - } - - @Test - public void testExtractionEmptyValue() throws RepositoryException { - CSV csv = CSV.getInstance(); - assertExtract("/org/apache/any23/extractor/csv/test-missing.csv"); - logger.debug(dumpModelToRDFXML()); - - assertModelNotEmpty(); - assertStatementsSize(null, null, null, 25); - assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); - assertContains(null, csv.numberOfColumns, new LiteralImpl("4", XMLSchema.INTEGER)); - assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER)); - assertContains(null, null, new LiteralImpl("Michele", XMLSchema.STRING)); - 
assertContains(null, null, new LiteralImpl("Giovanni", XMLSchema.STRING)); - } + private static final Logger logger = LoggerFactory + .getLogger(CSVExtractorTest.class); + + @Override + protected ExtractorFactory<?> getExtractorFactory() { + return new CSVExtractorFactory(); + } + + @Test + public void testExtractionCommaSeparated() throws Exception { + CSV csv = CSV.getInstance(); + assertExtract("/org/apache/any23/extractor/csv/test-comma.csv"); + logger.debug(dumpModelToRDFXML()); + + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 28); + assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); + assertContains(null, csv.numberOfColumns, new LiteralImpl("4", + XMLSchema.INTEGER)); + assertContains(null, csv.numberOfRows, new LiteralImpl("3", + XMLSchema.INTEGER)); + } + + @Test + public void testExtractionSemicolonSeparated() throws Exception { + CSV csv = CSV.getInstance(); + assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv"); + logger.debug(dumpModelToRDFXML()); + + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 28); + assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); + assertContains(null, csv.numberOfColumns, new LiteralImpl("4", + XMLSchema.INTEGER)); + assertContains(null, csv.numberOfRows, new LiteralImpl("3", + XMLSchema.INTEGER)); + } + + @Test + public void testExtractionTabSeparated() throws Exception { + CSV csv = CSV.getInstance(); + assertExtract("/org/apache/any23/extractor/csv/test-tab.csv"); + logger.debug(dumpModelToRDFXML()); + + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 28); + assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); + assertContains(null, csv.numberOfColumns, new LiteralImpl("4", + XMLSchema.INTEGER)); + assertContains(null, csv.numberOfRows, new LiteralImpl("3", + XMLSchema.INTEGER)); + } + + @Test + public void testTypeManagement() throws Exception { + CSV csv = CSV.getInstance(); + assertExtract("/org/apache/any23/extractor/csv/test-type.csv"); + 
logger.debug(dumpModelToRDFXML()); + + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 21); + assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); + assertContains(null, csv.numberOfColumns, new LiteralImpl("2", + XMLSchema.INTEGER)); + assertContains(null, csv.numberOfRows, new LiteralImpl("3", + XMLSchema.INTEGER)); + assertContains(null, null, new LiteralImpl("5.2", XMLSchema.FLOAT)); + assertContains(null, null, new LiteralImpl("7.9", XMLSchema.FLOAT)); + assertContains(null, null, new LiteralImpl("10", XMLSchema.INTEGER)); + } + + @Test + public void testExtractionEmptyValue() throws Exception { + CSV csv = CSV.getInstance(); + assertExtract("/org/apache/any23/extractor/csv/test-missing.csv"); + logger.debug(dumpModelToRDFXML()); + + assertModelNotEmpty(); + assertStatementsSize(null, null, null, 25); + assertStatementsSize(null, RDF.TYPE, csv.rowType, 3); + assertContains(null, csv.numberOfColumns, new LiteralImpl("4", + XMLSchema.INTEGER)); + assertContains(null, csv.numberOfRows, new LiteralImpl("3", + XMLSchema.INTEGER)); + assertContains(null, null, new LiteralImpl("Michele", XMLSchema.STRING)); + assertContains(null, null, + new LiteralImpl("Giovanni", XMLSchema.STRING)); + } } http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java ---------------------------------------------------------------------- diff --git a/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java b/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java index 24da686..04a6ecc 100644 --- a/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java +++ b/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java @@ -24,14 +24,12 @@ import org.apache.any23.extractor.ExtractorFactory; import org.apache.any23.extractor.SimpleExtractorFactory; import 
org.apache.any23.rdf.PopularPrefixes; import org.apache.any23.rdf.Prefixes; -import org.kohsuke.MetaInfServices; /** * @author Peter Ansell [email protected] * */ // NOTE: Not enabling this in META-INF/services -//@MetaInfServices(ExtractorFactory.class) public class ExampleExtractorFactory extends SimpleExtractorFactory<ExampleExtractor> implements ExtractorFactory<ExampleExtractor> {
