http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java index 5dea08f..7536304 100644 --- a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java @@ -28,13 +28,12 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.rdf.RDFUtils; import org.apache.any23.vocab.CSV; import org.apache.commons.csv.CSVParser; -import org.openrdf.model.URI; -import org.openrdf.model.Value; -import org.openrdf.model.impl.LiteralImpl; -import org.openrdf.model.impl.URIImpl; -import org.openrdf.model.vocabulary.RDF; -import org.openrdf.model.vocabulary.RDFS; -import org.openrdf.model.vocabulary.XMLSchema; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.vocabulary.RDFS; +import org.eclipse.rdf4j.model.vocabulary.XMLSchema; import java.io.IOException; import java.io.InputStream; @@ -52,7 +51,7 @@ public class CSVExtractor implements Extractor.ContentExtractor { private CSVParser csvParser; - private URI[] headerURIs; + private IRI[] headerIRIs; private CSV csv = CSV.getInstance(); @@ -71,14 +70,14 @@ public class CSVExtractor implements Extractor.ContentExtractor { InputStream in , ExtractionResult out ) throws IOException, ExtractionException { - final URI documentURI = extractionContext.getDocumentURI(); + final IRI documentIRI = extractionContext.getDocumentIRI(); // build the parser csvParser = CSVReaderBuilder.build(in); - // get the header and generate the URIs for column names + // get the header and generate the IRIs for column names String[] header = csvParser.getLine(); - headerURIs = processHeader(header, documentURI); + headerIRIs = processHeader(header, documentIRI); // write triples to describe properties writeHeaderPropertiesMetadata(header, out); @@ -86,8 +85,8 @@ public class CSVExtractor implements Extractor.ContentExtractor { String[] nextLine; int index = 0; while ((nextLine = csvParser.getLine()) != null) { - URI rowSubject = RDFUtils.uri( - documentURI.toString(), + IRI rowSubject = RDFUtils.uri( + documentIRI.toString(), "row/" + index ); // add a row type @@ -95,21 +94,21 @@ public class CSVExtractor implements Extractor.ContentExtractor { // for each row produce its statements produceRowStatements(rowSubject, nextLine, out); // link the row to the document - out.writeTriple(documentURI, csv.row, rowSubject); + out.writeTriple(documentIRI, csv.row, rowSubject); // the progressive row number out.writeTriple( rowSubject, csv.rowPosition, - new LiteralImpl(String.valueOf(index)) + SimpleValueFactory.getInstance().createLiteral(String.valueOf(index)) ); index++; } // add some CSV metadata such as the number of rows and columns addTableMetadataStatements( - documentURI, + documentIRI, out, index, - headerURIs.length + headerIRIs.length ); } @@ -151,53 +150,53 @@ public class CSVExtractor implements Extractor.ContentExtractor { */ private void writeHeaderPropertiesMetadata(String[] header, ExtractionResult out) { int index = 0; - for (URI singleHeader : headerURIs) { - if (index > headerURIs.length) { + for (IRI singleHeader : headerIRIs) { + if (index > headerIRIs.length) { break; } - if (!RDFUtils.isAbsoluteURI(header[index])) { + if (!RDFUtils.isAbsoluteIRI(header[index])) { out.writeTriple( singleHeader, RDFS.LABEL, - new LiteralImpl(header[index]) + SimpleValueFactory.getInstance().createLiteral(header[index]) ); } out.writeTriple( singleHeader, csv.columnPosition, - new LiteralImpl(String.valueOf(index), XMLSchema.INTEGER) + SimpleValueFactory.getInstance().createLiteral(String.valueOf(index), XMLSchema.INTEGER) ); index++; } } /** - * It process the first row of the file, returning a list of {@link URI}s representing - * the properties for each column. If a value of the header is an absolute <i>URI</i> + * It process the first row of the file, returning a list of {@link IRI}s representing + * the properties for each column. If a value of the header is an absolute <i>IRI</i> * then it leave it as is. Otherwise the {@link org.apache.any23.vocab.CSV} vocabulary is used. * * @param header - * @return an array of {@link URI}s identifying the column names. + * @return an array of {@link IRI}s identifying the column names. */ - private URI[] processHeader(String[] header, URI documentURI) { - URI[] result = new URI[header.length]; + private IRI[] processHeader(String[] header, IRI documentIRI) { + IRI[] result = new IRI[header.length]; int index = 0; for (String h : header) { String candidate = h.trim(); - if (RDFUtils.isAbsoluteURI(candidate)) { - result[index] = new URIImpl(candidate); + if (RDFUtils.isAbsoluteIRI(candidate)) { + result[index] = SimpleValueFactory.getInstance().createIRI(candidate); } else { - result[index] = normalize(candidate, documentURI); + result[index] = normalize(candidate, documentIRI); } index++; } return result; } - private URI normalize(String toBeNormalized, URI documentURI) { + private IRI normalize(String toBeNormalized, IRI documentIRI) { toBeNormalized = toBeNormalized.trim().toLowerCase().replace("?", "").replace("&", ""); - StringBuilder result = new StringBuilder(documentURI.toString()); + StringBuilder result = new StringBuilder(documentIRI.toString()); StringTokenizer tokenizer = new StringTokenizer(toBeNormalized, " "); while (tokenizer.hasMoreTokens()) { @@ -206,12 +205,12 @@ public class CSVExtractor implements Extractor.ContentExtractor { result.append(toUpperCase(current.charAt(0))).append(current.substring(1)); } - return new URIImpl(result.toString()); + return SimpleValueFactory.getInstance().createIRI(result.toString()); } /** * It writes on the provided {@link ExtractionResult}, the </>RDF statements</> - * representing the row <i>cell</i>. If a row <i>cell</i> is an absolute <i>URI</i> + * representing the row <i>cell</i>. If a row <i>cell</i> is an absolute <i>IRI</i> * then an object property is written, literal otherwise. * * @param rowSubject @@ -219,13 +218,13 @@ public class CSVExtractor implements Extractor.ContentExtractor { * @param out */ private void produceRowStatements( - URI rowSubject, + IRI rowSubject, String[] values, ExtractionResult out ) { int index = 0; for (String cell : values) { - if (index >= headerURIs.length) { + if (index >= headerIRIs.length) { // there are some row cells that don't have an associated column name break; } @@ -233,7 +232,7 @@ public class CSVExtractor implements Extractor.ContentExtractor { index++; continue; } - URI predicate = headerURIs[index]; + IRI predicate = headerIRIs[index]; Value object = getObjectFromCell(cell); out.writeTriple(rowSubject, predicate, object); index++; @@ -243,16 +242,16 @@ public class CSVExtractor implements Extractor.ContentExtractor { private Value getObjectFromCell(String cell) { Value object; cell = cell.trim(); - if (RDFUtils.isAbsoluteURI(cell)) { - object = new URIImpl(cell); + if (RDFUtils.isAbsoluteIRI(cell)) { + object = SimpleValueFactory.getInstance().createIRI(cell); } else { - URI datatype = XMLSchema.STRING; + IRI datatype = XMLSchema.STRING; if (isInteger(cell)) { datatype = XMLSchema.INTEGER; } else if(isFloat(cell)) { datatype = XMLSchema.FLOAT; } - object = new LiteralImpl(cell, datatype); + object = SimpleValueFactory.getInstance().createLiteral(cell, datatype); } return object; } @@ -261,25 +260,25 @@ public class CSVExtractor implements Extractor.ContentExtractor { * It writes on the provided {@link ExtractionResult} some <i>RDF Statements</i> * on generic properties of the <i>CSV</i> file, such as number of rows and columns. * - * @param documentURI + * @param documentIRI * @param out * @param numberOfRows * @param numberOfColumns */ private void addTableMetadataStatements( - URI documentURI, + IRI documentIRI, ExtractionResult out, int numberOfRows, int numberOfColumns) { out.writeTriple( - documentURI, + documentIRI, csv.numberOfRows, - new LiteralImpl(String.valueOf(numberOfRows), XMLSchema.INTEGER) + SimpleValueFactory.getInstance().createLiteral(String.valueOf(numberOfRows), XMLSchema.INTEGER) ); out.writeTriple( - documentURI, + documentIRI, csv.numberOfColumns, - new LiteralImpl(String.valueOf(numberOfColumns), XMLSchema.INTEGER) + SimpleValueFactory.getInstance().createLiteral(String.valueOf(numberOfColumns), XMLSchema.INTEGER) ); }
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java index d57239d..21ed595 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java @@ -21,8 +21,8 @@ import org.apache.any23.extractor.ExtractionResult; import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.TagSoupExtractionResult; import org.apache.any23.vocab.VCard; -import org.openrdf.model.BNode; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.w3c.dom.Node; /** http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java index ad2b29f..818fc98 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java @@ -27,9 +27,9 @@ import org.apache.any23.extractor.rdf.JSONLDExtractor; import org.apache.any23.extractor.rdf.JSONLDExtractorFactory; import org.apache.any23.rdf.RDFUtils; import org.apache.any23.vocab.SINDICE; -import org.openrdf.model.URI; -import org.openrdf.model.impl.LiteralImpl; -import org.openrdf.model.impl.URIImpl; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.impl.LiteralImpl; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; @@ -54,9 +54,9 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor { private static final SINDICE vSINDICE = SINDICE.getInstance(); - private URI profile; + private IRI profile; - private Map<String, URI> prefixes = new HashMap<String, URI>(); + private Map<String, IRI> prefixes = new HashMap<String, IRI>(); private String documentLang; @@ -78,7 +78,7 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor { baseProfile = profile.toString(); } - final URI documentURI = extractionContext.getDocumentURI(); + final IRI documentIRI = extractionContext.getDocumentIRI(); Set<JSONLDScript> jsonldScripts = extractJSONLDScript(in, baseProfile, extractionParameters, extractionContext, out); for (JSONLDScript jsonldScript : jsonldScripts) { @@ -86,8 +86,8 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor { //if (jsonldScript.getLang() != null) { // lang = jsonldScript.getLang(); //} - //out.writeTriple(documentURI, jsonldScript.getName(), - // new LiteralImpl(jsonldScript.getContent(), lang)); + //out.writeTriple(documentIRI, jsonldScript.getName(), + // SimpleValueFactory.getInstance().createLiteral(jsonldScript.getContent(), lang)); } } @@ -107,12 +107,12 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor { return lang; } - private URI extractProfile(Document in) { + private IRI extractProfile(Document in) { String profile = DomUtils.find(in, "string(/HTML/@profile)"); if (profile.equals("")) { return null; } - return new URIImpl(profile); + return SimpleValueFactory.getInstance().createIRI(profile); } /** @@ -126,8 +126,8 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor { NamedNodeMap attributes = linkNode.getAttributes(); String rel = attributes.getNamedItem("rel").getTextContent(); String href = attributes.getNamedItem("href").getTextContent(); - if (rel != null && href != null && RDFUtils.isAbsoluteURI(href)) { - prefixes.put(rel, new URIImpl(href)); + if (rel != null && href != null && RDFUtils.isAbsoluteIRI(href)) { + prefixes.put(rel, SimpleValueFactory.getInstance().createIRI(href)); } } } @@ -157,21 +157,21 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor { String name = nameAttribute.getTextContent(); String content = contentAttribute.getTextContent(); String xpath = DomUtils.getXPathForNode(jsonldNode); - URI nameAsURI = getPrefixIfExists(name); - if (nameAsURI == null) { - nameAsURI = new URIImpl(baseProfile + name); + IRI nameAsIRI = getPrefixIfExists(name); + if (nameAsIRI == null) { + nameAsIRI = SimpleValueFactory.getInstance().createIRI(baseProfile + name); } - JSONLDScript jsonldScript = new JSONLDScript(xpath, nameAsURI, + JSONLDScript jsonldScript = new JSONLDScript(xpath, nameAsIRI, content); result.add(jsonldScript); } return result; } - private URI getPrefixIfExists(String name) { + private IRI getPrefixIfExists(String name) { String[] split = name.split("\\."); if (split.length == 2 && prefixes.containsKey(split[0])) { - return new URIImpl(prefixes.get(split[0]) + split[1]); + return SimpleValueFactory.getInstance().createIRI(prefixes.get(split[0]) + split[1]); } return null; } @@ -185,28 +185,28 @@ public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor { private String xpath; - private URI name; + private IRI name; private String lang; private String content; - public JSONLDScript(String xpath, URI name, String content) { + public JSONLDScript(String xpath, IRI name, String content) { this.xpath = xpath; this.name = name; this.content = content; } - public JSONLDScript(String xpath, URI name, String content, String lang) { + public JSONLDScript(String xpath, IRI name, String content, String lang) { this(xpath, name, content); this.lang = lang; } - public URI getName() { + public IRI getName() { return name; } - public void setName(URI name) { + public void setName(IRI name) { this.name = name; } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/EntityBasedMicroformatExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/EntityBasedMicroformatExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EntityBasedMicroformatExtractor.java index 10e6872..677581d 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/EntityBasedMicroformatExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/EntityBasedMicroformatExtractor.java @@ -20,7 +20,7 @@ package org.apache.any23.extractor.html; import org.apache.any23.extractor.ExtractionException; import org.apache.any23.extractor.ExtractionResult; import org.apache.any23.rdf.RDFUtils; -import org.openrdf.model.BNode; +import org.eclipse.rdf4j.model.BNode; import org.w3c.dom.Node; import java.util.List; @@ -76,7 +76,7 @@ public abstract class EntityBasedMicroformatExtractor extends MicroformatExtract * blank node ID like "MD5 of http://doc-uri/#xpath/to/node" */ protected BNode getBlankNodeFor(Node node) { - return RDFUtils.getBNode(getDocumentURI() + "#" + DomUtils.getXPathForNode(node)); + return RDFUtils.getBNode(getDocumentIRI() + "#" + DomUtils.getXPathForNode(node)); } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java index f818ccd..d85af79 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java @@ -21,8 +21,8 @@ import org.apache.any23.extractor.ExtractionResult; import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.TagSoupExtractionResult; import org.apache.any23.vocab.VCard; -import org.openrdf.model.BNode; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.w3c.dom.Node; /** http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java index e41ce72..3a14fca 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java @@ -22,10 +22,10 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.TagSoupExtractionResult; import org.apache.any23.rdf.RDFUtils; import org.apache.any23.vocab.ICAL; -import org.openrdf.model.BNode; -import org.openrdf.model.Resource; -import org.openrdf.model.URI; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.w3c.dom.Node; import javax.xml.datatype.DatatypeConfigurationException; @@ -86,8 +86,8 @@ public class HCalendarExtractor extends MicroformatExtractor { } private boolean extractCalendar(Node node) throws ExtractionException { - URI cal = getDocumentURI(); - addURIProperty(cal, RDF.TYPE, vICAL.Vcalendar); + IRI cal = getDocumentIRI(); + addIRIProperty(cal, RDF.TYPE, vICAL.Vcalendar); return addComponents(node, cal); } @@ -106,7 +106,7 @@ public class HCalendarExtractor extends MicroformatExtractor { private boolean extractComponent(Node node, Resource cal, String component) throws ExtractionException { HTMLDocument compoNode = new HTMLDocument(node); BNode evt = valueFactory.createBNode(); - addURIProperty(evt, RDF.TYPE, vICAL.getClass(component)); + addIRIProperty(evt, RDF.TYPE, vICAL.getClass(component)); addTextProps(compoNode, evt); addUrl(compoNode, evt); addRRule(compoNode, evt); @@ -131,13 +131,13 @@ public class HCalendarExtractor extends MicroformatExtractor { private void addUrl(HTMLDocument compoNode, Resource evt) throws ExtractionException { TextField url = compoNode.getSingularUrlField("url"); if ("".equals(url.value())) return; - addURIProperty(evt, vICAL.url, getHTMLDocument().resolveURI(url.value())); + addIRIProperty(evt, vICAL.url, getHTMLDocument().resolveIRI(url.value())); } private void addRRule(HTMLDocument compoNode, Resource evt) { for (Node rule : compoNode.findAllByClassName("rrule")) { BNode rrule = valueFactory.createBNode(); - addURIProperty(rrule, RDF.TYPE, vICAL.DomainOf_rrule); + addIRIProperty(rrule, RDF.TYPE, vICAL.DomainOf_rrule); TextField freq = new HTMLDocument(rule).getSingularTextField("freq"); conditionallyAddStringProperty( freq.source(), http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java index cb65e9c..c1160fa 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java @@ -25,10 +25,10 @@ import org.apache.any23.extractor.TagSoupExtractionResult; import org.apache.any23.extractor.html.annotations.Includes; import org.apache.any23.vocab.VCard; import org.apache.commons.lang.StringUtils; -import org.openrdf.model.BNode; -import org.openrdf.model.Resource; -import org.openrdf.model.URI; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; @@ -99,7 +99,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor { if( DomUtils.isAncestorOf(included, current) ) { final int[] nodeLocation = DomUtils.getNodeLocation(current); report.notifyIssue( - IssueReport.IssueLevel.Warning, + IssueReport.IssueLevel.WARNING, "Current node tries to include an ancestor node.", nodeLocation[0], nodeLocation[1] ); @@ -182,10 +182,10 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor { } private boolean addTel(Resource card, String type, String value) { - URI tel = super.fixLink(value, "tel"); - URI composed = vCARD.getProperty(type + "Tel", null); + IRI tel = super.fixLink(value, "tel"); + IRI composed = vCARD.getProperty(type + "Tel", null); if (composed == null) { - URI simple = vCARD.getProperty(type, null); + IRI simple = vCARD.getProperty(type, null); if (simple == null) { return conditionallyAddResourceProperty(card, vCARD.tel, tel); } @@ -194,7 +194,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor { return conditionallyAddResourceProperty(card, composed, tel); } - private boolean addSubMicroformat(String className, Resource resource, URI property) { + private boolean addSubMicroformat(String className, Resource resource, IRI property) { List<Node> nodes = fragment.findAllByClassName(className); if (nodes.isEmpty()) return false; for (Node node : nodes) { @@ -206,7 +206,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor { return true; } - private boolean addStringProperty(String className, Resource resource, URI property) { + private boolean addStringProperty(String className, Resource resource, IRI property) { final HTMLDocument.TextField textField = fragment.getSingularTextField(className); return conditionallyAddStringProperty( textField.source(), @@ -222,7 +222,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor { * @param property * @return <code>true</code> if the multi property has been added, <code>false</code> otherwise. */ - private boolean addStringMultiProperty(String className, Resource resource, URI property) { + private boolean addStringMultiProperty(String className, Resource resource, IRI property) { HTMLDocument.TextField[] fields = fragment.getPluralTextField(className); boolean found = false; for(HTMLDocument.TextField field : fields) { @@ -267,7 +267,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor { boolean found = false; for (TextField link : links) { found |= conditionallyAddResourceProperty( - card, vCARD.logo, getHTMLDocument().resolveURI(link.value()) + card, vCARD.logo, getHTMLDocument().resolveIRI(link.value()) ); } return found; @@ -278,7 +278,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor { boolean found = false; for (TextField link : links) { found |= conditionallyAddResourceProperty( - card, vCARD.photo, getHTMLDocument().resolveURI(link.value()) + card, vCARD.photo, getHTMLDocument().resolveIRI(link.value()) ); } return found; @@ -320,7 +320,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor { this.fragment.getDocument(), card, vCARD.n, n ); - addURIProperty(n, RDF.TYPE, vCARD.Name); + addIRIProperty(n, RDF.TYPE, vCARD.Name); for (String fieldName : HCardName.FIELDS) { if (!name.containsField(fieldName)) { @@ -385,7 +385,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor { this.fragment.getDocument(), card, vCARD.org, org ); - addURIProperty(org, RDF.TYPE, vCARD.Organization); + addIRIProperty(org, RDF.TYPE, vCARD.Organization); final TextField organizationTextField = name.getOrganization(); conditionallyAddLiteralProperty( organizationTextField.source(), @@ -405,7 +405,7 @@ public class HCardExtractor extends EntityBasedMicroformatExtractor { TextField[] links = fragment.getPluralUrlField("url"); boolean found = false; for (TextField link : links) { - found |= conditionallyAddResourceProperty(card, vCARD.url, getHTMLDocument().resolveURI(link.value())); + found |= conditionallyAddResourceProperty(card, vCARD.url, getHTMLDocument().resolveIRI(link.value())); } return found; } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java index 123c03a..308c3e2 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java @@ -23,10 +23,10 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.TagSoupExtractionResult; import org.apache.any23.vocab.FOAF; import org.apache.any23.vocab.HListing; -import org.openrdf.model.BNode; -import org.openrdf.model.Resource; -import org.openrdf.model.URI; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.w3c.dom.Node; import java.util.ArrayList; @@ -123,7 +123,7 @@ public class HListingExtractor extends EntityBasedMicroformatExtractor { node, listing, hLISTING.item, blankItem ); - addURIProperty(blankItem, RDF.TYPE, hLISTING.Item); + addIRIProperty(blankItem, RDF.TYPE, hLISTING.Item); HTMLDocument item = new HTMLDocument(node); @@ -143,7 +143,7 @@ public class HListingExtractor extends EntityBasedMicroformatExtractor { String value = node.getNodeValue(); // do not use conditionallyAdd, it won't work cause of evaluation rules if (!(null == value || "".equals(value))) { - URI property = hLISTING.getPropertyCamelCase(klass); + IRI property = hLISTING.getPropertyCamelCase(klass); conditionallyAddLiteralProperty( node, blankItem, property, valueFactory.createLiteral(value) @@ -200,7 +200,7 @@ public class HListingExtractor extends EntityBasedMicroformatExtractor { private Resource addLister() throws ExtractionException { Resource blankLister = valueFactory.createBNode(); - addURIProperty(blankLister, RDF.TYPE, hLISTING.Lister); + addIRIProperty(blankLister, RDF.TYPE, hLISTING.Lister); Node node = fragment.findMicroformattedObjectNode("*", "lister"); if (null == node) return blankLister; @@ -224,7 +224,7 @@ public class HListingExtractor extends EntityBasedMicroformatExtractor { private void addListerUrl(HTMLDocument doc, Resource blankLister) throws ExtractionException { TextField url = doc.getSingularUrlField("url"); - conditionallyAddResourceProperty(blankLister, hLISTING.listerUrl, getHTMLDocument().resolveURI(url.value())); + conditionallyAddResourceProperty(blankLister, hLISTING.listerUrl, getHTMLDocument().resolveIRI(url.value())); } private void addListerEmail(HTMLDocument doc, Resource blankLister) { @@ -242,7 +242,7 @@ public class HListingExtractor extends EntityBasedMicroformatExtractor { private void addListerLogo(HTMLDocument doc, Resource blankLister) throws ExtractionException { TextField logo = doc.getSingularUrlField("logo"); - conditionallyAddResourceProperty(blankLister, hLISTING.listerLogo, getHTMLDocument().resolveURI(logo.value())); + conditionallyAddResourceProperty(blankLister, hLISTING.listerLogo, getHTMLDocument().resolveIRI(logo.value())); } private void addListerOrg(HTMLDocument doc, Resource blankLister) { @@ -263,18 +263,18 @@ public class HListingExtractor extends EntityBasedMicroformatExtractor { private void addItemUrl(HTMLDocument item, Resource blankItem) throws ExtractionException { TextField url = item.getSingularUrlField("url"); - conditionallyAddResourceProperty(blankItem, hLISTING.itemUrl, getHTMLDocument().resolveURI(url.value())); + conditionallyAddResourceProperty(blankItem, hLISTING.itemUrl, getHTMLDocument().resolveIRI(url.value())); } private void addItemPhoto(HTMLDocument doc, Resource blankLister) throws ExtractionException { // as per spec String url = doc.findMicroformattedValue("*", "item", "A", "photo", "@href"); - conditionallyAddResourceProperty(blankLister, hLISTING.itemPhoto, getHTMLDocument().resolveURI(url)); + conditionallyAddResourceProperty(blankLister, hLISTING.itemPhoto, getHTMLDocument().resolveIRI(url)); url = doc.findMicroformattedValue("*", "item", "IMG", "photo", "@src"); - conditionallyAddResourceProperty(blankLister, hLISTING.itemPhoto, getHTMLDocument().resolveURI(url)); + conditionallyAddResourceProperty(blankLister, hLISTING.itemPhoto, getHTMLDocument().resolveIRI(url)); // as per kelkoo. Remember that contains(foo,'') is true in xpath url = doc.findMicroformattedValue("*", "photo", "IMG", "", "@src"); - conditionallyAddResourceProperty(blankLister, hLISTING.itemPhoto, getHTMLDocument().resolveURI(url)); + conditionallyAddResourceProperty(blankLister, hLISTING.itemPhoto, getHTMLDocument().resolveIRI(url)); } private List<String> findActions(HTMLDocument doc) { http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java index 51735ae..1d6bdb1 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java @@ -21,9 +21,9 @@ import org.apache.any23.extractor.ExtractionException; import org.apache.any23.extractor.ExtractionResult; import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.vocab.HRecipe; -import org.openrdf.model.BNode; -import org.openrdf.model.URI; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.w3c.dom.Node; /** @@ -78,7 +78,7 @@ public class HRecipeExtractor extends EntityBasedMicroformatExtractor { * @param fieldClass * @param property */ - private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass, URI property) { + private void mapFieldWithProperty(HTMLDocument fragment, BNode recipe, String fieldClass, IRI property) { HTMLDocument.TextField title = fragment.getSingularTextField(fieldClass); conditionallyAddStringProperty( title.source(), recipe, property, title.value() @@ -104,7 +104,7 @@ public class HRecipeExtractor extends EntityBasedMicroformatExtractor { */ private BNode addIngredient(HTMLDocument fragment, HTMLDocument.TextField ingredient) { final BNode ingredientBnode = getBlankNodeFor(ingredient.source()); - addURIProperty(ingredientBnode, RDF.TYPE, vHRECIPE.Ingredient); + addIRIProperty(ingredientBnode, RDF.TYPE, vHRECIPE.Ingredient); conditionallyAddStringProperty( ingredient.source(), ingredientBnode, @@ -161,7 +161,7 @@ public class HRecipeExtractor extends EntityBasedMicroformatExtractor { //TODO: USE http://microformats.org/wiki/value-class-pattern to read correct date format. private BNode addDuration(HTMLDocument fragment, HTMLDocument.TextField duration) { final BNode durationBnode = getBlankNodeFor(duration.source()); - addURIProperty(durationBnode, RDF.TYPE, vHRECIPE.Duration); + addIRIProperty(durationBnode, RDF.TYPE, vHRECIPE.Duration); conditionallyAddStringProperty( duration.source(), durationBnode, vHRECIPE.durationTime, duration.value() @@ -193,7 +193,7 @@ public class HRecipeExtractor extends EntityBasedMicroformatExtractor { private void addPhoto(HTMLDocument fragment, BNode recipe) throws ExtractionException { final HTMLDocument.TextField[] photos = fragment.getPluralUrlField("photo"); for(HTMLDocument.TextField photo : photos) { - addURIProperty(recipe, vHRECIPE.photo, fragment.resolveURI(photo.value())); + addIRIProperty(recipe, vHRECIPE.photo, fragment.resolveIRI(photo.value())); } } @@ -243,7 +243,7 @@ public class HRecipeExtractor extends EntityBasedMicroformatExtractor { */ private BNode addNutrition(HTMLDocument fragment, HTMLDocument.TextField nutrition) { final BNode nutritionBnode = getBlankNodeFor(nutrition.source()); - addURIProperty(nutritionBnode, RDF.TYPE, vHRECIPE.Nutrition); + addIRIProperty(nutritionBnode, RDF.TYPE, vHRECIPE.Nutrition); conditionallyAddStringProperty( nutrition.source(), nutritionBnode, vHRECIPE.nutritionValue, nutrition.value() http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java index 1b04d13..a4b19af 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java @@ -22,9 +22,9 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.TagSoupExtractionResult; import org.apache.any23.vocab.DOAC; import org.apache.any23.vocab.FOAF; -import org.openrdf.model.BNode; -import org.openrdf.model.Resource; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.w3c.dom.Node; import java.util.List; http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java index 249162a..7652b04 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractor.java @@ -27,9 +27,9 @@ import org.apache.any23.extractor.html.HTMLDocument.TextField; import org.apache.any23.vocab.Review; import org.apache.any23.vocab.ReviewAggregate; import org.apache.any23.vocab.VCard; -import org.openrdf.model.BNode; -import org.openrdf.model.Resource; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.w3c.dom.Node; /** @@ -105,11 +105,11 @@ public class HReviewAggregateExtractor extends EntityBasedMicroformatExtractor { val.value()); final TextField url = item.getSingularUrlField("url"); conditionallyAddResourceProperty(blank, vVCARD.url, getHTMLDocument() - .resolveURI(url.value())); + .resolveIRI(url.value())); TextField pics[] = item.getPluralUrlField("photo"); for (TextField pic : pics) { - addURIProperty(blank, vVCARD.photo, - getHTMLDocument().resolveURI(pic.value())); + addIRIProperty(blank, vVCARD.photo, + getHTMLDocument().resolveIRI(pic.value())); } return blank; } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java index 91d07fc..d0699a1 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java @@ -24,9 +24,9 @@ import org.apache.any23.extractor.TagSoupExtractionResult; import org.apache.any23.vocab.DCTerms; import org.apache.any23.vocab.Review; import org.apache.any23.vocab.VCard; -import org.openrdf.model.BNode; -import org.openrdf.model.Resource; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.w3c.dom.Node; import java.util.List; @@ -121,10 +121,10 @@ public class HReviewExtractor extends EntityBasedMicroformatExtractor { blank, vVCARD.fn, val.value() ); final TextField url = item.getSingularUrlField("url"); - conditionallyAddResourceProperty(blank, vVCARD.url, getHTMLDocument().resolveURI(url.value())); + conditionallyAddResourceProperty(blank, vVCARD.url, getHTMLDocument().resolveIRI(url.value())); TextField pics[] = item.getPluralUrlField("photo"); for (TextField pic : pics) { - addURIProperty(blank, vVCARD.photo, getHTMLDocument().resolveURI(pic.value())); + addIRIProperty(blank, vVCARD.photo, getHTMLDocument().resolveIRI(pic.value())); } return blank; } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HTMLDocument.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HTMLDocument.java b/core/src/main/java/org/apache/any23/extractor/html/HTMLDocument.java index bdb9cdf..bb958c7 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HTMLDocument.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HTMLDocument.java @@ -20,8 +20,8 @@ package org.apache.any23.extractor.html; import org.apache.any23.extractor.ExtractionException; import org.apache.any23.rdf.Any23ValueFactoryWrapper; import org.apache.any23.rdf.RDFUtils; -import org.openrdf.model.URI; -import org.openrdf.model.impl.ValueFactoryImpl; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.NamedNodeMap; @@ -50,10 +50,10 @@ public class HTMLDocument { private final static Logger log = LoggerFactory.getLogger(HTMLDocument.class); private Node document; - private java.net.URI baseURI; + private java.net.URI baseIRI; private final Any23ValueFactoryWrapper valueFactory = - new Any23ValueFactoryWrapper(ValueFactoryImpl.getInstance()); + new Any23ValueFactoryWrapper(SimpleValueFactory.getInstance()); /** * Reads a text field from the given node adding the content to the given <i>res</i> list. @@ -179,12 +179,12 @@ public class HTMLDocument { } /** - * @param uri string to resolve to {@link org.openrdf.model.URI} - * @return An absolute URI, or null if the URI is not fixable - * @throws org.apache.any23.extractor.ExtractionException If the base URI is invalid + * @param uri string to resolve to {@link org.eclipse.rdf4j.model.IRI} + * @return An absolute IRI, or null if the IRI is not fixable + * @throws org.apache.any23.extractor.ExtractionException If the base IRI is invalid */ - public URI resolveURI(String uri) throws ExtractionException { - return valueFactory.resolveURI(uri, getBaseURI()); + public IRI resolveIRI(String uri) throws ExtractionException { + return valueFactory.resolveIRI(uri, getBaseIRI()); } public String find(String xpath) { @@ -373,20 +373,20 @@ public class HTMLDocument { return result.toArray( new TextField[result.size()] ); } - private java.net.URI getBaseURI() throws ExtractionException { - if (baseURI == null) { + private java.net.URI getBaseIRI() throws ExtractionException { + if (baseIRI == null) { try { if (document.getBaseURI() == null) { log.warn("document.getBaseURI() is null, this should not happen"); } - baseURI = new java.net.URI(RDFUtils.fixAbsoluteURI(document.getBaseURI())); + baseIRI = new java.net.URI(RDFUtils.fixAbsoluteIRI(document.getBaseURI())); } catch (IllegalArgumentException ex) { - throw new ExtractionException("Error in base URI: " + document.getBaseURI(), ex); + throw new ExtractionException("Error in base IRI: " + document.getBaseURI(), ex); } catch (URISyntaxException ex) { - throw new ExtractionException("Error in base URI: " + document.getBaseURI(), ex); + throw new ExtractionException("Error in base IRI: " + document.getBaseURI(), ex); } } - return baseURI; + return baseIRI; } /** http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java index 3e0c84e..e67ec42 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java @@ -25,9 +25,9 @@ import org.apache.any23.extractor.Extractor; import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.rdf.RDFUtils; import org.apache.any23.vocab.SINDICE; -import org.openrdf.model.URI; -import org.openrdf.model.impl.LiteralImpl; -import org.openrdf.model.impl.URIImpl; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.impl.LiteralImpl; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; @@ -49,9 +49,9 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor { private static final SINDICE vSINDICE = SINDICE.getInstance(); - private URI profile; + private IRI profile; - private Map<String, URI> prefixes = new HashMap<String, URI>(); + private Map<String, IRI> prefixes = new HashMap<String, IRI>(); private String documentLang; @@ -74,7 +74,7 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor { baseProfile = profile.toString(); } - final URI documentURI = extractionContext.getDocumentURI(); + final IRI documentIRI = extractionContext.getDocumentIRI(); Set<Meta> metas = extractMetaElement(in, baseProfile); for(Meta meta : metas) { String lang = documentLang; @@ -82,17 +82,29 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor { lang = meta.getLang(); } if(meta.isPragmaDirective){ - out.writeTriple( - documentURI, + if(lang != null) { + out.writeTriple( + documentIRI, meta.getHttpEquiv(), - new LiteralImpl(meta.getContent(), lang) - ); + SimpleValueFactory.getInstance().createLiteral(meta.getContent(), lang)); + } else { + out.writeTriple( + documentIRI, + meta.getHttpEquiv(), + SimpleValueFactory.getInstance().createLiteral(meta.getContent())); + } }else { - out.writeTriple( - documentURI, + if(lang != null) { + out.writeTriple( + documentIRI, meta.getName(), - new LiteralImpl(meta.getContent(), lang) - ); + SimpleValueFactory.getInstance().createLiteral(meta.getContent(), lang)); + } else { + out.writeTriple( + documentIRI, + meta.getName(), + SimpleValueFactory.getInstance().createLiteral(meta.getContent())); + } } } } @@ -111,12 +123,12 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor { return lang; } - private URI extractProfile(Document in) { + private IRI extractProfile(Document in) { String profile = DomUtils.find(in, "string(/HTML/@profile)"); if (profile.equals("")) { return null; } - return new URIImpl(profile); + return SimpleValueFactory.getInstance().createIRI(profile); } /** @@ -130,8 +142,8 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor { NamedNodeMap attributes = linkNode.getAttributes(); String rel = attributes.getNamedItem("rel").getTextContent(); String href = attributes.getNamedItem("href").getTextContent(); - if(rel != null && href !=null && RDFUtils.isAbsoluteURI(href)) { - prefixes.put(rel, new URIImpl(href)); + if(rel != null && href !=null && RDFUtils.isAbsoluteIRI(href)) { + prefixes.put(rel, SimpleValueFactory.getInstance().createIRI(href)); } } } @@ -156,31 +168,31 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor { String httpEquiv = httpEquivAttribute.getTextContent(); String content = contentAttribute.getTextContent(); String xpath = DomUtils.getXPathForNode(metaNode); - URI httpEquivAsURI = getPrefixIfExists(httpEquiv); - if (httpEquivAsURI == null) { - httpEquivAsURI = new URIImpl(baseProfile + httpEquiv); + IRI httpEquivAsIRI = getPrefixIfExists(httpEquiv); + if (httpEquivAsIRI == null) { + httpEquivAsIRI = SimpleValueFactory.getInstance().createIRI(baseProfile + httpEquiv); } - Meta meta = new Meta(xpath, content, httpEquivAsURI); + Meta meta = new Meta(xpath, content, httpEquivAsIRI); result.add(meta); } else { String name = nameAttribute.getTextContent(); String content = contentAttribute.getTextContent(); String xpath = DomUtils.getXPathForNode(metaNode); - URI nameAsURI = getPrefixIfExists(name); - if (nameAsURI == null) { - nameAsURI = new URIImpl(baseProfile + name); + IRI nameAsIRI = getPrefixIfExists(name); + if (nameAsIRI == null) { + nameAsIRI = SimpleValueFactory.getInstance().createIRI(baseProfile + name); } - Meta meta = new Meta(xpath, nameAsURI, content); + Meta meta = new Meta(xpath, nameAsIRI, content); result.add(meta); } } return result; } - private URI getPrefixIfExists(String name) { + private IRI getPrefixIfExists(String name) { String[] split = name.split("\\."); if(split.length == 2 && prefixes.containsKey(split[0])) { - return new URIImpl(prefixes.get(split[0]) + split[1]); + return SimpleValueFactory.getInstance().createIRI(prefixes.get(split[0]) + split[1]); } return null; } @@ -194,9 +206,9 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor { private String xpath; - private URI name; + private IRI name; - private URI httpEquiv; + private IRI httpEquiv; private String lang; @@ -204,25 +216,25 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor { private boolean isPragmaDirective; - public Meta(String xpath, String content, URI httpEquiv) { + public Meta(String xpath, String content, IRI httpEquiv) { this.xpath = xpath; this.content = content; this.httpEquiv = httpEquiv; this.setPragmaDirective(true); } - public Meta(String xpath, String content, URI httpEquiv, String lang) { + public Meta(String xpath, String content, IRI httpEquiv, String lang) { this(xpath,content,httpEquiv); this.lang = lang; } - public Meta(String xpath, URI name, String content) { + public Meta(String xpath, IRI name, String content) { this.xpath = xpath; this.name = name; this.content = content; } - public Meta(String xpath, URI name, String content, String lang) { + public Meta(String xpath, IRI name, String content, String lang) { this(xpath, name, content); this.lang = lang; } @@ -235,19 +247,19 @@ public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor { this.isPragmaDirective=value; } - public URI getHttpEquiv(){ + public IRI getHttpEquiv(){ return httpEquiv; } - public void setHttpEquiv(URI httpEquiv){ + public void setHttpEquiv(IRI httpEquiv){ this.httpEquiv=httpEquiv; } - public URI getName() { + public IRI getName() { return name; } - public void setName(URI name) { + public void setName(IRI name) { this.name = name; } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java index 279ec3a..c987fa3 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java @@ -24,9 +24,9 @@ import org.apache.any23.extractor.ExtractionResult; import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.vocab.XHTML; import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor; -import org.openrdf.model.URI; -import org.openrdf.model.ValueFactory; -import org.openrdf.model.impl.ValueFactoryImpl; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -46,7 +46,7 @@ public class HeadLinkExtractor implements TagSoupDOMExtractor { ExtractionResult out ) throws IOException, ExtractionException { HTMLDocument html = new HTMLDocument(in); - ValueFactory vf = ValueFactoryImpl.getInstance(); + ValueFactory vf = SimpleValueFactory.getInstance(); final List<Node> headLinkNodes = DomUtils.findAll( in, @@ -60,11 +60,11 @@ public class HeadLinkExtractor implements TagSoupDOMExtractor { ") and @href and @rel]" ); for (Node node : headLinkNodes) { - final URI href = html.resolveURI(DomUtils.find(node, "@href")); + final IRI href = html.resolveIRI(DomUtils.find(node, "@href")); final String rel = DomUtils.find(node, "@rel"); out.writeTriple( - extractionContext.getDocumentURI(), - vf.createURI(XHTML.NS + rel), + extractionContext.getDocumentIRI(), + vf.createIRI(XHTML.NS + rel), href ); final String title = DomUtils.find(node, "@title"); http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java index 30c2362..eb2524a 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java @@ -24,10 +24,10 @@ import org.apache.any23.extractor.ExtractionResult; import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.rdf.Any23ValueFactoryWrapper; import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor; -import org.openrdf.model.BNode; -import org.openrdf.model.URI; -import org.openrdf.model.ValueFactory; -import org.openrdf.model.impl.ValueFactoryImpl; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.w3c.dom.Document; import java.io.IOException; @@ -62,15 +62,15 @@ public class ICBMExtractor implements TagSoupDOMExtractor { return; } - final ValueFactory factory = new Any23ValueFactoryWrapper(ValueFactoryImpl.getInstance(), out); + final ValueFactory factory = new Any23ValueFactoryWrapper(SimpleValueFactory.getInstance(), out); BNode point = factory.createBNode(); - out.writeTriple(extractionContext.getDocumentURI(), expand("dcterms:related"), point); + out.writeTriple(extractionContext.getDocumentIRI(), expand("dcterms:related"), point); out.writeTriple(point, expand("rdf:type"), expand("geo:Point")); out.writeTriple(point, expand("geo:lat"), factory.createLiteral(Float.toString(lat))); out.writeTriple(point, expand("geo:long"), factory.createLiteral(Float.toString(lon))); } - private URI expand(String curie) { + private IRI expand(String curie) { return getDescription().getPrefixes().expand(curie); } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java index 9e0dfa7..6f666a9 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java @@ -25,7 +25,7 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.IssueReport; import org.apache.any23.vocab.XHTML; import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor; -import org.openrdf.model.URI; +import org.eclipse.rdf4j.model.IRI; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -50,21 +50,21 @@ public class LicenseExtractor implements TagSoupDOMExtractor { ExtractionResult out ) throws IOException, ExtractionException { HTMLDocument document = new HTMLDocument(in); - final URI documentURI = extractionContext.getDocumentURI(); + final IRI documentIRI = extractionContext.getDocumentIRI(); for (Node node : DomUtils.findAll(in, "//A[@rel='license']/@href")) { String link = node.getNodeValue(); if ("".equals(link)) { out.notifyIssue( - IssueReport.IssueLevel.Warning, + IssueReport.IssueLevel.WARNING, String.format( "Invalid license link detected within document %s.", - documentURI.toString() + documentIRI.toString() ), 0, 0 ); continue; } - out.writeTriple(documentURI, vXHTML.license, document.resolveURI(link)); + out.writeTriple(documentIRI, vXHTML.license, document.resolveIRI(link)); } } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java index 31cbeb6..ad6f901 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/MicroformatExtractor.java @@ -27,11 +27,11 @@ import org.apache.any23.extractor.TagSoupExtractionResult; import org.apache.any23.extractor.html.annotations.Includes; import org.apache.any23.rdf.Any23ValueFactoryWrapper; import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor; -import org.openrdf.model.BNode; -import org.openrdf.model.Literal; -import org.openrdf.model.Resource; -import org.openrdf.model.URI; -import org.openrdf.model.impl.ValueFactoryImpl; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.Literal; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -50,12 +50,12 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor { private ExtractionContext context; - private URI documentURI; + private IRI documentIRI; private ExtractionResult out; protected final Any23ValueFactoryWrapper valueFactory = - new Any23ValueFactoryWrapper(ValueFactoryImpl.getInstance()); + new Any23ValueFactoryWrapper(SimpleValueFactory.getInstance()); /** * Returns the description of this extractor. @@ -83,8 +83,8 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor { return context; } - public URI getDocumentURI() { - return documentURI; + public IRI getDocumentIRI() { + return documentIRI; } public final void run( @@ -95,7 +95,7 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor { ) throws IOException, ExtractionException { this.htmlDocument = new HTMLDocument(in); this.context = extractionContext; - this.documentURI = extractionContext.getDocumentURI(); + this.documentIRI = extractionContext.getDocumentIRI(); this.out = out; valueFactory.setIssueReport(out); try { @@ -129,13 +129,13 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor { * * @param n the <i>HTML</i> node from which the property value has been extracted. * @param subject the property subject. - * @param p the property URI. + * @param p the property IRI. * @param value the property value. * @return returns <code>true</code> if the value has been accepted and added, <code>false</code> otherwise. */ protected boolean conditionallyAddStringProperty( Node n, - Resource subject, URI p, String value + Resource subject, IRI p, String value ) { if (value == null) return false; value = value.trim(); @@ -153,20 +153,20 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor { * * @param n the <i>HTML</i> node from which the property value has been extracted. * @param subject subject the property subject. - * @param property the property URI. + * @param property the property IRI. * @param literal value the property value. * @return returns <code>true</code> if the literal has been accepted and added, <code>false</code> otherwise. */ protected boolean conditionallyAddLiteralProperty( Node n, Resource subject, - URI property, + IRI property, Literal literal ) { final String literalStr = literal.stringValue(); if( containsScriptBlock(literalStr) ) { out.notifyIssue( - IssueReport.IssueLevel.Warning, + IssueReport.IssueLevel.WARNING, String.format("Detected script in literal: [%s]", literalStr) , -1 , -1 @@ -180,13 +180,13 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor { } /** - * Helper method that adds a URI property to a node. + * Helper method that adds a IRI property to a node. * @param subject the property subject. - * @param property the property URI. + * @param property the property IRI. * @param uri the property object. * @return <code>true</code> if the the resource has been added, <code>false</code> otherwise. */ - protected boolean conditionallyAddResourceProperty(Resource subject, URI property, URI uri) { + protected boolean conditionallyAddResourceProperty(Resource subject, IRI property, IRI uri) { if (uri == null) return false; out.writeTriple(subject, property, uri); return true; @@ -197,10 +197,10 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor { * * @param n the <i>HTML</i> node used for extracting such property. * @param subject the property subject. - * @param property the property URI. + * @param property the property IRI. * @param bnode the property value. */ - protected void addBNodeProperty(Node n, Resource subject, URI property, BNode bnode) { + protected void addBNodeProperty(Node n, Resource subject, IRI property, BNode bnode) { out.writeTriple(subject, property, bnode); TagSoupExtractionResult tser = (TagSoupExtractionResult) out; tser.addPropertyPath(this.getClass(), subject, property, bnode, DomUtils.getXPathListForNode(n) ); @@ -210,29 +210,29 @@ public abstract class MicroformatExtractor implements TagSoupDOMExtractor { * Helper method that adds a BNode property to a node. * * @param subject the property subject. - * @param property the property URI. + * @param property the property IRI. * @param bnode the property value. */ - protected void addBNodeProperty( Resource subject, URI property, BNode bnode) { + protected void addBNodeProperty( Resource subject, IRI property, BNode bnode) { out.writeTriple(subject, property, bnode); } /** - * Helper method that adds a URI property to a node. + * Helper method that adds a IRI property to a node. * * @param subject subject to add * @param property predicate to add * @param object object to add */ - protected void addURIProperty(Resource subject, URI property, URI object) { + protected void addIRIProperty(Resource subject, IRI property, IRI object) { out.writeTriple(subject, property, object); } - protected URI fixLink(String link) { + protected IRI fixLink(String link) { return valueFactory.fixLink(link, null); } - protected URI fixLink(String link, String defaultSchema) { + protected IRI fixLink(String link, String defaultSchema) { return valueFactory.fixLink(link, defaultSchema); } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java index 0e9f51f..11a6223 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java @@ -22,10 +22,10 @@ import org.apache.any23.extractor.ExtractionResult; import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.TagSoupExtractionResult; import org.apache.any23.vocab.WO; -import org.openrdf.model.BNode; -import org.openrdf.model.Resource; -import org.openrdf.model.URI; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.w3c.dom.Node; /** @@ -138,7 +138,7 @@ public class SpeciesExtractor extends EntityBasedMicroformatExtractor { } } - private URI resolvePropertyName(String clazz) { + private IRI resolvePropertyName(String clazz) { return vWO.getProperty( String.format( "%sName", @@ -147,7 +147,7 @@ public class SpeciesExtractor extends EntityBasedMicroformatExtractor { ); } - private URI resolveClassName(String clazz) { + private IRI resolveClassName(String clazz) { String upperCaseClass = clazz.substring(0, 1); return vWO.getClass( String.format("%s%s", http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java b/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java index 50311bd..e6eb9cd 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java +++ b/core/src/main/java/org/apache/any23/extractor/html/TagSoupParser.java @@ -25,6 +25,8 @@ import org.apache.xerces.xni.QName; import org.apache.xerces.xni.XMLAttributes; import org.apache.xerces.xni.XNIException; import org.cyberneko.html.parsers.DOMParser; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.w3c.dom.Document; @@ -66,29 +68,29 @@ public class TagSoupParser { private final InputStream input; - private final String documentURI; + private final String documentIRI; private final String encoding; private Document result = null; - public TagSoupParser(InputStream input, String documentURI) { + public TagSoupParser(InputStream input, String documentIRI) { this.input = input; - this.documentURI = documentURI; + this.documentIRI = documentIRI; this.encoding = null; } - public TagSoupParser(InputStream input, String documentURI, String encoding) { + public TagSoupParser(InputStream input, String documentIRI, String encoding) { if(encoding != null && !Charset.isSupported(encoding)) throw new UnsupportedCharsetException(String.format("Charset %s is not supported", encoding)); this.input = input; - this.documentURI = documentURI; + this.documentIRI = documentIRI; this.encoding = encoding; } /** - * Returns the DOM of the given document URI. + * Returns the DOM of the given document IRI. * * @return the <i>HTML</i> DOM. * @throws IOException if there is an error whilst accessing the DOM @@ -112,10 +114,10 @@ public class TagSoupParser { } } finally { long elapsed = System.currentTimeMillis() - startTime; - logger.debug("Parsed " + documentURI + " with NekoHTML, " + elapsed + "ms"); + logger.debug("Parsed " + documentIRI + " with NekoHTML, " + elapsed + "ms"); } } - result.setDocumentURI(documentURI); + result.setDocumentURI(documentIRI); return result; } @@ -131,15 +133,15 @@ public class TagSoupParser { * @throws org.apache.any23.validator.ValidatorException if there is an error validating the DOM */ public DocumentReport getValidatedDOM(boolean applyFix) throws IOException, ValidatorException { - final URI dURI; + final URI dIRI; try { - dURI = new URI(documentURI); - } catch (URISyntaxException urise) { - throw new ValidatorException("Error while performing validation, invalid document URI.", urise); + dIRI = new URI(documentIRI); + } catch (IllegalArgumentException | URISyntaxException urise) { + throw new ValidatorException("Error while performing validation, invalid document IRI.", urise); } Validator validator = new DefaultValidator(); Document document = getDOM(); - return new DocumentReport( validator.validate(dURI, document, applyFix), document ); + return new DocumentReport( validator.validate(dIRI, document, applyFix), document ); } private Document parse() throws IOException, SAXException, TransformerException { http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java index 8651ed4..3788af9 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java @@ -25,7 +25,7 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.rdf.Any23ValueFactoryWrapper; import org.apache.any23.vocab.DCTerms; import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor; -import org.openrdf.model.impl.ValueFactoryImpl; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.w3c.dom.Document; import java.io.IOException; @@ -48,13 +48,13 @@ public class TitleExtractor implements TagSoupDOMExtractor { ExtractionResult out ) throws IOException, ExtractionException { final Any23ValueFactoryWrapper valueFactory = new Any23ValueFactoryWrapper( - ValueFactoryImpl.getInstance(), out, extractionContext.getDefaultLanguage() + SimpleValueFactory.getInstance(), out, extractionContext.getDefaultLanguage() ); try { String title = DomUtils.find(in, "/HTML/HEAD/TITLE/text()").trim(); if (title != null && (title.length() != 0)) { - out.writeTriple(extractionContext.getDocumentURI(), vDCTERMS.title, valueFactory.createLiteral(title)); + out.writeTriple(extractionContext.getDocumentIRI(), vDCTERMS.title, valueFactory.createLiteral(title)); } } finally { valueFactory.setIssueReport(null); http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java index 9ede50a..17b54e6 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java @@ -25,9 +25,9 @@ import org.apache.any23.extractor.ExtractionResult; import org.apache.any23.extractor.Extractor; import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.rdf.RDFParserFactory; -import org.openrdf.model.URI; -import org.openrdf.rio.RDFParseException; -import org.openrdf.rio.RDFParser; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.rio.RDFParseException; +import org.eclipse.rdf4j.rio.RDFParser; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -57,16 +57,16 @@ public class TurtleHTMLExtractor implements Extractor.TagSoupDOMExtractor { ) throws IOException, ExtractionException { List<Node> scriptNodes; HTMLDocument htmlDocument = new HTMLDocument(in); - final URI documentURI = extractionContext.getDocumentURI(); + final IRI documentIRI = extractionContext.getDocumentIRI(); scriptNodes = htmlDocument.findAll(".//SCRIPT[contains(@type,'text/turtle')]"); - processScriptNodes(documentURI, extractionContext, out, scriptNodes); + processScriptNodes(documentIRI, extractionContext, out, scriptNodes); scriptNodes = htmlDocument.findAll(".//SCRIPT[contains(@type,'text/n3')]"); - processScriptNodes(documentURI, extractionContext, out, scriptNodes); + processScriptNodes(documentIRI, extractionContext, out, scriptNodes); scriptNodes = htmlDocument.findAll(".//SCRIPT[contains(@type,'text/plain')]"); - processScriptNodes(documentURI, extractionContext,out, scriptNodes); + processScriptNodes(documentIRI, extractionContext,out, scriptNodes); } @Override @@ -77,16 +77,16 @@ public class TurtleHTMLExtractor implements Extractor.TagSoupDOMExtractor { /** * Processes a list of <i>html script</i> nodes retrieving the N3 / Turtle content. * - * @param documentURI the URI of the original HTML document. + * @param documentIRI the IRI of the original HTML document. * @param er the extraction result used to store triples. * @param ns the list of script nodes. */ - private void processScriptNodes(URI documentURI, ExtractionContext ec, ExtractionResult er, List<Node> ns) { + private void processScriptNodes(IRI documentIRI, ExtractionContext ec, ExtractionResult er, List<Node> ns) { if(ns.size() > 0 && turtleParser == null) { turtleParser = RDFParserFactory.getInstance().getTurtleParserInstance(true, false, ec, er); } for(Node n : ns) { - processScriptNode(turtleParser, documentURI, n, er); + processScriptNode(turtleParser, documentIRI, n, er); } } @@ -94,20 +94,20 @@ public class TurtleHTMLExtractor implements Extractor.TagSoupDOMExtractor { * Processes a single <i>html script</i> node. * * @param turtleParser the parser used to digest node content. - * @param documentURI the URI of the original HTML document. + * @param documentIRI the IRI of the original HTML document. * @param n the script node. * @param er the extraction result used to store triples. */ - private void processScriptNode(RDFParser turtleParser, URI documentURI, Node n, ExtractionResult er) { + private void processScriptNode(RDFParser turtleParser, IRI documentIRI, Node n, ExtractionResult er) { final Node idAttribute = n.getAttributes().getNamedItem("id"); final String graphName = - documentURI.stringValue() + + documentIRI.stringValue() + ( idAttribute == null ? "" : "#" + idAttribute.getTextContent() ); try { turtleParser.parse( new StringReader(n.getTextContent()), graphName ); } catch (RDFParseException rdfpe) { er.notifyIssue( - IssueReport.IssueLevel.Error, + IssueReport.IssueLevel.ERROR, String.format( "An error occurred while parsing turtle content within script node: %s", Arrays.toString(DomUtils.getXPathListForNode(n)) @@ -115,7 +115,7 @@ public class TurtleHTMLExtractor implements Extractor.TagSoupDOMExtractor { rdfpe.getLineNumber(), rdfpe.getColumnNumber() ); } catch (Exception e) { - er.notifyIssue(IssueReport.IssueLevel.Error, "An error occurred while processing RDF data.", -1, -1); + er.notifyIssue(IssueReport.IssueLevel.ERROR, "An error occurred while processing RDF data.", -1, -1); } } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java index ab16fe5..af971fa 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java @@ -26,10 +26,10 @@ import org.apache.any23.rdf.Any23ValueFactoryWrapper; import org.apache.any23.vocab.FOAF; import org.apache.any23.vocab.XFN; import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor; -import org.openrdf.model.BNode; -import org.openrdf.model.URI; -import org.openrdf.model.impl.ValueFactoryImpl; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -47,7 +47,7 @@ public class XFNExtractor implements TagSoupDOMExtractor { private static final XFN vXFN = XFN.getInstance(); private final static Any23ValueFactoryWrapper factoryWrapper = - new Any23ValueFactoryWrapper(ValueFactoryImpl.getInstance()); + new Any23ValueFactoryWrapper(SimpleValueFactory.getInstance()); private HTMLDocument document; private ExtractionResult out; @@ -71,36 +71,36 @@ public class XFNExtractor implements TagSoupDOMExtractor { BNode subject = factoryWrapper.createBNode(); boolean foundAnyXFN = false; - final URI documentURI = extractionContext.getDocumentURI(); + final IRI documentIRI = extractionContext.getDocumentIRI(); for (Node link : document.findAll("//A[@rel][@href]")) { - foundAnyXFN |= extractLink(link, subject, documentURI); + foundAnyXFN |= extractLink(link, subject, documentIRI); } if (!foundAnyXFN) return; out.writeTriple(subject, RDF.TYPE, vFOAF.Person); - out.writeTriple(subject, vXFN.mePage, documentURI); + out.writeTriple(subject, vXFN.mePage, documentIRI); } finally { factoryWrapper.setIssueReport(null); } } - private boolean extractLink(Node firstLink, BNode subject, URI documentURI) + private boolean extractLink(Node firstLink, BNode subject, IRI documentIRI) throws ExtractionException { String href = firstLink.getAttributes().getNamedItem("href").getNodeValue(); String rel = firstLink.getAttributes().getNamedItem("rel").getNodeValue(); String[] rels = rel.split("\\s+"); - URI link = document.resolveURI(href); + IRI link = document.resolveIRI(href); if (containsRelMe(rels)) { if (containsXFNRelExceptMe(rels)) { return false; // "me" cannot be combined with any other XFN values } out.writeTriple(subject, vXFN.mePage, link); - out.writeTriple(documentURI, vXFN.getExtendedProperty("me"), link); + out.writeTriple(documentIRI, vXFN.getExtendedProperty("me"), link); } else { BNode person2 = factoryWrapper.createBNode(); boolean foundAnyXFNRel = false; for (String aRel : rels) { - foundAnyXFNRel |= extractRel(aRel, subject, documentURI, person2, link); + foundAnyXFNRel |= extractRel(aRel, subject, documentIRI, person2, link); } if (!foundAnyXFNRel) { return false; @@ -129,9 +129,9 @@ public class XFNExtractor implements TagSoupDOMExtractor { return false; } - private boolean extractRel(String rel, BNode person1, URI uri1, BNode person2, URI uri2) { - URI peopleProp = vXFN.getPropertyByLocalName(rel); - URI hyperlinkProp = vXFN.getExtendedProperty(rel); + private boolean extractRel(String rel, BNode person1, IRI uri1, BNode person2, IRI uri2) { + IRI peopleProp = vXFN.getPropertyByLocalName(rel); + IRI hyperlinkProp = vXFN.getExtendedProperty(rel); if (peopleProp == null) { return false; } http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java index d0d9257..0e21b06 100644 --- a/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/html/microformats2/HAdrExtractor.java @@ -23,9 +23,9 @@ import org.apache.any23.extractor.ExtractorDescription; import org.apache.any23.extractor.TagSoupExtractionResult; import org.apache.any23.extractor.html.microformats2.annotations.Includes; import org.apache.any23.vocab.VCard; -import org.openrdf.model.BNode; -import org.openrdf.model.Resource; -import org.openrdf.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.BNode; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.vocabulary.RDF; import org.w3c.dom.Node; import org.apache.any23.extractor.html.EntityBasedMicroformatExtractor; import org.apache.any23.extractor.html.HTMLDocument; @@ -101,7 +101,7 @@ public class HAdrExtractor extends EntityBasedMicroformatExtractor { private void addGeoAsUrlResource(Resource card,HTMLDocument document) throws ExtractionException { HTMLDocument.TextField[] links = document.getPluralUrlField(Microformats2Prefixes.URL_PROPERTY_PREFIX+"geo"); for (HTMLDocument.TextField link : links) { - conditionallyAddResourceProperty(card, vVCARD.geo, getHTMLDocument().resolveURI(link.value())); + conditionallyAddResourceProperty(card, vVCARD.geo, getHTMLDocument().resolveIRI(link.value())); } }
