This is an automated email from the ASF dual-hosted git repository. hansbrende pushed a commit to branch ANY23-433 in repository https://gitbox.apache.org/repos/asf/any23.git
commit 12be13966f5a655f55b24f541cc5e497c069c54b Author: Hans <[email protected]> AuthorDate: Wed Sep 25 00:42:55 2019 -0500 ANY23-433 upgrade rdf4j to v3.0.0 --- .../any23/extractor/rdf/JSONLDExtractor.java | 59 ++++++++++-- .../apache/any23/extractor/rdf/JSONLDJavaSink.java | 107 +++++++++++++-------- pom.xml | 2 +- 3 files changed, 115 insertions(+), 53 deletions(-) diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java index 59998cb..93f8ce1 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java @@ -25,7 +25,12 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.github.jsonldjava.core.JsonLdOptions; import com.github.jsonldjava.core.JsonLdProcessor; import com.github.jsonldjava.utils.JsonUtils; -import org.apache.any23.extractor.*; +import org.apache.any23.extractor.ExtractionContext; +import org.apache.any23.extractor.ExtractionException; +import org.apache.any23.extractor.ExtractionParameters; +import org.apache.any23.extractor.ExtractionResult; +import org.apache.any23.extractor.ExtractorDescription; +import org.apache.any23.extractor.IssueReport; import org.apache.any23.rdf.Any23ValueFactoryWrapper; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.rio.RDFParser; @@ -58,13 +63,18 @@ public class JSONLDExtractor extends BaseRDFExtractor { JSON_FACTORY.disable(JsonParser.Feature.STRICT_DUPLICATE_DETECTION); } - + /** + * @deprecated since 2.4. This extractor has never supported these settings. Use {@link #JSONLDExtractor()} instead. + * @param verifyDataType has no effect + * @param stopAtFirstError has no effect + */ + @Deprecated public JSONLDExtractor(boolean verifyDataType, boolean stopAtFirstError) { super(verifyDataType, stopAtFirstError); } public JSONLDExtractor() { - this(false, false); + super(false, false); } @Override @@ -73,13 +83,6 @@ public class JSONLDExtractor extends BaseRDFExtractor { } @Override - protected RDFParser getParser(ExtractionContext extractionContext, ExtractionResult extractionResult) { - return RDFParserFactory.getInstance().getJSONLDParser( - isVerifyDataType(), isStopAtFirstError(), extractionContext, extractionResult - ); - } - - @Override public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, InputStream in, ExtractionResult extractionResult) throws IOException, ExtractionException { JSONLDJavaSink handler = new JSONLDJavaSink(extractionResult, new Any23ValueFactoryWrapper( SimpleValueFactory.getInstance(), @@ -106,4 +109,40 @@ public class JSONLDExtractor extends BaseRDFExtractor { } } + /* DEPRECATED METHODS */ + + /** + * @deprecated since 2.4. This extractor has never supported this setting. Do not use. + * @param stopAtFirstError has no effect + */ + @Deprecated + @Override + public void setStopAtFirstError(boolean stopAtFirstError) { + super.setStopAtFirstError(stopAtFirstError); + } + + /** + * @deprecated since 2.4. This extractor has never supported this setting. Do not use. + * @param verifyDataType has no effect + */ + @Deprecated + @Override + public void setVerifyDataType(boolean verifyDataType) { + super.setVerifyDataType(verifyDataType); + } + + /** + * @deprecated since 2.4. This extractor no longer wraps an RDF4J {@link RDFParser}. Do not use this method. + * @param extractionContext the extraction context + * @param extractionResult the extraction result + * @return a {@link RDFParser} + */ + @Deprecated + @Override + protected RDFParser getParser(ExtractionContext extractionContext, ExtractionResult extractionResult) { + return RDFParserFactory.getInstance().getJSONLDParser( + isVerifyDataType(), isStopAtFirstError(), extractionContext, extractionResult + ); + } + } diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDJavaSink.java b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDJavaSink.java index 4fd5cf8..f458deb 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDJavaSink.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDJavaSink.java @@ -1,91 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.any23.extractor.rdf; -import java.util.List; import java.util.Map.Entry; +import java.util.UUID; import org.apache.any23.extractor.ExtractionResult; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Resource; import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.model.ValueFactory; -import org.eclipse.rdf4j.model.vocabulary.RDF; -import org.eclipse.rdf4j.model.vocabulary.XMLSchema; - +import com.github.jsonldjava.core.JsonLdConsts; import com.github.jsonldjava.core.JsonLdTripleCallback; import com.github.jsonldjava.core.RDFDataset; - +/** + * @author Hans Brende ([email protected]) + */ class JSONLDJavaSink implements JsonLdTripleCallback { - private static final String BNODE_PREFIX = "_:"; + private static final String BNODE_PREFIX = JsonLdConsts.BLANK_NODE_PREFIX; private final ExtractionResult handler; private final ValueFactory valueFactory; + private final String bNodeUniquifier; JSONLDJavaSink(ExtractionResult handler, ValueFactory valueFactory) { this.handler = handler; this.valueFactory = valueFactory; + this.bNodeUniquifier = "n" + UUID.randomUUID().toString().replace("-", "") + "x"; } - private Resource createResource(String arg) { - if (arg.startsWith(BNODE_PREFIX)) { - String bNodeId = arg.substring(BNODE_PREFIX.length()); - return bNodeId.isEmpty() ? valueFactory.createBNode() : valueFactory.createBNode(bNodeId); + private Resource createResource(RDFDataset.Node resource) { + String value = resource == null ? null : resource.getValue(); + if (value != null && value.startsWith(BNODE_PREFIX)) { + String bNodeId = value.substring(BNODE_PREFIX.length()); + + if (bNodeId.length() < 32) { // not globally unique; will collide with other blank node ids + if (bNodeId.isEmpty()) { + bNodeId = Integer.toHexString(System.identityHashCode(resource)); + } + bNodeId = bNodeUniquifier + bNodeId; + } + + return valueFactory.createBNode(bNodeId); } - return valueFactory.createIRI(arg); + return valueFactory.createIRI(value); } - private void writeQuad(String s, String p, Value o, String graphName) { + private void writeQuad(RDFDataset.Node sNode, RDFDataset.Node pNode, Value o, String graphName) { + if (graphName != null && graphName.startsWith(BNODE_PREFIX)) { + // TODO support blank node graph names in Any23 + return; + } + Resource s = createResource(sNode); + IRI p = valueFactory.createIRI(pNode == null ? null : pNode.getValue()); if (s == null || p == null || o == null) { return; } - - if (graphName == null) { - handler.writeTriple(createResource(s), valueFactory.createIRI(p), o); + if (graphName == null || graphName.isEmpty() || JsonLdConsts.DEFAULT.equalsIgnoreCase(graphName)) { + handler.writeTriple(s, p, o); } else { - Resource g = createResource(graphName); - if (g instanceof IRI) { - handler.writeTriple(createResource(s), valueFactory.createIRI(p), o, (IRI)g); - } - // TODO support resource graph names in Any23 + handler.writeTriple(s, p, o, valueFactory.createIRI(graphName)); } } - @Override public Object call(final RDFDataset dataset) { - for (final Entry<String, String> nextNamespace : dataset.getNamespaces().entrySet()) { + for (Entry<String, String> nextNamespace : dataset.getNamespaces().entrySet()) { handler.writeNamespace(nextNamespace.getKey(), nextNamespace.getValue()); } for (String graphName : dataset.keySet()) { - final List<RDFDataset.Quad> quads = dataset.getQuads(graphName); - if ("@default".equals(graphName)) { - graphName = null; - } - for (RDFDataset.Quad quad : quads) { - RDFDataset.Node object = quad.getObject(); - String s = quad.getSubject().getValue(); - String p = quad.getPredicate().getValue(); - String o = object.getValue(); - if (object.isLiteral()) { - String lang = object.getLanguage(); - String datatype = object.getDatatype(); + for (RDFDataset.Quad quad : dataset.getQuads(graphName)) { + RDFDataset.Node s = quad.getSubject(); + RDFDataset.Node p = quad.getPredicate(); + RDFDataset.Node o = quad.getObject(); + if (o == null || !o.isLiteral()) { + writeQuad(s, p, createResource(o), graphName); + } else { + String lang = o.getLanguage(); + String datatype = o.getDatatype(); + String literal = o.getValue(); if (lang != null && !lang.isEmpty() && (datatype == null || datatype.indexOf(':') < 0 - || RDF.LANGSTRING.stringValue().equalsIgnoreCase(datatype) - || XMLSchema.STRING.stringValue().equalsIgnoreCase(datatype))) { - writeQuad(s, p, valueFactory.createLiteral(o, lang), graphName); + || JsonLdConsts.RDF_LANGSTRING.equalsIgnoreCase(datatype) + || JsonLdConsts.XSD_STRING.equalsIgnoreCase(datatype))) { + writeQuad(s, p, valueFactory.createLiteral(literal, lang), graphName); } else if (datatype != null && !datatype.isEmpty()) { - writeQuad(s, p, valueFactory.createLiteral(o, valueFactory.createIRI(datatype)), graphName); + writeQuad(s, p, valueFactory.createLiteral(literal, valueFactory.createIRI(datatype)), graphName); } else { - writeQuad(s, p, valueFactory.createLiteral(o), graphName); + writeQuad(s, p, valueFactory.createLiteral(literal), graphName); } - } else { - writeQuad(s, p, createResource(o), graphName); } } } return null; } - } diff --git a/pom.xml b/pom.xml index d45cd06..05483c8 100644 --- a/pom.xml +++ b/pom.xml @@ -274,7 +274,7 @@ <httpcore.version>4.4.12</httpcore.version> <owlapi.version>5.1.11</owlapi.version> <poi.version>4.1.0</poi.version> - <rdf4j.version>2.4.4</rdf4j.version> + <rdf4j.version>3.0.0</rdf4j.version> <semargl.version>0.7</semargl.version> <slf4j.logger.version>1.7.28</slf4j.logger.version> <tika.version>1.22</tika.version>
