This is an automated email from the ASF dual-hosted git repository. hansbrende pushed a commit to branch ANY23-433 in repository https://gitbox.apache.org/repos/asf/any23.git
commit 46ee071ec9424496fb975c4f27b1cde86c62f9cb Author: Hans <[email protected]> AuthorDate: Tue Sep 24 19:00:21 2019 -0500 ANY23-433 remove jsonld hack --- .../any23/extractor/rdf/BaseRDFExtractor.java | 25 +----- .../any23/extractor/rdf/JSONLDExtractor.java | 64 +++++++++------ .../apache/any23/extractor/rdf/JSONLDJavaSink.java | 91 ++++++++++++++++++++++ pom.xml | 2 +- 4 files changed, 134 insertions(+), 48 deletions(-) diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java index 2ea04a0..f2d1a47 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java @@ -93,36 +93,15 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor { ) throws IOException, ExtractionException { try { final RDFParser parser = getParser(extractionContext, extractionResult); - - RDFFormat format = parser.getRDFFormat(); - - if (format.hasFileExtension("jsonld") || format.hasMIMEType("application/ld+json")) { - in = new JsonCleaningInputStream(in); - } - parser.parse(in, extractionContext.getDocumentIRI().stringValue()); } catch (Exception ex) { - // ANY23-420: jsonld-java can sometimes throw IllegalArgumentException, - // so don't limit catch block to RDFParseExceptions - - Throwable cause = ex.getCause(); - if (cause instanceof JsonProcessingException) { - JsonProcessingException err = (JsonProcessingException)cause; - JsonLocation loc = err.getLocation(); - if (loc == null) { - extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, err.getOriginalMessage(), -1L, -1L); - } else { - extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, err.getOriginalMessage(), loc.getLineNr(), loc.getColumnNr()); - } - } else { - extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, toString(ex), -1, -1); - } + extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, toString(ex), -1, -1); } } // keep private to avoid backwards compatibility woes (may move around later) @SuppressWarnings("Duplicates") - private static String toString(Throwable th) { + static String toString(Throwable th) { StringWriter writer = new StringWriter(); try (PrintWriter pw = new PrintWriter(writer)) { th.printStackTrace(pw); diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java index 1806adf..59998cb 100644 --- a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java +++ b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java @@ -18,14 +18,20 @@ package org.apache.any23.extractor.rdf; import com.fasterxml.jackson.core.JsonFactory; +import com.fasterxml.jackson.core.JsonLocation; import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.github.jsonldjava.core.JsonLdOptions; +import com.github.jsonldjava.core.JsonLdProcessor; import com.github.jsonldjava.utils.JsonUtils; -import org.apache.any23.extractor.ExtractionContext; -import org.apache.any23.extractor.ExtractionResult; -import org.apache.any23.extractor.ExtractorDescription; +import org.apache.any23.extractor.*; +import org.apache.any23.rdf.Any23ValueFactoryWrapper; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.rio.RDFParser; -import java.lang.reflect.Field; +import java.io.IOException; +import java.io.InputStream; /** * Concrete implementation of {@link org.apache.any23.extractor.Extractor.ContentExtractor} @@ -34,27 +40,9 @@ import java.lang.reflect.Field; */ public class JSONLDExtractor extends BaseRDFExtractor { - static { - //See https://issues.apache.org/jira/browse/ANY23-336 - try { - //This field was introduced in jsonld-java version 0.12.0 - if ((Object)JsonUtils.JSONLD_JAVA_USER_AGENT instanceof Void) { - throw new Error("This error will never be thrown."); - } - } catch (NoSuchFieldError th) { - throw new AssertionError("You have an outdated version of jsonld-java on the classpath. " + - "Upgrade to at least version 0.12.0. See: https://issues.apache.org/jira/browse/ANY23-336", th); - } - - JsonFactory JSON_FACTORY; - try { - Field field = JsonUtils.class.getDeclaredField("JSON_FACTORY"); - field.setAccessible(true); - JSON_FACTORY = (JsonFactory)field.get(null); - } catch (Exception e) { - throw new AssertionError(e); - } + private static final JsonFactory JSON_FACTORY = new JsonFactory(new ObjectMapper()); + static { JSON_FACTORY.enable(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER); JSON_FACTORY.disable(JsonParser.Feature.ALLOW_COMMENTS); //handled by JsonCleaningInputStream JSON_FACTORY.disable(JsonParser.Feature.ALLOW_MISSING_VALUES); //handled by JsonCleaningInputStream @@ -90,4 +78,32 @@ public class JSONLDExtractor extends BaseRDFExtractor { isVerifyDataType(), isStopAtFirstError(), extractionContext, extractionResult ); } + + @Override + public void run(ExtractionParameters extractionParameters, ExtractionContext extractionContext, InputStream in, ExtractionResult extractionResult) throws IOException, ExtractionException { + JSONLDJavaSink handler = new JSONLDJavaSink(extractionResult, new Any23ValueFactoryWrapper( + SimpleValueFactory.getInstance(), + extractionResult, + extractionContext.getDefaultLanguage() + )); + + JsonLdOptions options = new JsonLdOptions(extractionContext.getDocumentIRI().stringValue()); + options.useNamespaces = true; + + try { + Object json = JsonUtils.fromJsonParser(JSON_FACTORY.createParser(new JsonCleaningInputStream(in))); + JsonLdProcessor.toRDF(json, handler, options); + } catch (JsonProcessingException e) { + JsonLocation loc = e.getLocation(); + if (loc == null) { + extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, e.getOriginalMessage(), -1L, -1L); + } else { + extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, e.getOriginalMessage(), loc.getLineNr(), loc.getColumnNr()); + } + } catch (Exception e) { + // ANY23-420: jsonld-java can sometimes throw IllegalArgumentException + extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, toString(e), -1, -1); + } + } + } diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDJavaSink.java b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDJavaSink.java new file mode 100644 index 0000000..4fd5cf8 --- /dev/null +++ b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDJavaSink.java @@ -0,0 +1,91 @@ +package org.apache.any23.extractor.rdf; + +import java.util.List; +import java.util.Map.Entry; + +import org.apache.any23.extractor.ExtractionResult; +import org.eclipse.rdf4j.model.IRI; +import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.vocabulary.RDF; +import org.eclipse.rdf4j.model.vocabulary.XMLSchema; + +import com.github.jsonldjava.core.JsonLdTripleCallback; +import com.github.jsonldjava.core.RDFDataset; + + +class JSONLDJavaSink implements JsonLdTripleCallback { + + private static final String BNODE_PREFIX = "_:"; + + private final ExtractionResult handler; + private final ValueFactory valueFactory; + + JSONLDJavaSink(ExtractionResult handler, ValueFactory valueFactory) { + this.handler = handler; + this.valueFactory = valueFactory; + } + + private Resource createResource(String arg) { + if (arg.startsWith(BNODE_PREFIX)) { + String bNodeId = arg.substring(BNODE_PREFIX.length()); + return bNodeId.isEmpty() ? valueFactory.createBNode() : valueFactory.createBNode(bNodeId); + } + return valueFactory.createIRI(arg); + } + + private void writeQuad(String s, String p, Value o, String graphName) { + if (s == null || p == null || o == null) { + return; + } + + if (graphName == null) { + handler.writeTriple(createResource(s), valueFactory.createIRI(p), o); + } else { + Resource g = createResource(graphName); + if (g instanceof IRI) { + handler.writeTriple(createResource(s), valueFactory.createIRI(p), o, (IRI)g); + } + // TODO support resource graph names in Any23 + } + } + + + @Override + public Object call(final RDFDataset dataset) { + for (final Entry<String, String> nextNamespace : dataset.getNamespaces().entrySet()) { + handler.writeNamespace(nextNamespace.getKey(), nextNamespace.getValue()); + } + for (String graphName : dataset.keySet()) { + final List<RDFDataset.Quad> quads = dataset.getQuads(graphName); + if ("@default".equals(graphName)) { + graphName = null; + } + for (RDFDataset.Quad quad : quads) { + RDFDataset.Node object = quad.getObject(); + String s = quad.getSubject().getValue(); + String p = quad.getPredicate().getValue(); + String o = object.getValue(); + if (object.isLiteral()) { + String lang = object.getLanguage(); + String datatype = object.getDatatype(); + if (lang != null && !lang.isEmpty() && + (datatype == null || datatype.indexOf(':') < 0 + || RDF.LANGSTRING.stringValue().equalsIgnoreCase(datatype) + || XMLSchema.STRING.stringValue().equalsIgnoreCase(datatype))) { + writeQuad(s, p, valueFactory.createLiteral(o, lang), graphName); + } else if (datatype != null && !datatype.isEmpty()) { + writeQuad(s, p, valueFactory.createLiteral(o, valueFactory.createIRI(datatype)), graphName); + } else { + writeQuad(s, p, valueFactory.createLiteral(o), graphName); + } + } else { + writeQuad(s, p, createResource(o), graphName); + } + } + } + return null; + } + +} diff --git a/pom.xml b/pom.xml index 125c85b..d45cd06 100644 --- a/pom.xml +++ b/pom.xml @@ -513,7 +513,7 @@ <dependency> <groupId>com.github.jsonld-java</groupId> <artifactId>jsonld-java</artifactId> - <version>0.12.3</version> + <version>0.12.5</version> </dependency> <dependency> <groupId>org.semarglproject</groupId>
