This is an automated email from the ASF dual-hosted git repository.

hansbrende pushed a commit to branch ANY23-433
in repository https://gitbox.apache.org/repos/asf/any23.git

commit 46ee071ec9424496fb975c4f27b1cde86c62f9cb
Author: Hans <[email protected]>
AuthorDate: Tue Sep 24 19:00:21 2019 -0500

    ANY23-433 remove jsonld hack
---
 .../any23/extractor/rdf/BaseRDFExtractor.java      | 25 +-----
 .../any23/extractor/rdf/JSONLDExtractor.java       | 64 +++++++++------
 .../apache/any23/extractor/rdf/JSONLDJavaSink.java | 91 ++++++++++++++++++++++
 pom.xml                                            |  2 +-
 4 files changed, 134 insertions(+), 48 deletions(-)

diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index 2ea04a0..f2d1a47 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -93,36 +93,15 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
     ) throws IOException, ExtractionException {
         try {
             final RDFParser parser = getParser(extractionContext, 
extractionResult);
-
-            RDFFormat format = parser.getRDFFormat();
-
-            if (format.hasFileExtension("jsonld") || 
format.hasMIMEType("application/ld+json")) {
-                in = new JsonCleaningInputStream(in);
-            }
-
             parser.parse(in, extractionContext.getDocumentIRI().stringValue());
         } catch (Exception ex) {
-            // ANY23-420: jsonld-java can sometimes throw 
IllegalArgumentException,
-            // so don't limit catch block to RDFParseExceptions
-
-            Throwable cause = ex.getCause();
-            if (cause instanceof JsonProcessingException) {
-                JsonProcessingException err = (JsonProcessingException)cause;
-                JsonLocation loc = err.getLocation();
-                if (loc == null) {
-                    extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, 
err.getOriginalMessage(), -1L, -1L);
-                } else {
-                    extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, 
err.getOriginalMessage(), loc.getLineNr(), loc.getColumnNr());
-                }
-            } else {
-                extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, 
toString(ex), -1, -1);
-            }
+            extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, 
toString(ex), -1, -1);
         }
     }
 
     // keep private to avoid backwards compatibility woes (may move around 
later)
     @SuppressWarnings("Duplicates")
-    private static String toString(Throwable th) {
+    static String toString(Throwable th) {
         StringWriter writer = new StringWriter();
         try (PrintWriter pw = new PrintWriter(writer)) {
             th.printStackTrace(pw);
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
index 1806adf..59998cb 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
@@ -18,14 +18,20 @@
 package org.apache.any23.extractor.rdf;
 
 import com.fasterxml.jackson.core.JsonFactory;
+import com.fasterxml.jackson.core.JsonLocation;
 import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.github.jsonldjava.core.JsonLdOptions;
+import com.github.jsonldjava.core.JsonLdProcessor;
 import com.github.jsonldjava.utils.JsonUtils;
-import org.apache.any23.extractor.ExtractionContext;
-import org.apache.any23.extractor.ExtractionResult;
-import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.*;
+import org.apache.any23.rdf.Any23ValueFactoryWrapper;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
 import org.eclipse.rdf4j.rio.RDFParser;
 
-import java.lang.reflect.Field;
+import java.io.IOException;
+import java.io.InputStream;
 
 /**
  * Concrete implementation of {@link 
org.apache.any23.extractor.Extractor.ContentExtractor}
@@ -34,27 +40,9 @@ import java.lang.reflect.Field;
  */
 public class JSONLDExtractor extends BaseRDFExtractor {
 
-    static {
-        //See https://issues.apache.org/jira/browse/ANY23-336
-        try {
-            //This field was introduced in jsonld-java version 0.12.0
-            if ((Object)JsonUtils.JSONLD_JAVA_USER_AGENT instanceof Void) {
-                throw new Error("This error will never be thrown.");
-            }
-        } catch (NoSuchFieldError th) {
-            throw new AssertionError("You have an outdated version of 
jsonld-java on the classpath. " +
-                    "Upgrade to at least version 0.12.0. See: 
https://issues.apache.org/jira/browse/ANY23-336", th);
-        }
-
-        JsonFactory JSON_FACTORY;
-        try {
-            Field field = JsonUtils.class.getDeclaredField("JSON_FACTORY");
-            field.setAccessible(true);
-            JSON_FACTORY = (JsonFactory)field.get(null);
-        } catch (Exception e) {
-            throw new AssertionError(e);
-        }
+    private static final JsonFactory JSON_FACTORY = new JsonFactory(new 
ObjectMapper());
 
+    static {
         
JSON_FACTORY.enable(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER);
         JSON_FACTORY.disable(JsonParser.Feature.ALLOW_COMMENTS); //handled by 
JsonCleaningInputStream
         JSON_FACTORY.disable(JsonParser.Feature.ALLOW_MISSING_VALUES); 
//handled by JsonCleaningInputStream
@@ -90,4 +78,32 @@ public class JSONLDExtractor extends BaseRDFExtractor {
                 isVerifyDataType(), isStopAtFirstError(), extractionContext, 
extractionResult
         );
     }
+
+    @Override
+    public void run(ExtractionParameters extractionParameters, 
ExtractionContext extractionContext, InputStream in, ExtractionResult 
extractionResult) throws IOException, ExtractionException {
+        JSONLDJavaSink handler = new JSONLDJavaSink(extractionResult, new 
Any23ValueFactoryWrapper(
+                SimpleValueFactory.getInstance(),
+                extractionResult,
+                extractionContext.getDefaultLanguage()
+        ));
+
+        JsonLdOptions options = new 
JsonLdOptions(extractionContext.getDocumentIRI().stringValue());
+        options.useNamespaces = true;
+
+        try {
+            Object json = 
JsonUtils.fromJsonParser(JSON_FACTORY.createParser(new 
JsonCleaningInputStream(in)));
+            JsonLdProcessor.toRDF(json, handler, options);
+        } catch (JsonProcessingException e) {
+            JsonLocation loc = e.getLocation();
+            if (loc == null) {
+                extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, 
e.getOriginalMessage(), -1L, -1L);
+            } else {
+                extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, 
e.getOriginalMessage(), loc.getLineNr(), loc.getColumnNr());
+            }
+        } catch (Exception e) {
+            // ANY23-420: jsonld-java can sometimes throw 
IllegalArgumentException
+            extractionResult.notifyIssue(IssueReport.IssueLevel.FATAL, 
toString(e), -1, -1);
+        }
+    }
+
 }
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDJavaSink.java b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDJavaSink.java
new file mode 100644
index 0000000..4fd5cf8
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDJavaSink.java
@@ -0,0 +1,91 @@
+package org.apache.any23.extractor.rdf;
+
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.apache.any23.extractor.ExtractionResult;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.Value;
+import org.eclipse.rdf4j.model.ValueFactory;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
+
+import com.github.jsonldjava.core.JsonLdTripleCallback;
+import com.github.jsonldjava.core.RDFDataset;
+
+
+class JSONLDJavaSink implements JsonLdTripleCallback {
+
+    private static final String BNODE_PREFIX = "_:";
+
+    private final ExtractionResult handler;
+    private final ValueFactory valueFactory;
+
+    JSONLDJavaSink(ExtractionResult handler, ValueFactory valueFactory) {
+        this.handler = handler;
+        this.valueFactory = valueFactory;
+    }
+
+    private Resource createResource(String arg) {
+        if (arg.startsWith(BNODE_PREFIX)) {
+            String bNodeId = arg.substring(BNODE_PREFIX.length());
+            return bNodeId.isEmpty() ? valueFactory.createBNode() : 
valueFactory.createBNode(bNodeId);
+        }
+        return valueFactory.createIRI(arg);
+    }
+
+    private void writeQuad(String s, String p, Value o, String graphName) {
+        if (s == null || p == null || o == null) {
+            return;
+        }
+
+        if (graphName == null) {
+            handler.writeTriple(createResource(s), valueFactory.createIRI(p), 
o);
+        } else {
+            Resource g = createResource(graphName);
+            if (g instanceof IRI) {
+                handler.writeTriple(createResource(s), 
valueFactory.createIRI(p), o, (IRI)g);
+            }
+            // TODO support resource graph names in Any23
+        }
+    }
+
+
+    @Override
+    public Object call(final RDFDataset dataset) {
+        for (final Entry<String, String> nextNamespace : 
dataset.getNamespaces().entrySet()) {
+            handler.writeNamespace(nextNamespace.getKey(), 
nextNamespace.getValue());
+        }
+        for (String graphName : dataset.keySet()) {
+            final List<RDFDataset.Quad> quads = dataset.getQuads(graphName);
+            if ("@default".equals(graphName)) {
+                graphName = null;
+            }
+            for (RDFDataset.Quad quad : quads) {
+                RDFDataset.Node object = quad.getObject();
+                String s = quad.getSubject().getValue();
+                String p = quad.getPredicate().getValue();
+                String o = object.getValue();
+                if (object.isLiteral()) {
+                    String lang = object.getLanguage();
+                    String datatype = object.getDatatype();
+                    if (lang != null && !lang.isEmpty() &&
+                            (datatype == null || datatype.indexOf(':') < 0
+                                    || 
RDF.LANGSTRING.stringValue().equalsIgnoreCase(datatype)
+                                    || 
XMLSchema.STRING.stringValue().equalsIgnoreCase(datatype))) {
+                        writeQuad(s, p, valueFactory.createLiteral(o, lang), 
graphName);
+                    } else if (datatype != null && !datatype.isEmpty()) {
+                        writeQuad(s, p, valueFactory.createLiteral(o, 
valueFactory.createIRI(datatype)), graphName);
+                    } else {
+                        writeQuad(s, p, valueFactory.createLiteral(o), 
graphName);
+                    }
+                } else {
+                    writeQuad(s, p, createResource(o), graphName);
+                }
+            }
+        }
+        return null;
+    }
+
+}
diff --git a/pom.xml b/pom.xml
index 125c85b..d45cd06 100644
--- a/pom.xml
+++ b/pom.xml
@@ -513,7 +513,7 @@
       <dependency>
         <groupId>com.github.jsonld-java</groupId>
         <artifactId>jsonld-java</artifactId>
-        <version>0.12.3</version>
+        <version>0.12.5</version>
       </dependency>
       <dependency>
         <groupId>org.semarglproject</groupId>

Reply via email to