TAVERNA-1044: Allow partial parsing of metadata.rdf

.. by setting errorHandler() for Jena

.. also move all W3CDTF code to RDFUtils


Project: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/commit/e1e95d19
Tree: 
http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/tree/e1e95d19
Diff: 
http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/diff/e1e95d19

Branch: refs/heads/master
Commit: e1e95d197fc91ae5d10307557a0ca9f1ef1f9e68
Parents: 0c83946
Author: Stian Soiland-Reyes <[email protected]>
Authored: Thu May 10 13:24:48 2018 +0100
Committer: Stian Soiland-Reyes <[email protected]>
Committed: Thu May 10 13:24:48 2018 +0100

----------------------------------------------------------------------
 .../manifest/combine/CombineManifest.java       | 20 ++---
 .../apache/taverna/robundle/utils/RDFUtils.java | 84 +++++++++++++-------
 2 files changed, 63 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/blob/e1e95d19/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
----------------------------------------------------------------------
diff --git 
a/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
 
b/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
index b171835..0efdd81 100644
--- 
a/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
+++ 
b/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
@@ -67,6 +67,7 @@ import org.apache.jena.rdf.model.Statement;
 import org.apache.jena.riot.Lang;
 import org.apache.jena.riot.RDFParser;
 import org.apache.jena.riot.RiotException;
+import org.apache.jena.riot.system.ErrorHandlerFactory;
 import org.apache.taverna.robundle.Bundle;
 import org.apache.taverna.robundle.manifest.Agent;
 import org.apache.taverna.robundle.manifest.PathAnnotation;
@@ -242,8 +243,12 @@ public class CombineManifest {
                                        .base(fakeFileURI(metadata))
                                        .lang(Lang.RDFXML)
                                        .source(in)
+                                       // TAVERNA-1044 avoid bailing out on 
broken XML
+                                       
.errorHandler(ErrorHandlerFactory.errorHandlerWarn)
                                        .parse(model.getGraph());
                }
+               //System.out.println("Parsed:");
+               //model.write(System.out, "turtle");
                return model;
        }
 
@@ -351,7 +356,6 @@ public class CombineManifest {
                Model metadata;
                try {
                        metadata = parseRDF(metadataRdf);
-                       metadata.write(System.out, "turtle");
                } catch (IOException e) {
                        logger.log(WARNING, "Can't read " + metadataRdf, e);
                        return;
@@ -364,7 +368,9 @@ public class CombineManifest {
                for (URI subject : bundleSubjects()) {
                        Resource resource = 
metadata.getResource(fakeFileURI(subject));
                        if (!metadata.containsResource(resource)) {
-                               System.out.println("Nothing known about " + 
resource);
+                               // No metadata about that resource, probably 
OK, but
+                               // could be an absolute/relative path issue
+                               logger.info("No metadata.rdf triples found 
about " + resource);
                                continue;
                        }
 
@@ -391,16 +397,6 @@ public class CombineManifest {
                                createdSt = resource.getProperty(dcCreated);
                        if (createdSt != null) {
                                FileTime fileTime = 
literalAsFileTime(createdSt.getObject());
-                               if (fileTime == null && 
createdSt.getResource().isResource()) {
-                                       // perhaps one of those strange mixups 
of XML and RDF...
-                                       Property dcW3CDTF = metadata
-                                                       
.getProperty("http://purl.org/dc/terms/W3CDTF");
-                                       Statement w3cSt = 
createdSt.getResource().getProperty(
-                                                       dcW3CDTF);
-                                       if (w3cSt != null) {
-                                               fileTime = 
literalAsFileTime(w3cSt.getObject());
-                                       }
-                               }
                                if (fileTime != null) {
                                        pathMetadata.setCreatedOn(fileTime);
                                        if (pathMetadata.getFile() != null)

http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/blob/e1e95d19/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
----------------------------------------------------------------------
diff --git 
a/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
 
b/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
index 2b13922..f92cc71 100644
--- 
a/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
+++ 
b/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
@@ -10,9 +10,9 @@ import static java.nio.file.attribute.FileTime.fromMillis;
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -31,7 +31,9 @@ import java.util.logging.Logger;
 import org.apache.jena.datatypes.DatatypeFormatException;
 import org.apache.jena.datatypes.xsd.XSDDateTime;
 import org.apache.jena.rdf.model.Literal;
+import org.apache.jena.rdf.model.Property;
 import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
 import org.apache.jena.rdf.model.Statement;
 import org.apache.jena.vocabulary.RDF;
 
@@ -43,45 +45,69 @@ public class RDFUtils {
                if (rdfNode == null) {
                        return null;
                }
-               final Literal literal;
+               Literal literal = null;
                if (rdfNode.isLiteral()) {
+                       /* Example:
+                          <dcterms:created
+                            
rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-26T10:29:00Z</dcterms:created>
+                        */
                        literal = rdfNode.asLiteral();
-               } else { 
-                       // TAVERNA-1044: not a literal, so assume a resource.
-                       // Let's climb into rdf:value if it exists, in case 
we're in a
-                       // <dct:W3CDTF> typed bnode.
-                       Statement valueStmt = 
rdfNode.asResource().getProperty(RDF.value);
-                       if (valueStmt == null) {
-                               // Make our own exception so logger gets a 
stacktrace
-                               Exception ex = new Exception("Can't find 
timestamp as literal");
-                               logger.log(Level.WARNING, 
-                                          "Expected literal or resource with 
rdf:value. not " + rdfNode, 
-                                          ex);
-                               return null;
-                       }
-                       if (valueStmt.getObject().isLiteral()) {
-                               literal = valueStmt.getObject().asLiteral();
-                       } else {        
-                               Exception ex = new Exception("Invalid timestamp 
literal");
-                               logger.log(Level.WARNING, 
-                                          "Expected rdf:value statement with 
literal object, not" + valueStmt,
-                                          ex);
-                               return null;                            
+               } else {
+                       // TAVERNA-1044: not a literal, so assume a resource
+                       // with the literal nested somehow
+                       Resource resource = rdfNode.asResource();
+
+                       // Potential type of bnode
+                       Resource dctW3CDTF = 
rdfNode.getModel().getResource("http://purl.org/dc/terms/W3CDTF");
+                       // TAVERNA-1044 - COMBINE sometimes misuses DCT:W3CDTF 
as if it was a property
+                       Property dctW3CDTFProp = 
rdfNode.getModel().getProperty("http://purl.org/dc/terms/W3CDTF");
+
+                       if (resource.hasProperty(RDF.type, dctW3CDTF)) {
+                               // Semantically correct pattern, pick up 
rdf:value directly.
+                               /* Example:
+                            <dcterms:created>
+                             <dcterms:W3CDTF>
+                               <rdf:value>2018-05-10T02:38:51Z</rdf:value>
+                             </dcterms:W3CDTF>
+                           </dcterms:created>
+                                */
+                               Statement valueStmt = 
rdfNode.asResource().getProperty(RDF.value);
+                               if (valueStmt != null && 
valueStmt.getObject().isLiteral()) {
+                                       literal = valueStmt.getLiteral();
+                               }
+                       } else if (resource.hasProperty(dctW3CDTFProp)) {
+                               // TAVERNA-1044: Weird, dct:W3CDTF is a type, 
not a property,
+                               // but we'll pretend it is to be compatible 
with example in
+                               // 
http://identifiers.org/combine.specifications/omex.version-1
+
+                               /* Example:
+                               <dcterms:created rdf:parseType="Resource">
+                                 
<dcterms:W3CDTF>2014-06-26T10:29:00Z</dcterms:W3CDTF>
+                               </dcterms:created>
+                               */
+                               Statement w3cDtfStmt = 
resource.getProperty(dctW3CDTFProp);
+                               if (w3cDtfStmt != null && 
w3cDtfStmt.getObject().isLiteral()) {
+                                       literal = w3cDtfStmt.getLiteral();
+                               }
                        }
                }
+               if (literal == null) {
+                       Exception ex = new Exception("Invalid timestamp 
literal");
+                       logger.log(Level.WARNING,
+                                  "Expected literal value or dcterms:W3CDTF 
instance, not: " + rdfNode,
+                                  ex);
+                       return null;
+               }
 
                Object value = literal.getValue();
                XSDDateTime dateTime;
                if (value instanceof XSDDateTime) {
                        dateTime = (XSDDateTime) value;
                } else {
-                       logger.info("Literal not an XSDDateTime, but: " + 
value.getClass()
-                                       + " " + value);
-
+                       logger.info("Literal not an XSDDateTime, but: " + 
value.getClass() + " " + value);
                        // Try to parse it anyway
                        try {
-                               dateTime = (XSDDateTime) 
XSDdateTime.parse(literal
-                                               .getLexicalForm());
+                               dateTime = (XSDDateTime) 
XSDdateTime.parse(literal.getLexicalForm());
                        } catch (DatatypeFormatException e) {
                                logger.warning("Invalid datetime: " + literal);
                                return null;

Reply via email to