TAVERNA-1044: Allow partial parsing of metadata.rdf by setting an errorHandler() on the Jena RDFParser.
Also move all W3CDTF handling code to RDFUtils. Project: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/commit/e1e95d19 Tree: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/tree/e1e95d19 Diff: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/diff/e1e95d19 Branch: refs/heads/master Commit: e1e95d197fc91ae5d10307557a0ca9f1ef1f9e68 Parents: 0c83946 Author: Stian Soiland-Reyes <[email protected]> Authored: Thu May 10 13:24:48 2018 +0100 Committer: Stian Soiland-Reyes <[email protected]> Committed: Thu May 10 13:24:48 2018 +0100 ---------------------------------------------------------------------- .../manifest/combine/CombineManifest.java | 20 ++--- .../apache/taverna/robundle/utils/RDFUtils.java | 84 +++++++++++++------- 2 files changed, 63 insertions(+), 41 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/blob/e1e95d19/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java ---------------------------------------------------------------------- diff --git a/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java b/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java index b171835..0efdd81 100644 --- a/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java +++ b/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java @@ -67,6 +67,7 @@ import org.apache.jena.rdf.model.Statement; import org.apache.jena.riot.Lang; import org.apache.jena.riot.RDFParser; import org.apache.jena.riot.RiotException; +import org.apache.jena.riot.system.ErrorHandlerFactory; import org.apache.taverna.robundle.Bundle; import org.apache.taverna.robundle.manifest.Agent; 
import org.apache.taverna.robundle.manifest.PathAnnotation; @@ -242,8 +243,12 @@ public class CombineManifest { .base(fakeFileURI(metadata)) .lang(Lang.RDFXML) .source(in) + // TAVERNA-1044 avoid bailing out on broken XML + .errorHandler(ErrorHandlerFactory.errorHandlerWarn) .parse(model.getGraph()); } + //System.out.println("Parsed:"); + //model.write(System.out, "turtle"); return model; } @@ -351,7 +356,6 @@ public class CombineManifest { Model metadata; try { metadata = parseRDF(metadataRdf); - metadata.write(System.out, "turtle"); } catch (IOException e) { logger.log(WARNING, "Can't read " + metadataRdf, e); return; @@ -364,7 +368,9 @@ public class CombineManifest { for (URI subject : bundleSubjects()) { Resource resource = metadata.getResource(fakeFileURI(subject)); if (!metadata.containsResource(resource)) { - System.out.println("Nothing known about " + resource); + // No metadata about that resource, probably OK, but + // could be an absolute/relative path issue + logger.info("No metadata.rdf triples found about " + resource); continue; } @@ -391,16 +397,6 @@ public class CombineManifest { createdSt = resource.getProperty(dcCreated); if (createdSt != null) { FileTime fileTime = literalAsFileTime(createdSt.getObject()); - if (fileTime == null && createdSt.getResource().isResource()) { - // perhaps one of those strange mixups of XML and RDF... 
- Property dcW3CDTF = metadata - .getProperty("http://purl.org/dc/terms/W3CDTF"); - Statement w3cSt = createdSt.getResource().getProperty( - dcW3CDTF); - if (w3cSt != null) { - fileTime = literalAsFileTime(w3cSt.getObject()); - } - } if (fileTime != null) { pathMetadata.setCreatedOn(fileTime); if (pathMetadata.getFile() != null) http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/blob/e1e95d19/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java ---------------------------------------------------------------------- diff --git a/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java b/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java index 2b13922..f92cc71 100644 --- a/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java +++ b/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java @@ -10,9 +10,9 @@ import static java.nio.file.attribute.FileTime.fromMillis; * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. 
You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -31,7 +31,9 @@ import java.util.logging.Logger; import org.apache.jena.datatypes.DatatypeFormatException; import org.apache.jena.datatypes.xsd.XSDDateTime; import org.apache.jena.rdf.model.Literal; +import org.apache.jena.rdf.model.Property; import org.apache.jena.rdf.model.RDFNode; +import org.apache.jena.rdf.model.Resource; import org.apache.jena.rdf.model.Statement; import org.apache.jena.vocabulary.RDF; @@ -43,45 +45,69 @@ public class RDFUtils { if (rdfNode == null) { return null; } - final Literal literal; + Literal literal = null; if (rdfNode.isLiteral()) { + /* Example: + <dcterms:created + rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-26T10:29:00Z</dcterms:created> + */ literal = rdfNode.asLiteral(); - } else { - // TAVERNA-1044: not a literal, so assume a resource. - // Let's climb into rdf:value if it exists, in case we're in a - // <dct:W3CDTF> typed bnode. - Statement valueStmt = rdfNode.asResource().getProperty(RDF.value); - if (valueStmt == null) { - // Make our own exception so logger gets a stacktrace - Exception ex = new Exception("Can't find timestamp as literal"); - logger.log(Level.WARNING, - "Expected literal or resource with rdf:value. 
not " + rdfNode, - ex); - return null; - } - if (valueStmt.getObject().isLiteral()) { - literal = valueStmt.getObject().asLiteral(); - } else { - Exception ex = new Exception("Invalid timestamp literal"); - logger.log(Level.WARNING, - "Expected rdf:value statement with literal object, not" + valueStmt, - ex); - return null; + } else { + // TAVERNA-1044: not a literal, so assume a resource + // with the literal nested somehow + Resource resource = rdfNode.asResource(); + + // Potential type of bnode + Resource dctW3CDTF = rdfNode.getModel().getResource("http://purl.org/dc/terms/W3CDTF"); + // TAVERNA-1044 - COMBIE sometimes mis-use DCT:W3CDTF as if it was a property + Property dctW3CDTFProp = rdfNode.getModel().getProperty("http://purl.org/dc/terms/W3CDTF"); + + if (resource.hasProperty(RDF.type, dctW3CDTF)) { + // Semantically correct pattern, pick up rdf:value directly. + /* Example: + <dcterms:created> + <dcterms:W3CDTF> + <rdf:value>2018-05-10T02:38:51Z</rdf:value> + </dcterms:W3CDTF> + </dcterms:created> + */ + Statement valueStmt = rdfNode.asResource().getProperty(RDF.value); + if (valueStmt != null && valueStmt.getObject().isLiteral()) { + literal = valueStmt.getLiteral(); + } + } else if (resource.hasProperty(dctW3CDTFProp)) { + // TAVERNA-1044: Weird, dct:W3CDTF is a type, not a property, + // but we'll pretend it is to be compatible with example in + // http://identifiers.org/combine.specifications/omex.version-1 + + /* Example: + <dcterms:created rdf:parseType="Resource"> + <dcterms:W3CDTF>2014-06-26T10:29:00Z</dcterms:W3CDTF> + </dcterms:created> + */ + Statement w3cDtfStmt = resource.getProperty(dctW3CDTFProp); + if (w3cDtfStmt != null && w3cDtfStmt.getObject().isLiteral()) { + literal = w3cDtfStmt.getLiteral(); + } } } + if (literal == null) { + Exception ex = new Exception("Invalid timestamp literal"); + logger.log(Level.WARNING, + "Expected literal value or dcterms:W3CDTF instance, not: " + rdfNode, + ex); + return null; + } Object value = 
literal.getValue(); XSDDateTime dateTime; if (value instanceof XSDDateTime) { dateTime = (XSDDateTime) value; } else { - logger.info("Literal not an XSDDateTime, but: " + value.getClass() - + " " + value); - + logger.info("Literal not an XSDDateTime, but: " + value.getClass() + " " + value); // Try to parse it anyway try { - dateTime = (XSDDateTime) XSDdateTime.parse(literal - .getLexicalForm()); + dateTime = (XSDDateTime) XSDdateTime.parse(literal.getLexicalForm()); } catch (DatatypeFormatException e) { logger.warning("Invalid datetime: " + literal); return null;
