This is an automated email from the ASF dual-hosted git repository.
andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git
The following commit(s) were added to refs/heads/main by this push:
new 53df3373d8 GH-2473: Accept rdf:parseType="Statements"
53df3373d8 is described below
commit 53df3373d861d2592d3d34d0e2652b1b5d0d3f3a
Author: Andy Seaborne <[email protected]>
AuthorDate: Thu May 16 16:25:34 2024 +0100
GH-2473: Accept rdf:parseType="Statements"
---
.../riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java | 21 +++++++++++------
.../rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java | 25 ++++++++++++--------
.../rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java | 26 ++++++++++++---------
.../jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java | 7 +++---
.../apache/jena/riot/lang/rdfxml/rrx/TestRRX.java | 15 ++++++++----
jena-arq/testing/RIOT/rrx-files/README | 12 ++++++++++
.../testing/RIOT/rrx-files/cim_statements01.rdf | 27 ++++++++++++++++++++++
jena-arq/testing/RIOT/rrx-files/error02.rdf | 14 -----------
jena-arq/testing/RIOT/rrx-files/warn01.rdf | 17 ++++++++++++++
9 files changed, 115 insertions(+), 49 deletions(-)
diff --git
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
index 827b3691c4..1652836697 100644
---
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
+++
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
@@ -28,7 +28,6 @@ import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import org.apache.jena.atlas.io.IndentedWriter;
-import org.apache.jena.atlas.logging.Log;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
import org.apache.jena.graph.Node;
@@ -38,7 +37,6 @@ import org.apache.jena.irix.IRIException;
import org.apache.jena.irix.IRIs;
import org.apache.jena.irix.IRIx;
import org.apache.jena.riot.RiotException;
-import org.apache.jena.riot.SysRIOT;
import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException;
import org.apache.jena.riot.out.NodeFmtLib;
import org.apache.jena.riot.system.FactoryRDF;
@@ -252,7 +250,10 @@ class ParserRDFXML_SAX
/** Node holder for collection items. Holds the node for the last item
added in the collection at this level. */
private static class NodeHolder { Node node = null; }
- /** rdf:parseType for objects, with a default "Lexical" case */
+ /**
+ * rdf:parseType for objects, with a default "Lexical" case - see
+ * {@link #objectParseType} for alternative, non-standard names
+ */
private enum ObjectParseType { Literal, Collection, Resource,
// This is a extra parseType to indicate the "no ParseType" case
// which is a plain lexical or nested resource.
@@ -1042,15 +1043,21 @@ class ParserRDFXML_SAX
return langStr;
}
-
private ObjectParseType objectParseType(String parseTypeStr, Position
position) {
if ( parseTypeStr == null )
return ObjectParseType.Plain;
try {
String parseTypeName = parseTypeStr;
- if ( parseTypeName.equals("literal") ) {
- Log.warn(SysRIOT.getLogger(), "Encountered
rdf:parseType='literal'. Treated as rdf:parseType='literal'");
- parseTypeName = "Literal";
+ switch(parseTypeName) {
+ case "literal" -> {
+ RDFXMLparseWarning("Encountered rdf:parseType='literal'.
Treated as rdf:parseType='Literal'", position);
+ parseTypeName = "Literal";
+ }
+ // CIM (Common Information Model) - see github issue 2473
+ case "Statements" -> {
+ RDFXMLparseWarning("Encountered
rdf:parseType='Statements'. Treated as rdf:parseType='Literal'", position);
+ parseTypeName = "Literal";
+ }
}
return ObjectParseType.valueOf(parseTypeName);
} catch (IllegalArgumentException ex) {
diff --git
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
index 5e35efa519..e4a0a3a80e 100644
---
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
+++
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
@@ -31,7 +31,6 @@ import javax.xml.stream.events.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.atlas.io.IndentedWriter;
-import org.apache.jena.atlas.logging.Log;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
import org.apache.jena.graph.Node;
@@ -40,7 +39,6 @@ import org.apache.jena.graph.Triple;
import org.apache.jena.irix.IRIException;
import org.apache.jena.irix.IRIx;
import org.apache.jena.riot.RiotException;
-import org.apache.jena.riot.SysRIOT;
import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException;
import org.apache.jena.riot.system.ErrorHandler;
import org.apache.jena.riot.system.ParserProfile;
@@ -200,10 +198,11 @@ class ParserRDFXML_StAX_EV {
// whitespace characters inside elements. Skip it.
private static final QName xmlQNameSpace = new
QName(XMLConstants.XML_NS_URI, "space");
- private static final String parseTypeCollection = "Collection";
- private static final String parseTypeLiteral = "Literal";
- private static final String parseTypeLiteralAlt = "literal";
- private static final String parseTypeResource = "Resource";
+ private static final String parseTypeCollection = "Collection";
+ private static final String parseTypeLiteral = "Literal";
+ private static final String parseTypeLiteralAlt = "literal";
+ private static final String parseTypeLiteralStmts = "Statements"; //
CIM Github issue 2473
+ private static final String parseTypeResource = "Resource";
// This is a dummy parseType for when there is no given rdf:parseType.
private static final String parseTypePlain = "$$";
@@ -579,7 +578,7 @@ class ParserRDFXML_StAX_EV {
}
private XMLEvent propertyElementProcess(Node subject, StartElement
startElt, Counter listElementCounter) {
- Location location = startElt.getLocation();
+ final Location location = startElt.getLocation();
Node property;
if ( qNameMatches(rdfContainerItem, startElt.getName()) )
property =
iriDirect(rdfNS+"_"+Integer.toString(listElementCounter.value++), location);
@@ -638,9 +637,15 @@ class ParserRDFXML_StAX_EV {
}
String parseTypeName = parseType;
- if ( parseTypeName.equals(parseTypeLiteralAlt) ) {
- Log.warn(SysRIOT.getLogger(), "Encountered
rdf:parseType='literal'. Treated as rdf:parseType='literal'");
- parseTypeName = "Literal";
+ switch( parseTypeName) {
+ case parseTypeLiteralAlt -> {
+ RDFXMLparseWarning("Encountered rdf:parseType='literal'.
Treated as rdf:parseType='Literal'", location);
+ parseTypeName = "Literal";
+ }
+ case parseTypeLiteralStmts -> {
+ RDFXMLparseWarning("Encountered rdf:parseType='Statements'.
Treated as rdf:parseType='Literal'", location);
+ parseTypeName = "Literal";
+ }
}
switch(parseTypeName) {
diff --git
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
index ba9aa8e1cd..17a1aebafe 100644
---
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
+++
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
@@ -35,7 +35,6 @@ import javax.xml.stream.events.XMLEvent;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.atlas.io.IndentedWriter;
-import org.apache.jena.atlas.logging.Log;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
import org.apache.jena.graph.Node;
@@ -44,7 +43,6 @@ import org.apache.jena.graph.Triple;
import org.apache.jena.irix.IRIException;
import org.apache.jena.irix.IRIx;
import org.apache.jena.riot.RiotException;
-import org.apache.jena.riot.SysRIOT;
import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException;
import org.apache.jena.riot.system.ErrorHandler;
import org.apache.jena.riot.system.ParserProfile;
@@ -201,10 +199,11 @@ class ParserRDFXML_StAX_SR {
// whitespace characters inside elements. Skip it.
private static final QName xmlQNameSpace = new
QName(XMLConstants.XML_NS_URI, "space");
- private static final String parseTypeCollection = "Collection";
- private static final String parseTypeLiteral = "Literal";
- private static final String parseTypeLiteralAlt = "literal";
- private static final String parseTypeResource = "Resource";
+ private static final String parseTypeCollection = "Collection";
+ private static final String parseTypeLiteral = "Literal";
+ private static final String parseTypeLiteralAlt = "literal";
+ private static final String parseTypeLiteralStmts = "Statements"; //
CIM Github issue 2473
+ private static final String parseTypeResource = "Resource";
// This is a dummy parseType for when there is no given rdf:parseType.
private static final String parseTypePlain = "$$";
@@ -608,16 +607,21 @@ class ParserRDFXML_StAX_SR {
// Must be an empty element.
int event = nextEventAny();
if ( ! lookingAt(event, END_ELEMENT) )
- throw RDFXMLparseError("Expecting end element tag when using
rdf:resource or rdf:NodeId on a proeprty.");
+ throw RDFXMLparseError("Expecting end element tag when using
rdf:resource or rdf:NodeId on a property.");
return event;
}
String parseTypeName = parseType;
- if ( parseTypeName.equals(parseTypeLiteralAlt) ) {
- Log.warn(SysRIOT.getLogger(), "Encountered
rdf:parseType='literal'. Treated as rdf:parseType='literal'");
- parseTypeName = "Literal";
+ switch( parseTypeName) {
+ case parseTypeLiteralAlt -> {
+ RDFXMLparseWarning("Encountered rdf:parseType='literal'.
Treated as rdf:parseType='Literal'", location());
+ parseTypeName = "Literal";
+ }
+ case parseTypeLiteralStmts -> {
+ RDFXMLparseWarning("Encountered rdf:parseType='Statements'.
Treated as rdf:parseType='Literal'", location());
+ parseTypeName = "Literal";
+ }
}
-
switch(parseTypeName) {
case parseTypeResource -> {
// Implicit <rdf:Description><rdf:Description> i.e. fresh
blank node
diff --git
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
index d520886656..09aaea80ec 100644
---
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
+++
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
@@ -328,18 +328,19 @@ public class RunTestRDFXML {
parseFile(testSubjectFactory, actualErrorHandler, filename);
output.printf("## Expected RiotExpection : %-4s : %s : %s",
subjectLabel, testLabel, filename);
});
- checkErrorHandler(testLabel, actualErrorHandler, -1, 1, -1);
+ checkErrorHandler(testLabel, actualErrorHandler, -1, 1, 0);
}
/** Run a test expecting a warning.. */
static void runTestExpectWarning(String testLabel,
ReaderRIOTFactory testSubjectFactory,
String subjectLabel,
+ int numWarnings,
String filename) {
ErrorHandlerCollector actualErrorHandler = new ErrorHandlerCollector();
LogCtl.withLevel(SysRIOT.getLogger(), "Error", ()->
parseFile(testSubjectFactory, actualErrorHandler, filename)
);
- checkErrorHandler(testLabel, actualErrorHandler, 0, 0, 1);
+ checkErrorHandler(testLabel, actualErrorHandler, numWarnings, 0, 0);
}
/**
@@ -446,7 +447,7 @@ public class RunTestRDFXML {
/** Counts check of an error handler */
private static void checkErrorHandler(String testLabel,
ErrorHandlerCollector errorHandler, int countWarnings, int countErrors, int
countFatals) {
if ( countFatals >= 0 )
- assertEquals("Fatal message counts different", countWarnings,
errorHandler.fatals.size());
+ assertEquals("Fatal message counts different", countFatals,
errorHandler.fatals.size());
if ( countErrors >= 0 )
assertEquals("Error message counts different", countErrors,
errorHandler.errors.size());
if ( countWarnings >= 0 )
diff --git
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
index 91029ee0c2..00eac3a12b 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
@@ -64,12 +64,19 @@ public class TestRRX {
errorTest("error01.rdf");
}
- @Test public void error02() {
+ @Test public void warn_literal() {
// Now valid. parseType="literal" -> parseType="Literal"
// because ARP behaved that way.
+ // Warning issued.
+ warningTest("warn01.rdf", 1);
+ }
+
+ @Test public void cim_statements01() {
+ // parseType="Statements"
+ // because ARP behaved that way.
//errorTest("error02.rdf");
// Warning issued.
- warningTest("error02.rdf");
+ warningTest("cim_statements01.rdf", 2);
}
@Test public void noBase01() {
@@ -98,10 +105,10 @@ public class TestRRX {
}
}
- private void warningTest(String filename) {
+ private void warningTest(String filename, int warnings) {
ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang);
String fn = "testing/RIOT/rrx-files/"+filename;
- RunTestRDFXML.runTestExpectWarning(filename, factory, label, fn);
+ RunTestRDFXML.runTestExpectWarning(filename, factory, label, warnings,
fn);
}
private void errorTest(String filename) {
diff --git a/jena-arq/testing/RIOT/rrx-files/README
b/jena-arq/testing/RIOT/rrx-files/README
new file mode 100644
index 0000000000..79eefb4d90
--- /dev/null
+++ b/jena-arq/testing/RIOT/rrx-files/README
@@ -0,0 +1,12 @@
+# Extensions to RDF/XML
+
+
+CIM - uses rdf:parseType="Statements"; parsed as rdf:parseType="Literal" with a warning
+ https://github.com/apache/jena/issues/2473
+ rdfxml-cim-1.rdf
+
+Lower case "l" for parse type literal
+ A common mistake.
+ https://github.com/apache/jena/issues/2430
+
https://github.com/apache/jena/pull/2431/commits/a324fd4c1502c410fdb91c557ed2785795acbba3
+ rdfxml-literal-1.rdf
diff --git a/jena-arq/testing/RIOT/rrx-files/cim_statements01.rdf
b/jena-arq/testing/RIOT/rrx-files/cim_statements01.rdf
new file mode 100644
index 0000000000..a1d223b25c
--- /dev/null
+++ b/jena-arq/testing/RIOT/rrx-files/cim_statements01.rdf
@@ -0,0 +1,27 @@
+<?xml version='1.0'?>
+<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0
-->
+
+<rdf:RDF
+ xmlns:cim="http://iec.ch/TC57/2014/CIM-schema-cim16#"
+ xmlns:dm="http://iec.ch/2002/schema/CIM_difference_model#"
+ xmlns:md="http://iec.ch/TC57/61970-552/ModelDescription/1#"
+ xmlns:meta="http://iec.ch/TC57/2014/CIM-schema-cim16#"
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns="http://local/"
+ xmlns:ex="http://example/"
+ xml:base="http://base/">
+
+ <!-- parse type "Statements" -->
+ <dm:DifferenceModel rdf:about="#_248c809d-1d7b-397c-830f-6928007ae6d9">
+ <dm:forwardDifferences rdf:parseType="Statements">
+ <cim:A rdf:about="#_individual-A-1">
+ <cim:A-2-B rdf:resource="#_individual-B-1"/>
+ </cim:A>
+ <cim:B rdf:about="#_individual-B-1"/>
+ <cim:D rdf:about="#_individual-D-1"/>
+ </dm:forwardDifferences>
+ <dm:reverseDifferences rdf:parseType="Statements">
+ </dm:reverseDifferences>
+ </dm:DifferenceModel>
+
+</rdf:RDF>
diff --git a/jena-arq/testing/RIOT/rrx-files/error02.rdf
b/jena-arq/testing/RIOT/rrx-files/error02.rdf
deleted file mode 100644
index 1cbee6fd55..0000000000
--- a/jena-arq/testing/RIOT/rrx-files/error02.rdf
+++ /dev/null
@@ -1,14 +0,0 @@
-<?xml version='1.0'?>
-<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0
-->
-
-<rdf:RDF
- xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:ex="http://example/"
- >
-
- <!-- rdf:parseType not recognized (should be capitalized "Literal") -->
- <rdf:Description rdf:about="http://host/subject">
- <ex:property rdf:parseType="literal"/>
- </rdf:Description>
-
-</rdf:RDF>
diff --git a/jena-arq/testing/RIOT/rrx-files/warn01.rdf
b/jena-arq/testing/RIOT/rrx-files/warn01.rdf
new file mode 100644
index 0000000000..988705dd9f
--- /dev/null
+++ b/jena-arq/testing/RIOT/rrx-files/warn01.rdf
@@ -0,0 +1,17 @@
+<?xml version='1.0'?>
+<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0
-->
+
+<rdf:RDF
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns="http://local/"
+ xmlns:ex="http://example/"
+ xml:base="http://base/">
+
+ <rdf:Description rdf:about="http://example.org/basket">
+    <!-- Lower case "literal" - it should be uppercase "Literal" -->
+ <ex:xmlliteral rdf:parseType="literal">
+ <innerTag>Inner Tag</innerTag>
+ </ex:xmlliteral>
+ </rdf:Description>
+
+</rdf:RDF>