This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git


The following commit(s) were added to refs/heads/main by this push:
     new 53df3373d8 GH-2473: Accept rdf:parseType="Statements"
53df3373d8 is described below

commit 53df3373d861d2592d3d34d0e2652b1b5d0d3f3a
Author: Andy Seaborne <[email protected]>
AuthorDate: Thu May 16 16:25:34 2024 +0100

    GH-2473: Accept rdf:parseType="Statements"
---
 .../riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java     | 21 +++++++++++------
 .../rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java   | 25 ++++++++++++--------
 .../rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java   | 26 ++++++++++++---------
 .../jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java   |  7 +++---
 .../apache/jena/riot/lang/rdfxml/rrx/TestRRX.java  | 15 ++++++++----
 jena-arq/testing/RIOT/rrx-files/README             | 12 ++++++++++
 .../testing/RIOT/rrx-files/cim_statements01.rdf    | 27 ++++++++++++++++++++++
 jena-arq/testing/RIOT/rrx-files/error02.rdf        | 14 -----------
 jena-arq/testing/RIOT/rrx-files/warn01.rdf         | 17 ++++++++++++++
 9 files changed, 115 insertions(+), 49 deletions(-)

diff --git 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
index 827b3691c4..1652836697 100644
--- 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
+++ 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
@@ -28,7 +28,6 @@ import javax.xml.namespace.NamespaceContext;
 import javax.xml.namespace.QName;
 
 import org.apache.jena.atlas.io.IndentedWriter;
-import org.apache.jena.atlas.logging.Log;
 import org.apache.jena.datatypes.RDFDatatype;
 import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
 import org.apache.jena.graph.Node;
@@ -38,7 +37,6 @@ import org.apache.jena.irix.IRIException;
 import org.apache.jena.irix.IRIs;
 import org.apache.jena.irix.IRIx;
 import org.apache.jena.riot.RiotException;
-import org.apache.jena.riot.SysRIOT;
 import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException;
 import org.apache.jena.riot.out.NodeFmtLib;
 import org.apache.jena.riot.system.FactoryRDF;
@@ -252,7 +250,10 @@ class ParserRDFXML_SAX
     /** Node holder for collection items. Holds the node for the last item 
added in the collection at this level. */
     private static class NodeHolder { Node node = null; }
 
-    /** rdf:parseType for objects, with a default "Lexical" case */
+    /**
+     * rdf:parseType for objects, with a default "Lexical" case - see
+     * {@link #objectParseType} for alternative, non-standard names
+     */
     private enum ObjectParseType { Literal, Collection, Resource,
         // This is a extra parseType to indicate the "no ParseType" case
         // which is a plain lexical or nested resource.
@@ -1042,15 +1043,21 @@ class ParserRDFXML_SAX
         return langStr;
     }
 
-
     private ObjectParseType objectParseType(String parseTypeStr, Position 
position) {
         if ( parseTypeStr == null )
             return ObjectParseType.Plain;
         try {
             String parseTypeName = parseTypeStr;
-            if ( parseTypeName.equals("literal") ) {
-                Log.warn(SysRIOT.getLogger(), "Encountered 
rdf:parseType='literal'. Treated as rdf:parseType='literal'");
-                parseTypeName = "Literal";
+            switch(parseTypeName) {
+                case "literal" -> {
+                    RDFXMLparseWarning("Encountered rdf:parseType='literal'. 
Treated as rdf:parseType='Literal'", position);
+                    parseTypeName = "Literal";
+                }
+                // CIM (Common Information Model) - see github issue 2473
+                case "Statements" -> {
+                    RDFXMLparseWarning("Encountered 
rdf:parseType='Statements'. Treated as rdf:parseType='Literal'", position);
+                    parseTypeName = "Literal";
+                }
             }
             return ObjectParseType.valueOf(parseTypeName);
         } catch (IllegalArgumentException ex) {
diff --git 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
index 5e35efa519..e4a0a3a80e 100644
--- 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
+++ 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
@@ -31,7 +31,6 @@ import javax.xml.stream.events.*;
 
 import org.apache.commons.lang3.StringUtils;
 import org.apache.jena.atlas.io.IndentedWriter;
-import org.apache.jena.atlas.logging.Log;
 import org.apache.jena.datatypes.RDFDatatype;
 import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
 import org.apache.jena.graph.Node;
@@ -40,7 +39,6 @@ import org.apache.jena.graph.Triple;
 import org.apache.jena.irix.IRIException;
 import org.apache.jena.irix.IRIx;
 import org.apache.jena.riot.RiotException;
-import org.apache.jena.riot.SysRIOT;
 import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException;
 import org.apache.jena.riot.system.ErrorHandler;
 import org.apache.jena.riot.system.ParserProfile;
@@ -200,10 +198,11 @@ class ParserRDFXML_StAX_EV {
     // whitespace characters inside elements. Skip it.
     private static final QName xmlQNameSpace = new 
QName(XMLConstants.XML_NS_URI, "space");
 
-    private static final String parseTypeCollection  = "Collection";
-    private static final String parseTypeLiteral     = "Literal";
-    private static final String parseTypeLiteralAlt  = "literal";
-    private static final String parseTypeResource    = "Resource";
+    private static final String parseTypeCollection    = "Collection";
+    private static final String parseTypeLiteral       = "Literal";
+    private static final String parseTypeLiteralAlt    = "literal";
+    private static final String parseTypeLiteralStmts  = "Statements";    // 
CIM Github issue 2473
+    private static final String parseTypeResource      = "Resource";
     // This is a dummy parseType for when there is no given rdf:parseType.
     private static final String parseTypePlain = "$$";
 
@@ -579,7 +578,7 @@ class ParserRDFXML_StAX_EV {
     }
 
     private XMLEvent propertyElementProcess(Node subject, StartElement 
startElt, Counter listElementCounter) {
-        Location location = startElt.getLocation();
+        final Location location = startElt.getLocation();
         Node property;
         if ( qNameMatches(rdfContainerItem, startElt.getName()) )
             property = 
iriDirect(rdfNS+"_"+Integer.toString(listElementCounter.value++), location);
@@ -638,9 +637,15 @@ class ParserRDFXML_StAX_EV {
         }
 
         String parseTypeName = parseType;
-        if ( parseTypeName.equals(parseTypeLiteralAlt) ) {
-            Log.warn(SysRIOT.getLogger(), "Encountered 
rdf:parseType='literal'. Treated as rdf:parseType='literal'");
-            parseTypeName = "Literal";
+        switch( parseTypeName) {
+            case parseTypeLiteralAlt -> {
+                RDFXMLparseWarning("Encountered rdf:parseType='literal'. 
Treated as rdf:parseType='Literal'", location);
+                parseTypeName = "Literal";
+            }
+            case parseTypeLiteralStmts -> {
+                RDFXMLparseWarning("Encountered rdf:parseType='Statements'. 
Treated as rdf:parseType='Literal'", location);
+                parseTypeName = "Literal";
+            }
         }
 
         switch(parseTypeName) {
diff --git 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
index ba9aa8e1cd..17a1aebafe 100644
--- 
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
+++ 
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
@@ -35,7 +35,6 @@ import javax.xml.stream.events.XMLEvent;
 
 import org.apache.commons.lang3.StringUtils;
 import org.apache.jena.atlas.io.IndentedWriter;
-import org.apache.jena.atlas.logging.Log;
 import org.apache.jena.datatypes.RDFDatatype;
 import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
 import org.apache.jena.graph.Node;
@@ -44,7 +43,6 @@ import org.apache.jena.graph.Triple;
 import org.apache.jena.irix.IRIException;
 import org.apache.jena.irix.IRIx;
 import org.apache.jena.riot.RiotException;
-import org.apache.jena.riot.SysRIOT;
 import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException;
 import org.apache.jena.riot.system.ErrorHandler;
 import org.apache.jena.riot.system.ParserProfile;
@@ -201,10 +199,11 @@ class ParserRDFXML_StAX_SR {
     // whitespace characters inside elements. Skip it.
     private static final QName xmlQNameSpace = new 
QName(XMLConstants.XML_NS_URI, "space");
 
-    private static final String parseTypeCollection  = "Collection";
-    private static final String parseTypeLiteral     = "Literal";
-    private static final String parseTypeLiteralAlt  = "literal";
-    private static final String parseTypeResource    = "Resource";
+    private static final String parseTypeCollection    = "Collection";
+    private static final String parseTypeLiteral       = "Literal";
+    private static final String parseTypeLiteralAlt    = "literal";
+    private static final String parseTypeLiteralStmts  = "Statements";    // 
CIM Github issue 2473
+    private static final String parseTypeResource      = "Resource";
     // This is a dummy parseType for when there is no given rdf:parseType.
     private static final String parseTypePlain = "$$";
 
@@ -608,16 +607,21 @@ class ParserRDFXML_StAX_SR {
             // Must be an empty element.
             int event = nextEventAny();
             if ( ! lookingAt(event, END_ELEMENT) )
-                throw RDFXMLparseError("Expecting end element tag when using 
rdf:resource or rdf:NodeId on a proeprty.");
+                throw RDFXMLparseError("Expecting end element tag when using 
rdf:resource or rdf:NodeId on a property.");
             return event;
         }
 
         String parseTypeName = parseType;
-        if ( parseTypeName.equals(parseTypeLiteralAlt) ) {
-            Log.warn(SysRIOT.getLogger(), "Encountered 
rdf:parseType='literal'. Treated as rdf:parseType='literal'");
-            parseTypeName = "Literal";
+        switch( parseTypeName) {
+            case parseTypeLiteralAlt -> {
+                RDFXMLparseWarning("Encountered rdf:parseType='literal'. 
Treated as rdf:parseType='Literal'", location());
+                parseTypeName = "Literal";
+            }
+            case parseTypeLiteralStmts -> {
+                RDFXMLparseWarning("Encountered rdf:parseType='Statements'. 
Treated as rdf:parseType='Literal'", location());
+                parseTypeName = "Literal";
+            }
         }
-
         switch(parseTypeName) {
             case parseTypeResource -> {
                 // Implicit <rdf:Description><rdf:Description> i.e. fresh 
blank node
diff --git 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
index d520886656..09aaea80ec 100644
--- 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
+++ 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
@@ -328,18 +328,19 @@ public class RunTestRDFXML {
             parseFile(testSubjectFactory, actualErrorHandler, filename);
             output.printf("## Expected RiotExpection : %-4s : %s : %s", 
subjectLabel, testLabel, filename);
         });
-        checkErrorHandler(testLabel, actualErrorHandler, -1, 1, -1);
+        checkErrorHandler(testLabel, actualErrorHandler, -1, 1, 0);
     }
 
     /** Run a test expecting a warning.. */
     static void runTestExpectWarning(String testLabel,
                                      ReaderRIOTFactory testSubjectFactory, 
String subjectLabel,
+                                     int numWarnings,
                                      String filename) {
         ErrorHandlerCollector actualErrorHandler = new ErrorHandlerCollector();
         LogCtl.withLevel(SysRIOT.getLogger(), "Error", ()->
             parseFile(testSubjectFactory, actualErrorHandler, filename)
             );
-        checkErrorHandler(testLabel, actualErrorHandler, 0, 0, 1);
+        checkErrorHandler(testLabel, actualErrorHandler, numWarnings, 0, 0);
     }
 
     /**
@@ -446,7 +447,7 @@ public class RunTestRDFXML {
     /** Counts check of an error handler */
     private static void checkErrorHandler(String testLabel, 
ErrorHandlerCollector errorHandler, int countWarnings, int countErrors, int 
countFatals) {
         if ( countFatals >= 0 )
-            assertEquals("Fatal message counts different", countWarnings, 
errorHandler.fatals.size());
+            assertEquals("Fatal message counts different", countFatals, 
errorHandler.fatals.size());
         if ( countErrors >= 0 )
             assertEquals("Error message counts different", countErrors, 
errorHandler.errors.size());
         if ( countWarnings >= 0 )
diff --git 
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java 
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
index 91029ee0c2..00eac3a12b 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
@@ -64,12 +64,19 @@ public class TestRRX {
         errorTest("error01.rdf");
     }
 
-    @Test public void error02() {
+    @Test public void warn_literal() {
         // Now valid. parseType="literal" -> parseType="Literal"
         // because ARP behaved that way.
+        // Warning issued.
+        warningTest("warn01.rdf", 1);
+    }
+
+    @Test public void cim_statements01() {
+        // parseType="Statements"
+        // because ARP behaved that way.
         //errorTest("error02.rdf");
         // Warning issued.
-        warningTest("error02.rdf");
+        warningTest("cim_statements01.rdf", 2);
     }
 
     @Test public void noBase01() {
@@ -98,10 +105,10 @@ public class TestRRX {
         }
     }
 
-    private void warningTest(String filename) {
+    private void warningTest(String filename, int warnings) {
         ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang);
         String fn = "testing/RIOT/rrx-files/"+filename;
-        RunTestRDFXML.runTestExpectWarning(filename, factory, label, fn);
+        RunTestRDFXML.runTestExpectWarning(filename, factory, label, warnings, 
fn);
     }
 
     private void errorTest(String filename) {
diff --git a/jena-arq/testing/RIOT/rrx-files/README 
b/jena-arq/testing/RIOT/rrx-files/README
new file mode 100644
index 0000000000..79eefb4d90
--- /dev/null
+++ b/jena-arq/testing/RIOT/rrx-files/README
@@ -0,0 +1,12 @@
+# Extensions to RDF/XML
+
+
+CIM - uses rdf:parseType="Statements" for rdf:parseType="Literal"
+  https://github.com/apache/jena/issues/2473
+  rdfxml-cim-1.rdf
+
+Lower case "l" for parse type literal
+  A common mistake.
+    https://github.com/apache/jena/issues/2430
+    https://github.com/apache/jena/pull/2431/commits/a324fd4c1502c410fdb91c557ed2785795acbba3
+  rdfxml-literal-1.rdf
diff --git a/jena-arq/testing/RIOT/rrx-files/cim_statements01.rdf 
b/jena-arq/testing/RIOT/rrx-files/cim_statements01.rdf
new file mode 100644
index 0000000000..a1d223b25c
--- /dev/null
+++ b/jena-arq/testing/RIOT/rrx-files/cim_statements01.rdf
@@ -0,0 +1,27 @@
+<?xml version='1.0'?>
+<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 -->
+
+<rdf:RDF
+    xmlns:cim="http://iec.ch/TC57/2014/CIM-schema-cim16#"
+    xmlns:dm="http://iec.ch/2002/schema/CIM_difference_model#"
+    xmlns:md="http://iec.ch/TC57/61970-552/ModelDescription/1#"
+    xmlns:meta="http://iec.ch/TC57/2014/CIM-schema-cim16#"
+    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+    xmlns="http://local/"
+    xmlns:ex="http://example/"
+    xml:base="http://base/">
+
+  <!-- parse type "Statements" -->
+  <dm:DifferenceModel rdf:about="#_248c809d-1d7b-397c-830f-6928007ae6d9">      
          
+    <dm:forwardDifferences rdf:parseType="Statements">
+      <cim:A rdf:about="#_individual-A-1">
+        <cim:A-2-B rdf:resource="#_individual-B-1"/>
+      </cim:A>
+      <cim:B rdf:about="#_individual-B-1"/>
+      <cim:D rdf:about="#_individual-D-1"/>
+    </dm:forwardDifferences>
+    <dm:reverseDifferences rdf:parseType="Statements">
+    </dm:reverseDifferences>
+  </dm:DifferenceModel>
+
+</rdf:RDF>
diff --git a/jena-arq/testing/RIOT/rrx-files/error02.rdf 
b/jena-arq/testing/RIOT/rrx-files/error02.rdf
deleted file mode 100644
index 1cbee6fd55..0000000000
--- a/jena-arq/testing/RIOT/rrx-files/error02.rdf
+++ /dev/null
@@ -1,14 +0,0 @@
-<?xml version='1.0'?>
-<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 -->
-
-<rdf:RDF
-    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
-    xmlns:ex="http://example/"
-    >
-
-  <!-- rdf:parseType not recognized (should be capitalized "Literal") -->
-  <rdf:Description rdf:about="http://host/subject">
-    <ex:property rdf:parseType="literal"/>
-  </rdf:Description>
-
-</rdf:RDF>
diff --git a/jena-arq/testing/RIOT/rrx-files/warn01.rdf 
b/jena-arq/testing/RIOT/rrx-files/warn01.rdf
new file mode 100644
index 0000000000..988705dd9f
--- /dev/null
+++ b/jena-arq/testing/RIOT/rrx-files/warn01.rdf
@@ -0,0 +1,17 @@
+<?xml version='1.0'?>
+<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 -->
+
+<rdf:RDF
+    xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+    xmlns="http://local/"
+    xmlns:ex="http://example/"
+    xml:base="http://base/">
+
+  <rdf:Description rdf:about="http://example.org/basket">
+    <!-- Lower case "literal" - it should be capitalized "Literal" -->
+    <ex:xmlliteral rdf:parseType="literal">
+      <innerTag>Inner Tag</innerTag>
+    </ex:xmlliteral>
+  </rdf:Description> 
+
+</rdf:RDF>

Reply via email to