This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
commit 7a0f2ef3f960016346672cc135a4dd19707ed791 Author: Andy Seaborne <[email protected]> AuthorDate: Thu Apr 25 21:46:49 2024 +0100 Permit parseType='literal' --- .../riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java | 9 ++- .../rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java | 84 ++++++++++------------ .../rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java | 84 +++++++++++----------- .../jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java | 12 ++++ .../apache/jena/riot/lang/rdfxml/rrx/TestRRX.java | 12 +++- 5 files changed, 112 insertions(+), 89 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java index 20328c4a58..18cc028c0a 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java @@ -28,6 +28,7 @@ import javax.xml.namespace.NamespaceContext; import javax.xml.namespace.QName; import org.apache.jena.atlas.io.IndentedWriter; +import org.apache.jena.atlas.logging.Log; import org.apache.jena.datatypes.RDFDatatype; import org.apache.jena.datatypes.xsd.impl.XMLLiteralType; import org.apache.jena.graph.Node; @@ -37,6 +38,7 @@ import org.apache.jena.irix.IRIException; import org.apache.jena.irix.IRIs; import org.apache.jena.irix.IRIx; import org.apache.jena.riot.RiotException; +import org.apache.jena.riot.SysRIOT; import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException; import org.apache.jena.riot.out.NodeFmtLib; import org.apache.jena.riot.system.FactoryRDF; @@ -1046,7 +1048,12 @@ public class ParserRDFXML_SAX if ( parseTypeStr == null ) return ObjectParseType.Plain; try { - return ObjectParseType.valueOf(parseTypeStr); + String parseTypeName = parseTypeStr; + if ( parseTypeName.equals("literal") ) { + Log.warn(SysRIOT.getLogger(), "Encountered rdf:parseType='literal'. Treated as rdf:parseType='literal'"); + parseTypeName = "Literal"; + } + return ObjectParseType.valueOf(parseTypeName); } catch (IllegalArgumentException ex) { throw RDFXMLparseError("Not a legal value for rdf:parseType: '"+parseTypeStr+"'", position); } diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java index 7072ba2ac9..20a0a9558f 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java @@ -31,6 +31,7 @@ import javax.xml.stream.events.*; import org.apache.commons.lang3.StringUtils; import org.apache.jena.atlas.io.IndentedWriter; +import org.apache.jena.atlas.logging.Log; import org.apache.jena.datatypes.RDFDatatype; import org.apache.jena.datatypes.xsd.impl.XMLLiteralType; import org.apache.jena.graph.Node; @@ -39,6 +40,7 @@ import org.apache.jena.graph.Triple; import org.apache.jena.irix.IRIException; import org.apache.jena.irix.IRIx; import org.apache.jena.riot.RiotException; +import org.apache.jena.riot.SysRIOT; import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException; import org.apache.jena.riot.system.ErrorHandler; import org.apache.jena.riot.system.ParserProfile; @@ -198,9 +200,10 @@ public class ParserRDFXML_StAX_EV { // whitespace characters inside elements. Skip it. private static final QName xmlQNameSpace = new QName(XMLConstants.XML_NS_URI, "space"); - private static final String parseTypeCollection = "Collection"; - private static final String parseTypeLiteral = "Literal"; - private static final String parseTypeResource = "Resource"; + private static final String parseTypeCollection = "Collection"; + private static final String parseTypeLiteral = "Literal"; + private static final String parseTypeLiteralAlt = "literal"; + private static final String parseTypeResource = "Resource"; // This is a dummy parseType for when there is no given rdf:parseType. private static final String parseTypePlain = "$$"; @@ -634,30 +637,34 @@ public class ParserRDFXML_StAX_EV { return event; } - switch(parseType) { - case parseTypeResource: { + String parseTypeName = parseType; + if ( parseTypeName.equals(parseTypeLiteralAlt) ) { + Log.warn(SysRIOT.getLogger(), "Encountered rdf:parseType='literal'. Treated as rdf:parseType='literal'"); + parseTypeName = "Literal"; + } + + switch(parseTypeName) { + case parseTypeResource -> { // Implicit <rdf:Description><rdf:Description> i.e. fresh blank node if ( TRACE ) trace.println("rdfParseType=Resource"); XMLEvent event = parseTypeResource(subject, property, emitter, startElt); return event; } - case parseTypeLiteral: { + case parseTypeLiteral -> { if ( TRACE ) trace.println("rdfParseType=Literal"); XMLEvent event = parseTypeLiteral(subject, property, emitter, startElt); return event; } - case parseTypeCollection: { + case parseTypeCollection -> { if ( TRACE ) trace.println("rdfParseType=Collection"); XMLEvent event = parseTypeCollection(subject, property, emitter, startElt); return event; } - case parseTypePlain: - // The code below. - break; - default: + case parseTypePlain -> {} // The code below. + default -> throw RDFXMLparseError("Not a legal defined rdf:parseType: "+parseType, startElt); } @@ -1177,23 +1184,21 @@ public class ParserRDFXML_StAX_EV { XMLEvent ev = xmlEventReader.nextEvent(); int evType = ev.getEventType(); switch (evType) { - case START_ELEMENT, END_ELEMENT: + case START_ELEMENT, END_ELEMENT -> { if ( EVENTS ) System.out.println("-- Tag: "+str(ev)); return ev; - case CHARACTERS, CDATA: + } + case CHARACTERS, CDATA -> { Characters chars = ev.asCharacters(); if ( ! isWhitespace(ev) ) throw RDFXMLparseError("Read "+str(ev)+" when expecting a start or end element.", ev); - // Skip - break; - case COMMENT, DTD: - // Skip - break; - //case SPACE: - //case PROCESSING_INSTRUCTION: - //case ENTITY_DECLARATION: - default: + } + case COMMENT, DTD -> { } // Skip + //case SPACE -> + //case PROCESSING_INSTRUCTION -> + //case ENTITY_DECLARATION -> + default -> // Not expecting any other type of event. throw RDFXMLparseError("Unexpected event "+str(ev), ev); } @@ -1599,33 +1604,23 @@ public class ParserRDFXML_StAX_EV { } private static String str(XMLEvent event) { - switch (event.getEventType()) { - case XMLEvent.START_ELEMENT: - return str(event.asStartElement().getName()); - case XMLEvent.END_ELEMENT: - return "/"+str(event.asEndElement().getName()); - case XMLEvent.CHARACTERS: - return "Event Characters"; + return switch (event.getEventType()) { + case XMLEvent.START_ELEMENT -> str(event.asStartElement().getName()); + case XMLEvent.END_ELEMENT -> "/"+str(event.asEndElement().getName()); + case XMLEvent.CHARACTERS -> "Event Characters"; // @see #ATTRIBUTE // @see #NAMESPACE // @see #PROCESSING_INSTRUCTION // @see #SPACE: - case XMLEvent.COMMENT: - return "Event Comment"; - case XMLEvent.START_DOCUMENT: - return "Event StartDocument"; - case XMLEvent.END_DOCUMENT: - return "Event EndDocument"; - case XMLEvent.DTD: - return "DTD"; - case XMLEvent.ENTITY_DECLARATION: - return "DTD Entity Decl"; - case XMLEvent.ENTITY_REFERENCE: - return "DTD Entity Ref"; + case XMLEvent.COMMENT -> "Event Comment"; + case XMLEvent.START_DOCUMENT -> "Event StartDocument"; + case XMLEvent.END_DOCUMENT -> "Event EndDocument"; + case XMLEvent.DTD -> "DTD"; + case XMLEvent.ENTITY_DECLARATION -> "DTD Entity Decl"; + case XMLEvent.ENTITY_REFERENCE -> "DTD Entity Ref"; // @see #DTD - default: - return ""+event.getEventType(); - } + default ->""+event.getEventType(); + }; } /** The string for the first non-whitespace index. */ @@ -1638,5 +1633,4 @@ public class ParserRDFXML_StAX_EV { } throw new RDFXMLParseException("Failed to find any non-whitespace characters"); } - } diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java index 5b32239b5c..841f6c9c78 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java @@ -35,6 +35,7 @@ import javax.xml.stream.events.XMLEvent; import org.apache.commons.lang3.StringUtils; import org.apache.jena.atlas.io.IndentedWriter; +import org.apache.jena.atlas.logging.Log; import org.apache.jena.datatypes.RDFDatatype; import org.apache.jena.datatypes.xsd.impl.XMLLiteralType; import org.apache.jena.graph.Node; @@ -43,6 +44,7 @@ import org.apache.jena.graph.Triple; import org.apache.jena.irix.IRIException; import org.apache.jena.irix.IRIx; import org.apache.jena.riot.RiotException; +import org.apache.jena.riot.SysRIOT; import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException; import org.apache.jena.riot.system.ErrorHandler; import org.apache.jena.riot.system.ParserProfile; @@ -199,9 +201,10 @@ public class ParserRDFXML_StAX_SR { // whitespace characters inside elements. Skip it. private static final QName xmlQNameSpace = new QName(XMLConstants.XML_NS_URI, "space"); - private static final String parseTypeCollection = "Collection"; - private static final String parseTypeLiteral = "Literal"; - private static final String parseTypeResource = "Resource"; + private static final String parseTypeCollection = "Collection"; + private static final String parseTypeLiteral = "Literal"; + private static final String parseTypeLiteralAlt = "literal"; + private static final String parseTypeResource = "Resource"; // This is a dummy parseType for when there is no given rdf:parseType. private static final String parseTypePlain = "$$"; @@ -609,30 +612,34 @@ public class ParserRDFXML_StAX_SR { return event; } - switch(parseType) { - case parseTypeResource: { + String parseTypeName = parseType; + if ( parseTypeName.equals(parseTypeLiteralAlt) ) { + Log.warn(SysRIOT.getLogger(), "Encountered rdf:parseType='literal'. Treated as rdf:parseType='literal'"); + parseTypeName = "Literal"; + } + + switch(parseTypeName) { + case parseTypeResource -> { // Implicit <rdf:Description><rdf:Description> i.e. fresh blank node if ( TRACE ) trace.println("rdfParseType=Resource"); int event = parseTypeResource(subject, property, emitter, location); return event; } - case parseTypeLiteral: { + case parseTypeLiteral -> { if ( TRACE ) trace.println("rdfParseType=Literal"); int event = parseTypeLiteral(subject, property, emitter, location); return event; } - case parseTypeCollection: { + case parseTypeCollection -> { if ( TRACE ) trace.println("rdfParseType=Collection"); int event = parseTypeCollection(subject, property, emitter, location); return event; } - case parseTypePlain: - // The code below. - break; - default: + case parseTypePlain -> {} // The code below. + default -> throw RDFXMLparseError("Not a legal defined rdf:parseType: "+parseType); } @@ -1145,23 +1152,26 @@ public class ParserRDFXML_StAX_SR { while(xmlSource.hasNext()) { int evType = read(); switch (evType) { - case START_ELEMENT, END_ELEMENT: + case START_ELEMENT, END_ELEMENT -> { if ( EVENTS ) System.out.println("-- Tag: "+strEventType(evType)); return evType; - case CHARACTERS, CDATA: + } + case CHARACTERS, CDATA -> { String chars = xmlSource.getText(); if ( ! isWhitespace(chars) ) throw RDFXMLparseError("Read "+nonWhitespaceForMsg(chars)+" when expecting a start or end element."); // Skip break; - case COMMENT, DTD: + } + case COMMENT, DTD -> { // Loop continue; + } //case SPACE: //case PROCESSING_INSTRUCTION: //case ENTITY_DECLARATION: - default: + default -> // Not expecting any other type of event. throw RDFXMLparseError("Unexpected event "+strEventType(evType)); } @@ -1588,33 +1598,23 @@ public class ParserRDFXML_StAX_SR { } private String strEventType(int eventType) { - switch (eventType) { - case START_ELEMENT: - return str(xmlSource.getName()); - case END_ELEMENT: - return "/"+str(xmlSource.getName()); - case CHARACTERS: - return "Event Characters"; - // @see #ATTRIBUTE - // @see #NAMESPACE - // @see #PROCESSING_INSTRUCTION - // @see #SPACE: - case COMMENT: - return "Event Comment"; - case START_DOCUMENT: - return "Event StartDocument"; - case XMLEvent.END_DOCUMENT: - return "Event EndDocument"; - case DTD: - return "DTD"; - case ENTITY_DECLARATION: - return "DTD Entity Decl"; - case ENTITY_REFERENCE: - return "DTD Entity Ref"; - // @see #DTD - default: - return ""+eventType; - } + return switch (eventType) { + case XMLEvent.START_ELEMENT -> str(xmlSource.getName()); + case XMLEvent.END_ELEMENT -> "/"+str(xmlSource.getName()); + case XMLEvent.CHARACTERS -> "Event Characters"; + // @see #ATTRIBUTE + // @see #NAMESPACE + // @see #PROCESSING_INSTRUCTION + // @see #SPACE: + case XMLEvent.COMMENT -> "Event Comment"; + case XMLEvent.START_DOCUMENT -> "Event StartDocument"; + case XMLEvent.END_DOCUMENT -> "Event EndDocument"; + case XMLEvent.DTD -> "DTD"; + case XMLEvent.ENTITY_DECLARATION -> "DTD Entity Decl"; + case XMLEvent.ENTITY_REFERENCE -> "DTD Entity Ref"; + // @see #DTD + default ->""+eventType; + }; } /** The string for the first non-whitespace index. */ diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java index c9edf7e1d7..d520886656 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java @@ -35,6 +35,7 @@ import java.util.stream.Collectors; import org.apache.jena.atlas.io.IO; import org.apache.jena.atlas.io.IOX; +import org.apache.jena.atlas.logging.LogCtl; import org.apache.jena.graph.Graph; import org.apache.jena.riot.*; import org.apache.jena.riot.lang.rdfxml.ReaderRDFXML_ARP1; @@ -330,6 +331,17 @@ public class RunTestRDFXML { checkErrorHandler(testLabel, actualErrorHandler, -1, 1, -1); } + /** Run a test expecting a warning.. */ + static void runTestExpectWarning(String testLabel, + ReaderRIOTFactory testSubjectFactory, String subjectLabel, + String filename) { + ErrorHandlerCollector actualErrorHandler = new ErrorHandlerCollector(); + LogCtl.withLevel(SysRIOT.getLogger(), "Error", ()-> + parseFile(testSubjectFactory, actualErrorHandler, filename) + ); + checkErrorHandler(testLabel, actualErrorHandler, 0, 0, 1); + } + /** * Run a test, expecting a graph as the result. * Compare with the expected error handler if that argument is not null. diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java index 3054bdf11f..91029ee0c2 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java @@ -65,7 +65,11 @@ public class TestRRX { } @Test public void error02() { - errorTest("error02.rdf"); + // Now valid. parseType="literal" -> parseType="Literal" + // because ARP behaved that way. + //errorTest("error02.rdf"); + // Warning issued. + warningTest("error02.rdf"); } @Test public void noBase01() { @@ -94,6 +98,12 @@ public class TestRRX { } } + private void warningTest(String filename) { + ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang); + String fn = "testing/RIOT/rrx-files/"+filename; + RunTestRDFXML.runTestExpectWarning(filename, factory, label, fn); + } + private void errorTest(String filename) { ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang); String fn = "testing/RIOT/rrx-files/"+filename;
