This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
commit 978891e87aaf15e989416e7907dfb9c2f8d66b02 Author: Andy Seaborne <[email protected]> AuthorDate: Sat Aug 10 15:18:59 2024 +0100 RRX: Consistent behaviour for relative base URIs --- .../{ParserRDFXML_SAX.java => ParserRRX_SAX.java} | 133 +++++++++++---------- .../riot/lang/rdfxml/rrx/ReaderRDFXML_SAX.java | 2 +- ...rRDFXML_StAX_EV.java => ParserRRX_StAX_EV.java} | 85 ++++++++----- .../rdfxml/rrx_stax_ev/ReaderRDFXML_StAX_EV.java | 2 +- ...rRDFXML_StAX_SR.java => ParserRRX_StAX_SR.java} | 93 ++++++++------ .../rdfxml/rrx_stax_sr/ReaderRDFXML_StAX_SR.java | 2 +- .../jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java | 4 +- .../apache/jena/riot/lang/rdfxml/rrx/TestRRX.java | 62 ++++++---- ...-external-base.rdf => base-external-needed.rdf} | 0 .../{file-external-base.rdf => base-inner.rdf} | 5 +- .../rrx-files/{file-no-base.rdf => base-none.rdf} | 0 .../src/main/java/org/apache/jena/irix/IRIx.java | 1 - 12 files changed, 234 insertions(+), 155 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRRX_SAX.java similarity index 95% rename from jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java rename to jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRRX_SAX.java index c52d843727..aca5212fcb 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRRX_SAX.java @@ -27,6 +27,7 @@ import javax.xml.XMLConstants; import javax.xml.namespace.NamespaceContext; import javax.xml.namespace.QName; +import org.apache.commons.lang3.StringUtils; import org.apache.jena.atlas.io.IndentedWriter; import org.apache.jena.atlas.lib.EscapeStr; import org.apache.jena.datatypes.RDFDatatype; @@ -35,7 +36,6 @@ import org.apache.jena.graph.Node; import org.apache.jena.graph.NodeFactory; import org.apache.jena.graph.Triple; 
import org.apache.jena.irix.IRIException; -import org.apache.jena.irix.IRIs; import org.apache.jena.irix.IRIx; import org.apache.jena.riot.RiotException; import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException; @@ -52,7 +52,7 @@ import org.xml.sax.ext.DeclHandler; import org.xml.sax.ext.EntityResolver2; import org.xml.sax.ext.LexicalHandler; -class ParserRDFXML_SAX +class ParserRRX_SAX implements ContentHandler, ErrorHandler, @@ -426,7 +426,7 @@ class ParserRDFXML_SAX // // Forming objects. // private ParseType parseType = null; - ParserRDFXML_SAX(String xmlBase, ParserProfile parserProfile, StreamRDF destination, Context context) { + ParserRRX_SAX(String xmlBase, ParserProfile parserProfile, StreamRDF destination, Context context) { // Debug if ( ReaderRDFXML_SAX.TRACE ) { IndentedWriter out1 = IndentedWriter.stdout.clone(); @@ -665,8 +665,7 @@ class ParserRDFXML_SAX * ++ nodeElementURIs anyURI - ( coreSyntaxTerms | rdf:li | oldTerms ) */ private void startNodeElement(String namespaceURI, String localName, String qName, Attributes attributes, Position position) { - // Subject - maybe inside rdf:RDF - + // Subject String rdfResourceStr = attributes.getValue(rdfNS, rdfResource); if ( rdfResourceStr != null ) throw RDFXMLparseError("rdf:resource not allowed as attribute here: "+qName, position); @@ -934,14 +933,21 @@ class ParserRDFXML_SAX } private void processBaseAndLang(Attributes attributes, Position position) { - // Too early. - IRIx base = xmlBase(attributes, position); - if ( base != null ) { - currentBase = base; + //resolves. + IRIx xmlBase = xmlBase(attributes, position); + String xmlLang = xmlLang(attributes, position); + if ( ReaderRDFXML_SAX.TRACE ) { + if ( xmlBase != null ) + trace.printf("+ BASE <%s>\n", xmlBase); + if ( xmlLang != null ) + trace.printf("+ LANG @%s\n", xmlLang); + } + if ( xmlBase != null ) { + currentBase = xmlBase;// resolve. 
} - String lang = xmlLang(attributes, position); - if ( lang != null ) - currentLang = lang; + + if ( xmlLang != null ) + currentLang = xmlLang; } // Property attributes. @@ -951,9 +957,6 @@ class ParserRDFXML_SAX boolean isPropertyAttribute = checkPropertyAttribute(attributes, i, false, position); if ( ! isPropertyAttribute ) continue; - String namespace = attributes.getURI(i); - String localName = attributes.getLocalName(i); - String qName = attributes.getQName(i); propertyAttribute(subject, attributes, i, position); } } @@ -972,32 +975,34 @@ class ParserRDFXML_SAX /** Return true if this is a property attribute. */ private boolean checkPropertyAttribute(Attributes attributes, int index, boolean outputWarnings, Position position) { - String namespace = attributes.getURI(index); + String namespaceURI = attributes.getURI(index); String localName = attributes.getLocalName(index); String qName = attributes.getQName(index); - if ( namespace == null || namespace.isEmpty() ) { - // In SAX, xmlns: is qname, but namespace and local name are "". + if ( StringUtils.isBlank(namespaceURI) ) { + // Note about XML: The empty string namespace does not apply to XML attributes, + // only XML elements. ":attr" is not legal XML. //RDFXMLparseError("XML attribute '"+qName+"' used for RDF property attribute (no namespace)", position); + if ( outputWarnings ){ if ( ! localName.isEmpty() ) // Skip XML namespace declarations. RDFXMLparseWarning("XML attribute '"+qName+"' used for RDF property attribute - ignored", position); } return false; } - if ( isSyntaxAttribute(namespace, localName) ) + if ( isSyntaxAttribute(namespaceURI, localName) ) return false; - if ( ! allowedPropertyAttributeURIs(namespace, localName) ) + if ( ! 
allowedPropertyAttributeURIs(namespaceURI, localName) ) throw RDFXMLparseError("Not allowed as a property attribute: '"+attributes.getQName(index)+"'", position); - if ( outputWarnings && isNotRecognizedRDFproperty(namespace, localName) ) + if ( outputWarnings && isNotRecognizedRDFproperty(namespaceURI, localName) ) RDFXMLparseWarning(qName+" is not a recognized RDF term for a property attribute", position); - if ( isXMLQName(namespace, localName) ) + if ( isXMLQName(namespaceURI, localName) ) return false; - if ( isXMLNamespace(namespace) ) { + if ( isXMLNamespace(namespaceURI) ) { // Unrecognized qnames in the XMLnamespace are a warning and are ignored. RDFXMLparseWarning("Unrecognized XML attribute: '"+attributes.getQName(index)+"'", position); return false; @@ -1010,48 +1015,48 @@ class ParserRDFXML_SAX /** Output for a property attribute (already checked) */ private void propertyAttribute(Node subject, Attributes attributes, int index, Position position) { - String namespace = attributes.getURI(index); + String namespaceURI = attributes.getURI(index); String localName = attributes.getLocalName(index); String value = attributes.getValue(index); - if ( rdfNS.equals(namespace) ) { + if ( rdfNS.equals(namespaceURI) ) { if ( rdfType.equals(localName) ) { Node type = iriResolve(value, position); emit(subject, Nodes.type, type, position); return; } } - Node property = qNameToIRI(namespace, localName, position); + Node property = qNameToIRI(namespaceURI, localName, position); String lex = value; Node object = literal(lex, currentLang, position); emit(subject, property, object, position); } + /** + * Generate a new base IRIx. + * If this is relative, issue a warning. + * It is an error to use it and the error is generated at the point of use. 
+ */ private IRIx xmlBase(Attributes attributes, Position position) { String baseStr = attributes.getValue(xmlNS, xmlBaseLN); - return xmlBase(baseStr); - } - - private IRIx xmlBase(String baseStr) { if ( baseStr == null ) return null; - if ( currentBase == null ) - return null; - return currentBase.resolve(baseStr); - } - - private String xmlBaseStr(Attributes attributes, Position position) { - String baseStr = attributes.getValue(xmlNS, xmlBaseLN); - if ( baseStr == null ) - return null; - return IRIs.resolve(currentBase, baseStr); + IRIx irix = resolveIRIxAny(baseStr, position); + if ( irix.isRelative() ) + //throw RDFXMLparseError("BANG", position); + RDFXMLparseWarning("Relative URI for base: <"+baseStr+">", position); + return irix; } + /** + * Determine the xml:lang. + */ private String xmlLang(Attributes attributes, Position position) { + // We use null for "no language" so that explicit + // xml:lang="" is different. String langStr = attributes.getValue(xmlNS, xmlLangLN); if ( langStr == null ) - return null; // We use null for "no language" so that explicit - // xml:lang="" is different. + return null; return langStr; } @@ -1264,14 +1269,17 @@ class ParserRDFXML_SAX // ---- Creating terms. - private Node qNameToIRI(String namespace, String localName, Position position) { - String uriStr = qNameToIRI(namespace, localName); + private Node qNameToIRI(String namespaceURI, String localName, Position position) { + if ( StringUtils.isBlank(namespaceURI) ) { + RDFXMLparseWarning("Unqualified typed nodes are not allowed: <"+localName+">", position); + } + String uriStr = qNameToIRI(namespaceURI, localName); return iri(uriStr, position); } - /** This is the RDF rule for creating an IRI from QName. */ - private String qNameToIRI(String namespace, String localName) { - String iriStr = namespace + localName; + /** This is the RDF rule for creating an IRI from a QName. 
*/ + private String qNameToIRI(String namespaceURI, String localName) { + String iriStr = namespaceURI + localName; return iriStr; } @@ -1294,26 +1302,31 @@ class ParserRDFXML_SAX private Node iriResolve(String uriStr, Position position) { Objects.requireNonNull(uriStr); Objects.requireNonNull(position); - int line = position.line(); - int col = position.column(); - String resolved = resolveIRI(uriStr, position); - return createURI(resolved, position); - } - - private String resolveIRI(String uriStr, Position position) { if ( uriStr.startsWith("_:") ) // <_:label> syntax. Handled by the FactoryRDF via the parser profile. - return uriStr; - return resolveIRIx(uriStr, position).str(); + return createURI(uriStr, position); + String resolved = resolveIRIx(uriStr, position).str(); + return createURI(resolved, position); } private IRIx resolveIRIx(String uriStr, Position position) { try { - if ( currentBase != null ) - return currentBase.resolve(uriStr); - IRIx iri = IRIx.create(uriStr); + + IRIx iri = resolveIRIxAny(uriStr, position); if ( iri.isRelative() ) - throw RDFXMLparseError("Base URI is null, but there are relative URIs to resolve" , position); + throw RDFXMLparseError("Relative URI encountered: <"+iri.str()+">" , position); + return iri; + } catch (IRIException ex) { + throw RDFXMLparseError(ex.getMessage(), position); + } + } + + /** String to IRIx, no opinion */ + private IRIx resolveIRIxAny(String uriStr, Position position) { + try { + IRIx iri = ( currentBase != null ) + ? 
currentBase.resolve(uriStr) + : IRIx.create(uriStr); return iri; } catch (IRIException ex) { throw RDFXMLparseError(ex.getMessage(), position); diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ReaderRDFXML_SAX.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ReaderRDFXML_SAX.java index c5597f0423..b7ff1a3624 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ReaderRDFXML_SAX.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ReaderRDFXML_SAX.java @@ -66,7 +66,7 @@ public class ReaderRDFXML_SAX implements ReaderRIOT } private void parse(InputSource inputSource, String xmlBase, ContentType ct, StreamRDF destination, Context context) { - ParserRDFXML_SAX sax2rdf = new ParserRDFXML_SAX(xmlBase, parserProfile, destination, RIOT.getContext().copy()); + ParserRRX_SAX sax2rdf = new ParserRRX_SAX(xmlBase, parserProfile, destination, RIOT.getContext().copy()); // Configured to avoid XXE XMLReader xmlReader; try { diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRRX_StAX_EV.java similarity index 96% rename from jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java rename to jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRRX_StAX_EV.java index 7ee9b26a88..fd354eb49b 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRRX_StAX_EV.java @@ -48,10 +48,9 @@ import org.apache.jena.sparql.graph.NodeConst; import org.apache.jena.sparql.util.Context; import org.apache.jena.util.XML11Char; import org.apache.jena.vocabulary.RDF; -import org.apache.jena.vocabulary.RDF.Nodes; /** StAX events */ -class ParserRDFXML_StAX_EV { +class ParserRRX_StAX_EV { private static boolean 
EVENTS = false; private final IndentedWriter trace; @@ -147,7 +146,7 @@ class ParserRDFXML_StAX_EV { /** Integer holder for rdf:li */ private static class Counter { int value = 1; } - ParserRDFXML_StAX_EV(XMLEventReader reader, String xmlBase, ParserProfile parserProfile, StreamRDF destination, Context context) { + ParserRRX_StAX_EV(XMLEventReader reader, String xmlBase, ParserProfile parserProfile, StreamRDF destination, Context context) { // Debug IndentedWriter out = IndentedWriter.stdout.clone(); out.setFlushOnNewline(true); @@ -165,7 +164,7 @@ class ParserRDFXML_StAX_EV { this.initialXmlLang = ""; if ( xmlBase != null ) { this.currentBase = IRIx.create(xmlBase); - parserProfile.setBaseIRI(currentBase.str()); + //parserProfile.setBaseIRI(currentBase.str()); } else { this.currentBase = null; } @@ -449,7 +448,7 @@ class ParserRDFXML_StAX_EV { RDFXMLparseWarning(str(qName)+" is not a recognized RDF term for a type", location); } - Node object = qNameToURI(qName, location); + Node object = qNameToIRI(qName, location); emit(subject, NodeConst.nodeRDFType, object, location); } @@ -490,7 +489,7 @@ class ParserRDFXML_StAX_EV { emit(subject, RDF.Nodes.type, type, location); continue; } - Node property = qNameToURI(attribute.getName(), location); + Node property = qNameToIRI(attribute.getName(), location); Node object = literal(attribute.getValue(), currentLang, location); emit(subject, property, object, location); } @@ -513,9 +512,8 @@ class ParserRDFXML_StAX_EV { String namespace = qName.getNamespaceURI(); String localName = qName.getLocalPart(); if ( namespace == null || namespace.isEmpty() ) { - // SAX passes xmlns as attributes with namespace and local name of "". The qname is "xmlns:"/"xmlns" - // StAX, does not pass namespaces. - + // Note about XML: The empty string namespace does not apply to XML attributes, + // only XML elements. ":attr" is not legal XML. 
//RDFXMLparseError("XML attribute '"+localName+"' used for RDF property attribute (no namespace)", event); if ( outputWarnings ) RDFXMLparseWarning("XML attribute '"+localName+"' used for RDF property attribute - ignored", event); @@ -591,7 +589,7 @@ class ParserRDFXML_StAX_EV { if ( qNameMatches(rdfContainerItem, startElt.getName()) ) property = iriDirect(rdfNS+"_"+Integer.toString(listElementCounter.value++), location); else - property = qNameToURI(startElt.getName(), location); + property = qNameToIRI(startElt.getName(), location); Node reify = reifyStatement(startElt); Emitter emitter = (reify==null) ? this::emit : (s,p,o,loc)->emitReify(reify, s, p, o, loc); @@ -1130,13 +1128,15 @@ class ParserRDFXML_StAX_EV { parserProfile.setBaseIRI(n.getURI()); } - private Node qNameToURI(QName qName, Location location) { - String uriStr = qNameToURI(qName); + private Node qNameToIRI(QName qName, Location location) { + if ( StringUtils.isBlank(qName.getNamespaceURI()) ) + RDFXMLparseWarning("Unqualified typed nodes are not allowed: <"+qName.getLocalPart()+">", location); + String uriStr = qNameToIRI(qName); return iriDirect(uriStr, location); } - /** RDF rule */ - private String qNameToURI(QName qName) { + /** This is the RDF rule for creating an IRI from a QName. 
private String qNameToIRI(QName qName) { return qName.getNamespaceURI()+qName.getLocalPart(); } @@ -1303,8 +1303,8 @@ class ParserRDFXML_StAX_EV { } private boolean processBaseAndLang(StartElement startElt) { - String xmlBase = attribute(startElt, xmlQNameBase); - String xmlLang = attribute(startElt, xmlQNameLang); + IRIx xmlBase = xmlBase(startElt); + String xmlLang = xmlLang(startElt); if ( ReaderRDFXML_StAX_EV.TRACE ) { if ( xmlBase != null ) trace.printf("+ BASE <%s>\n", xmlBase); @@ -1314,11 +1314,8 @@ class ParserRDFXML_StAX_EV { boolean hasFrame = (xmlBase != null || xmlLang != null); if ( hasFrame ) { pushFrame(currentBase, currentLang); - if ( xmlBase != null ) { - currentBase = (currentBase != null) - ? currentBase.resolve(xmlBase) - : IRIx.create(xmlBase); - } + if ( xmlBase != null ) + currentBase = xmlBase; if ( xmlLang != null ) currentLang = xmlLang; } @@ -1356,10 +1353,10 @@ class ParserRDFXML_StAX_EV { private void emitReify(Node reify, Node subject, Node property, Node object, Location location) { emit(subject, property, object, location); if ( reify != null ) { - emit(reify, NodeConst.nodeRDFType, Nodes.Statement, location); - emit(reify, Nodes.subject, subject, location); - emit(reify, Nodes.predicate, property, location); - emit(reify, Nodes.object, object, location); + emit(reify, NodeConst.nodeRDFType, RDF.Nodes.Statement, location); + emit(reify, RDF.Nodes.subject, subject, location); + emit(reify, RDF.Nodes.predicate, property, location); + emit(reify, RDF.Nodes.object, object, location); } } @@ -1371,6 +1368,27 @@ class ParserRDFXML_StAX_EV { destination.prefix(prefix, iriStr); } + /** + * Generate a new base IRIx. + * If this is relative, issue a warning. + * It is an error to use it and the error is generated + * in {@link #resolveIRIx}. 
+ */ + private IRIx xmlBase(StartElement startElt) { + String baseStr = attribute(startElt, xmlQNameBase); + if ( baseStr == null ) + return null; + Location location = startElt.getLocation(); + IRIx irix = resolveIRIxNoWarning(baseStr, location); + if ( irix.isRelative() ) + RDFXMLparseWarning("Relative URI for base: <"+baseStr+">", location); + return irix; + } + + private String xmlLang(StartElement startElt) { + return attribute(startElt, xmlQNameLang); + } + // ---- RDF Terms (Nodes) private Node iriFromID(String idStr, Location location) { @@ -1412,11 +1430,20 @@ class ParserRDFXML_StAX_EV { private IRIx resolveIRIx(String uriStr, Location location) { // This does not use the parser profile because the base stacks and unstacks in RDF/XML. try { - if ( currentBase != null ) - return currentBase.resolve(uriStr); - IRIx iri = IRIx.create(uriStr); + IRIx iri = resolveIRIxNoWarning(uriStr, location); if ( iri.isRelative() ) - throw RDFXMLparseError("Base URI is null, but there are relative URIs to resolve" , location); + throw RDFXMLparseError("Relative URI encountered: <"+iri.str()+">" , location); + return iri; + } catch (IRIException ex) { + throw RDFXMLparseError(ex.getMessage(), location); + } + } + + private IRIx resolveIRIxNoWarning(String uriStr, Location location) { + try { + IRIx iri = ( currentBase != null ) + ? 
currentBase.resolve(uriStr) + : IRIx.create(uriStr); return iri; } catch (IRIException ex) { throw RDFXMLparseError(ex.getMessage(), location); diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ReaderRDFXML_StAX_EV.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ReaderRDFXML_StAX_EV.java index 0741ac28de..592545efde 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ReaderRDFXML_StAX_EV.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ReaderRDFXML_StAX_EV.java @@ -78,7 +78,7 @@ public class ReaderRDFXML_StAX_EV implements ReaderRIOT } private void parse(XMLEventReader xmlEventReader, String xmlBase, ContentType ct, StreamRDF destination, Context context) { - ParserRDFXML_StAX_EV parser = new ParserRDFXML_StAX_EV(xmlEventReader, xmlBase, parserProfile, destination, context); + ParserRRX_StAX_EV parser = new ParserRRX_StAX_EV(xmlEventReader, xmlBase, parserProfile, destination, context); destination.start(); try { parser.parse(); diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRRX_StAX_SR.java similarity index 96% rename from jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java rename to jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRRX_StAX_SR.java index 6c9487fc3c..9ed1672a2d 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRRX_StAX_SR.java @@ -29,8 +29,6 @@ import javax.xml.namespace.QName; import javax.xml.stream.Location; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; -import javax.xml.stream.events.Attribute; -import javax.xml.stream.events.StartElement; import 
javax.xml.stream.events.XMLEvent; import org.apache.commons.lang3.StringUtils; @@ -55,7 +53,7 @@ import org.apache.jena.vocabulary.RDF; import org.apache.jena.vocabulary.RDF.Nodes; /* StAX - stream reader */ -class ParserRDFXML_StAX_SR { +class ParserRRX_StAX_SR { private static boolean EVENTS = false; private final IndentedWriter trace; @@ -148,7 +146,7 @@ class ParserRDFXML_StAX_SR { /** Integer holder for rdf:li */ private static class Counter { int value = 1; } - ParserRDFXML_StAX_SR(XMLStreamReader reader, String xmlBase, ParserProfile parserProfile, StreamRDF destination, Context context) { + ParserRRX_StAX_SR(XMLStreamReader reader, String xmlBase, ParserProfile parserProfile, StreamRDF destination, Context context) { // Debug IndentedWriter out = IndentedWriter.stdout.clone(); out.setFlushOnNewline(true); @@ -430,7 +428,7 @@ class ParserRDFXML_StAX_SR { RDFXMLparseWarning(str(qName)+" is not a recognized RDF term for a type"); } - Node object = qNameToURI(qName, location); + Node object = qNameToIRI(qName, location); emit(subject, NodeConst.nodeRDFType, object, location); } @@ -464,7 +462,7 @@ class ParserRDFXML_StAX_SR { emit(subject, RDF.Nodes.type, type, location); continue; } - Node property = qNameToURI(qName, location); + Node property = qNameToIRI(qName, location); String lexicalForm = xmlSource.getAttributeValue(i); Node object = literal(lexicalForm, currentLang, location); emit(subject, property, object, location); @@ -490,8 +488,8 @@ class ParserRDFXML_StAX_SR { private boolean checkPropertyAttribute(QName qName, boolean outputWarnings) { String namespace = qName.getNamespaceURI(); if ( namespace == null || namespace.isEmpty() ) { - // SAX passes xmlns as attributes with namespace and local name of "". The qname is "xmlns:"/"xmlns" - // StAX, does not pass namespaces. + // Note about XML: The empty string namespace does not apply to XML attributes, + // only XML elements. ":attr" is not legal XML. 
//RDFXMLparseError("XML attribute '"+qName.getLocalPart()+"' used for RDF property attribute (no namespace)", event); if ( outputWarnings ) RDFXMLparseWarning("XML attribute '"+qName.getLocalPart()+"' used for RDF property attribute - ignored"); @@ -566,7 +564,7 @@ class ParserRDFXML_StAX_SR { if ( qNameMatches(rdfContainerItem, qName) ) property = iriDirect(rdfNS+"_"+Integer.toString(listElementCounter.value++), location()); else - property = qNameToURI(qName, location); + property = qNameToIRI(qName, location); Node reify = reifyStatement(location); Emitter emitter = (reify==null) ? this::emit : (s,p,o,loc)->emitReify(reify, s, p, o, loc); @@ -1022,7 +1020,7 @@ class ParserRDFXML_StAX_SR { // subject property INNER emitter.emit(subject, property, subjectInner, location()); - // Process as a node element, having decied the subject. + // Process as a node element, having decided the subject. nodeElement(subjectInner); // End property tag. @@ -1078,13 +1076,6 @@ class ParserRDFXML_StAX_SR { return blankNode(location); } - private String attribute(StartElement startElt, QName attrName) { - Attribute attr = startElt.getAttributeByName(attrName); - if ( attr == null ) - return null; - return attr.getValue(); - } - // ---- Nodes private void setBase(String uriStr, Location location) { @@ -1092,13 +1083,17 @@ class ParserRDFXML_StAX_SR { parserProfile.setBaseIRI(n.getURI()); } - private Node qNameToURI(QName qName, Location location) { - String uriStr = strQNameToURI(qName); + /** This is the RDF rule for creating an IRI from a QName. */ + private Node qNameToIRI(QName qName, Location location) { + if ( StringUtils.isBlank(qName.getNamespaceURI()) ) + RDFXMLparseWarning("Unqualified typed nodes are not allowed: <"+qName.getLocalPart()+">", location); + + String uriStr = strQNameToIRI(qName); return iriDirect(uriStr, location); } - /** RDF rule */ - private String strQNameToURI(QName qName) { + /** This is the RDF rule for creating an IRI from a QName. 
private String strQNameToIRI(QName qName) { return qName.getNamespaceURI()+qName.getLocalPart(); } @@ -1264,8 +1259,9 @@ class ParserRDFXML_StAX_SR { } private boolean processBaseAndLang() { - String xmlBase = attribute(xmlQNameBase); - String xmlLang = attribute(xmlQNameLang); + IRIx xmlBase = xmlBase(); + String xmlLang = xmlLang(); + if ( ReaderRDFXML_StAX_SR.TRACE ) { if ( xmlBase != null ) trace.printf("+ BASE <%s>\n", xmlBase); @@ -1275,11 +1271,8 @@ class ParserRDFXML_StAX_SR { boolean hasFrame = (xmlBase != null || xmlLang != null); if ( hasFrame ) { pushFrame(currentBase, currentLang); - if ( xmlBase != null ) { - currentBase = (currentBase != null) - ? currentBase.resolve(xmlBase) - : IRIx.create(xmlBase); - } + if ( xmlBase != null ) + currentBase = xmlBase; if ( xmlLang != null ) currentLang = xmlLang; } @@ -1351,6 +1344,27 @@ class ParserRDFXML_StAX_SR { destination.prefix(prefix, iriStr); } + /** + * Generate a new base IRIx. + * If this is relative, issue a warning. + * It is an error to use it and the error is generated + * in {@link #resolveIRIx}. + */ + private IRIx xmlBase() { + String baseStr = attribute(xmlQNameBase); + if ( baseStr == null ) + return null; + Location location = location(); + IRIx irix = resolveIRIxAny(baseStr, location); + if ( irix.isRelative() ) + RDFXMLparseWarning("Relative URI for base: <"+baseStr+">", location); + return irix; + } + + private String xmlLang() { + return attribute(xmlQNameLang); + } + // ---- RDF Terms (Nodes) private Node iriFromID(String idStr, Location location) { @@ -1371,13 +1385,15 @@ class ParserRDFXML_StAX_SR { return parserProfile.createURI(uriStr, line, col); } - /** Create a URI. The IRI is resolved by this operation. */ + /** + * Create a URI. The IRI is resolved by this operation. 
+ */ private Node iriResolve(String uriStr, Location location) { Objects.requireNonNull(uriStr); Objects.requireNonNull(location); + String resolved = resolveIRI(uriStr, location); int line = location.getLineNumber(); int col = location.getColumnNumber(); - String resolved = resolveIRI(uriStr, location); return parserProfile.createURI(resolved, line, col); } @@ -1392,11 +1408,20 @@ class ParserRDFXML_StAX_SR { private IRIx resolveIRIx(String uriStr, Location location) { // This does not use the parser profile because the base stacks and unstacks in RDF/XML. try { - if ( currentBase != null ) - return currentBase.resolve(uriStr); - IRIx iri = IRIx.create(uriStr); + IRIx iri = resolveIRIxAny(uriStr, location); if ( iri.isRelative() ) - throw RDFXMLparseError("Base URI is null, but there are relative URIs to resolve" , location); + throw RDFXMLparseError("Relative URI encountered: <"+iri.str()+">" , location); + return iri; + } catch (IRIException ex) { + throw RDFXMLparseError(ex.getMessage(), location); + } + } + + private IRIx resolveIRIxAny(String uriStr, Location location) { + try { + IRIx iri = ( currentBase != null ) + ? 
currentBase.resolve(uriStr) + : IRIx.create(uriStr); return iri; } catch (IRIException ex) { throw RDFXMLparseError(ex.getMessage(), location); diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ReaderRDFXML_StAX_SR.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ReaderRDFXML_StAX_SR.java index 272b2cd6a4..ba12f54d37 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ReaderRDFXML_StAX_SR.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ReaderRDFXML_StAX_SR.java @@ -78,7 +78,7 @@ public class ReaderRDFXML_StAX_SR implements ReaderRIOT } private void parse(XMLStreamReader xmlStreamReader, String xmlBase, ContentType ct, StreamRDF destination, Context context) { - ParserRDFXML_StAX_SR parser = new ParserRDFXML_StAX_SR(xmlStreamReader, xmlBase, parserProfile, destination, context); + ParserRRX_StAX_SR parser = new ParserRRX_StAX_SR(xmlStreamReader, xmlBase, parserProfile, destination, context); destination.start(); try { parser.parse(); diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java index f9133a433e..64712ff262 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java @@ -71,6 +71,8 @@ public class RunTestRDFXML { } } + private static String ParserBase = "http://external/base#"; + /** * Manifest-like in that the test files are run in a specific order. 
* The local files cover all the features of RDF/XML parsing @@ -463,7 +465,7 @@ public class RunTestRDFXML { Graph graph = GraphFactory.createDefaultGraph(); StreamRDF dest = StreamRDFLib.graph(graph); try ( InputStream in = IO.openFile(filename) ) { - reader.read(in, "http://external/base", WebContent.ctRDFXML, dest, RIOT.getContext().copy()); + reader.read(in, ParserBase, WebContent.ctRDFXML, dest, RIOT.getContext().copy()); } catch (IOException ex) { throw IOX.exception(ex); } diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java index e88689c058..cd4e4a8c24 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java @@ -25,15 +25,10 @@ import java.util.List; import org.apache.jena.atlas.io.IO; import org.apache.jena.atlas.io.IOX; -import org.apache.jena.graph.Graph; import org.apache.jena.riot.*; import org.apache.jena.riot.lang.rdfxml.RRX; import org.apache.jena.riot.lang.rdfxml.rrx.RunTestRDFXML.ErrorHandlerCollector; -import org.apache.jena.riot.system.ParserProfile; -import org.apache.jena.riot.system.RiotLib; -import org.apache.jena.riot.system.StreamRDF; -import org.apache.jena.riot.system.StreamRDFLib; -import org.apache.jena.sparql.graph.GraphFactory; +import org.apache.jena.riot.system.*; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -72,15 +67,15 @@ public class TestRRX { // Test2 for more than one object in RDF/XML striping. 
@Test public void error_multiple_objects_lex_node() { - errorTest("multiple_objects_lex_node.rdf"); + checkForError("multiple_objects_lex_node.rdf"); } @Test public void error_multiple_objects_node_lex() { - errorTest("multiple_objects_node_lex.rdf"); + checkForError("multiple_objects_node_lex.rdf"); } @Test public void error_multiple_objects_node_node() { - errorTest("multiple_objects_node_node.rdf"); + checkForError("multiple_objects_node_node.rdf"); } // Check that the "one object" parse state does not impact deeper structures. @@ -91,7 +86,7 @@ public class TestRRX { // rdf:parserType= @Test public void error_parseType_unknown() { // This is only a warning in ARP. - errorTest("parseType-unknown.rdf", false); + checkForError("parseType-unknown.rdf", false); } @Test public void warn_parseType_extension_1() { @@ -102,28 +97,39 @@ public class TestRRX { } // misc - @Test public void no_base_not_requiredBase01() { + @Test public void base_not_needed() { // Call with no base; no base needed. - noBase("file-no-base.rdf"); + noBase("base-none.rdf"); } @Test(expected=RiotException.class) - public void noBase_but_needed() { + public void bare_needed() { // Call with no base; a base is needed => exception. - noBase("file-external-base.rdf"); + noBase("base-external-needed.rdf"); + } + + @Test(expected=RiotException.class) + public void base_inner_1() { + // Call with no base; xml:base is relative in the data. + noBase("base-inner.rdf"); + } + + public void base_inner_2() { + // Called external base + goodTest("base-inner.rdf"); } // CIM @Test public void cim_statements01() { // parseType="Statements" - // because ARP behaved that way. - //errorTest("error02.rdf"); + // This is an extension to support CIM XML data. + // ARP behaved this way. // Warning issued. 
warningTest("cim_statements01.rdf", 2); } @Test public void rdfResourceBad() { - errorTest("rdf-resource-node.rdf"); + checkForError("rdf-resource-node.rdf"); } /** Parse with no base set by the parser */ @@ -133,10 +139,13 @@ public class TestRRX { ErrorHandlerCollector errorHandler = new ErrorHandlerCollector(); ParserProfile parserProfile = RiotLib.createParserProfile(RiotLib.factoryRDF(), errorHandler, true); ReaderRIOT reader = factory.create(lang, parserProfile); - Graph graph = GraphFactory.createDefaultGraph(); - StreamRDF dest = StreamRDFLib.graph(graph); + StreamRDF dest = false + ? StreamRDFWriter.getWriterStream(System.out, RDFFormat.TURTLE_FLAT) + : StreamRDFWriter.getWriterStream(System.out, RDFFormat.RDFNULL); try ( InputStream in = IO.openFile(fn) ) { reader.read(in, null/* No base*/, WebContent.ctRDFXML, dest, RIOT.getContext().copy()); + } catch (RiotException ex) { + throw ex; } catch (IOException ex) { throw IOX.exception(ex); } @@ -156,13 +165,18 @@ public class TestRRX { RunTestRDFXML.runTestCompareARP(fn, factory, label, fn); } - // Run test, expecting an error. - // This is checked by error handler. - private void errorTest(String filename) { - errorTest(filename, true); + /** + * Run test, expecting an error. + */ + private void checkForError(String filename) { + checkForError(filename, true); } - private void errorTest(String filename, boolean compare) { + /** + * Run test, expecting an error. If the second argument is true, also Compare to + * make sure it is the same as ARP. 
+ */ + private void checkForError(String filename, boolean compare) { ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang); String fn = DIR+filename; RunTestRDFXML.runTestExpectFailure(filename, factory, label, fn); diff --git a/jena-arq/testing/RIOT/rrx-files/file-external-base.rdf b/jena-arq/testing/RIOT/rrx-files/base-external-needed.rdf similarity index 100% copy from jena-arq/testing/RIOT/rrx-files/file-external-base.rdf copy to jena-arq/testing/RIOT/rrx-files/base-external-needed.rdf diff --git a/jena-arq/testing/RIOT/rrx-files/file-external-base.rdf b/jena-arq/testing/RIOT/rrx-files/base-inner.rdf similarity index 74% rename from jena-arq/testing/RIOT/rrx-files/file-external-base.rdf rename to jena-arq/testing/RIOT/rrx-files/base-inner.rdf index e13b8cbd50..682248a36f 100644 --- a/jena-arq/testing/RIOT/rrx-files/file-external-base.rdf +++ b/jena-arq/testing/RIOT/rrx-files/base-inner.rdf @@ -6,8 +6,7 @@ xmlns="http://local/" xmlns:ex="http://example/" > - <rdf:Description rdf:about="relative"> - <ex:property1>ABC</ex:property1> + <rdf:Description rdf:about="inner" xml:base="relative"> + <ex:property>ABC</ex:property> </rdf:Description> - </rdf:RDF> diff --git a/jena-arq/testing/RIOT/rrx-files/file-no-base.rdf b/jena-arq/testing/RIOT/rrx-files/base-none.rdf similarity index 100% rename from jena-arq/testing/RIOT/rrx-files/file-no-base.rdf rename to jena-arq/testing/RIOT/rrx-files/base-none.rdf diff --git a/jena-core/src/main/java/org/apache/jena/irix/IRIx.java b/jena-core/src/main/java/org/apache/jena/irix/IRIx.java index 7b2c5377f1..36ae005e38 100644 --- a/jena-core/src/main/java/org/apache/jena/irix/IRIx.java +++ b/jena-core/src/main/java/org/apache/jena/irix/IRIx.java @@ -185,7 +185,6 @@ public abstract class IRIx { */ public abstract IRIx normalize(); - /** * Return (if possible), an IRI that is relative to the base argument. * If this IRI is a relative path, this is returned unchanged.
