This is an automated email from the ASF dual-hosted git repository. andy pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/jena.git
commit e36f93c9d04866f8892c0b563bba0809399e76e1 Author: Andy Seaborne <[email protected]> AuthorDate: Sat Aug 10 21:41:34 2024 +0100 GH-2629: Check for unqualified elements --- .../jena/riot/lang/rdfxml/rrx/ParserRRX_SAX.java | 17 +++-- .../lang/rdfxml/rrx_stax_ev/ParserRRX_StAX_EV.java | 12 ++- .../lang/rdfxml/rrx_stax_sr/ParserRRX_StAX_SR.java | 19 ++--- .../jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java | 2 +- .../apache/jena/riot/lang/rdfxml/rrx/TestRRX.java | 88 ++++++++++++++++++++-- .../{rdf-resource-node.rdf => bad-rdf-id-node.rdf} | 4 +- ...resource-node.rdf => bad-rdf-resource-node.rdf} | 0 .../RIOT/rrx-files/bad-unqualified-attribute1.rdf | 6 ++ .../RIOT/rrx-files/bad-unqualified-attribute2.rdf | 9 +++ .../RIOT/rrx-files/bad-unqualified-class.rdf | 6 ++ .../RIOT/rrx-files/bad-unqualified-property.rdf | 8 ++ 11 files changed, 135 insertions(+), 36 deletions(-) diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRRX_SAX.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRRX_SAX.java index aca5212fcb..9f34f9cd2e 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRRX_SAX.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRRX_SAX.java @@ -696,7 +696,7 @@ class ParserRRX_SAX if ( isNotRecognizedRDFtype(namespaceURI, localName) ) RDFXMLparseWarning(qName+" is not a recognized RDF term for a type", position); } - Node object = qNameToIRI(namespaceURI, localName, position); + Node object = qNameToIRI(namespaceURI, localName, position, "typed node element"); emit(currentSubject, RDF.Nodes.type, object, position); } @@ -724,8 +724,10 @@ class ParserRRX_SAX int i = containerPropertyCounter.value++; String p = rdfNS+"_"+i; currentProperty = iri(p, position); - } else - currentProperty = qNameToIRI(namespaceURI, localName, position); + } else { + // The empty string namespace does not apply to XML attributes. 
+ currentProperty = qNameToIRI(namespaceURI, localName, position, "property element"); + } if ( ReaderRDFXML_SAX.TRACE ) trace.printf("Property = %s\n", str(currentProperty)); @@ -1026,7 +1028,7 @@ class ParserRRX_SAX return; } } - Node property = qNameToIRI(namespaceURI, localName, position); + Node property = qNameToIRI(namespaceURI, localName, position, "property attribute"); String lex = value; Node object = literal(lex, currentLang, position); emit(subject, property, object, position); @@ -1269,10 +1271,9 @@ class ParserRRX_SAX // ---- Creating terms. - private Node qNameToIRI(String namespaceURI, String localName, Position position) { - if ( StringUtils.isBlank(namespaceURI) ) { - RDFXMLparseWarning("Unqualified typed nodes are not allowed: <"+localName+">", position); - } + private Node qNameToIRI(String namespaceURI, String localName, Position position, String usage) { + if ( StringUtils.isBlank(namespaceURI) ) + throw RDFXMLparseError("Unqualified "+usage+" not allowed: <"+localName+">", position); String uriStr = qNameToIRI(namespaceURI, localName); return iri(uriStr, position); } diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRRX_StAX_EV.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRRX_StAX_EV.java index fd354eb49b..84e97ead58 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRRX_StAX_EV.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRRX_StAX_EV.java @@ -448,7 +448,7 @@ class ParserRRX_StAX_EV { RDFXMLparseWarning(str(qName)+" is not a recognized RDF term for a type", location); } - Node object = qNameToIRI(qName, location); + Node object = qNameToIRI(qName, location, "typed node element"); emit(subject, NodeConst.nodeRDFType, object, location); } @@ -489,7 +489,7 @@ class ParserRRX_StAX_EV { emit(subject, RDF.Nodes.type, type, location); continue; } - Node property = qNameToIRI(attribute.getName(), 
location); + Node property = qNameToIRI(attribute.getName(), location, "property attribute"); Node object = literal(attribute.getValue(), currentLang, location); emit(subject, property, object, location); } @@ -512,8 +512,6 @@ class ParserRRX_StAX_EV { String namespace = qName.getNamespaceURI(); String localName = qName.getLocalPart(); if ( namespace == null || namespace.isEmpty() ) { - // Note about XML: The empty string namespace does not apply to XML attributes, - // only XML elements. ":attr" is not legal XML. //RDFXMLparseError("XML attribute '"+localName+"' used for RDF property attribute (no namespace)", event); if ( outputWarnings ) RDFXMLparseWarning("XML attribute '"+localName+"' used for RDF property attribute - ignored", event); @@ -589,7 +587,7 @@ class ParserRRX_StAX_EV { if ( qNameMatches(rdfContainerItem, startElt.getName()) ) property = iriDirect(rdfNS+"_"+Integer.toString(listElementCounter.value++), location); else - property = qNameToIRI(startElt.getName(), location); + property = qNameToIRI(startElt.getName(), location, "property element"); Node reify = reifyStatement(startElt); Emitter emitter = (reify==null) ? 
this::emit : (s,p,o,loc)->emitReify(reify, s, p, o, loc); @@ -1128,9 +1126,9 @@ class ParserRRX_StAX_EV { parserProfile.setBaseIRI(n.getURI()); } - private Node qNameToIRI(QName qName, Location location) { + private Node qNameToIRI(QName qName, Location location, String usage) { if ( StringUtils.isBlank(qName.getNamespaceURI()) ) - RDFXMLparseWarning("Unqualified typed nodes are not allowed: <"+qName.getLocalPart()+">", location); + throw RDFXMLparseError("Unqualified "+usage+" not allowed: <"+qName.getLocalPart()+">", location); String uriStr = qNameToIRI(qName); return iriDirect(uriStr, location); } diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRRX_StAX_SR.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRRX_StAX_SR.java index 9ed1672a2d..a8eaa8402e 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRRX_StAX_SR.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRRX_StAX_SR.java @@ -428,7 +428,7 @@ class ParserRRX_StAX_SR { RDFXMLparseWarning(str(qName)+" is not a recognized RDF term for a type"); } - Node object = qNameToIRI(qName, location); + Node object = qNameToIRI(qName, location, "type node element"); emit(subject, NodeConst.nodeRDFType, object, location); } @@ -462,7 +462,7 @@ class ParserRRX_StAX_SR { emit(subject, RDF.Nodes.type, type, location); continue; } - Node property = qNameToIRI(qName, location); + Node property = qNameToIRI(qName, location, "property attribute"); String lexicalForm = xmlSource.getAttributeValue(i); Node object = literal(lexicalForm, currentLang, location); emit(subject, property, object, location); @@ -486,15 +486,13 @@ class ParserRRX_StAX_SR { * @param qName */ private boolean checkPropertyAttribute(QName qName, boolean outputWarnings) { - String namespace = qName.getNamespaceURI(); - if ( namespace == null || namespace.isEmpty() ) { - // Note about XML: The empty string namespace does 
not apply to XML attributes, - // only XML elements. ":attr" is not legal XML. - //RDFXMLparseError("XML attribute '"+qName.getLocalPart()+"' used for RDF property attribute (no namespace)", event); + if ( StringUtils.isBlank(qName.getNamespaceURI()) ) { + //RDFXMLparseError("XML attribute '"+localName+"' used for RDF property attribute (no namespace)", event); if ( outputWarnings ) RDFXMLparseWarning("XML attribute '"+qName.getLocalPart()+"' used for RDF property attribute - ignored"); return false; } + if ( isSyntaxAttribute(qName) ) return false; @@ -564,7 +562,7 @@ class ParserRRX_StAX_SR { if ( qNameMatches(rdfContainerItem, qName) ) property = iriDirect(rdfNS+"_"+Integer.toString(listElementCounter.value++), location()); else - property = qNameToIRI(qName, location); + property = qNameToIRI(qName, location, "property element"); Node reify = reifyStatement(location); Emitter emitter = (reify==null) ? this::emit : (s,p,o,loc)->emitReify(reify, s, p, o, loc); @@ -1084,10 +1082,9 @@ class ParserRRX_StAX_SR { } /** This is the RDF rule for creating an IRI from a QName. 
*/ - private Node qNameToIRI(QName qName, Location location) { + private Node qNameToIRI(QName qName, Location location, String usage) { if ( StringUtils.isBlank(qName.getNamespaceURI()) ) - RDFXMLparseWarning("Unqualified typed nodes are not allowed: <"+qName.getLocalPart()+">", location); - + throw RDFXMLparseError("Unqualified "+usage+" not allowed: <"+qName.getLocalPart()+">", location); String uriStr = strQNameToIRI(qName); return iriDirect(uriStr, location); } diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java index 64712ff262..2f12d64e41 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java @@ -329,7 +329,7 @@ public class RunTestRDFXML { ErrorHandlerCollector actualErrorHandler = new ErrorHandlerCollector(); assertThrows(RiotException.class, ()->{ parseFile(testSubjectFactory, actualErrorHandler, filename); - output.printf("## Expected RiotExpection : %-4s : %s : %s", subjectLabel, testLabel, filename); + //output.printf("## Expected RiotExpection : %-4s : %s : %s\n", subjectLabel, testLabel, filename); }); checkErrorHandler(testLabel, actualErrorHandler, -1, 1, 0); } diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java index cd4e4a8c24..44235fb102 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java @@ -20,15 +20,22 @@ package org.apache.jena.riot.lang.rdfxml.rrx; import java.io.IOException; import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; +import 
java.util.stream.Collectors; +import org.apache.commons.collections4.SetUtils; import org.apache.jena.atlas.io.IO; import org.apache.jena.atlas.io.IOX; import org.apache.jena.riot.*; import org.apache.jena.riot.lang.rdfxml.RRX; import org.apache.jena.riot.lang.rdfxml.rrx.RunTestRDFXML.ErrorHandlerCollector; import org.apache.jena.riot.system.*; +import org.junit.AfterClass; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; @@ -65,17 +72,41 @@ public class TestRRX { this.lang = lang; } - // Test2 for more than one object in RDF/XML striping. + // XXX Track files! + + private static Set<String> processedFiles = new HashSet<>(); + private void trackFilename(String filename) { + processedFiles.add(filename); + } + + /** Check all files in the test directory were touched */ + + @AfterClass public static void checkFiles() { + // This can break when running single tests. + Set<String> fsFiles = localTestFiles(); + if ( fsFiles.size() != processedFiles.size()) { + System.out.flush(); + System.err.flush(); + Set<String> missed = SetUtils.difference(fsFiles, processedFiles); + System.err.println("Missed files: "); + missed.forEach(x->System.err.printf(" %s\n",x)); + System.out.flush(); + System.err.flush(); + //Assert.fail(); + } + } + + // Test for more than one object in RDF/XML striping. @Test public void error_multiple_objects_lex_node() { - checkForError("multiple_objects_lex_node.rdf"); + checkForErrorCompare("multiple_objects_lex_node.rdf"); } @Test public void error_multiple_objects_node_lex() { - checkForError("multiple_objects_node_lex.rdf"); + checkForErrorCompare("multiple_objects_node_lex.rdf"); } @Test public void error_multiple_objects_node_node() { - checkForError("multiple_objects_node_node.rdf"); + checkForErrorCompare("multiple_objects_node_node.rdf"); } // Check that the "one object" parse state does not impact deeper structures. 
@@ -128,12 +159,26 @@ public class TestRRX { warningTest("cim_statements01.rdf", 2); } - @Test public void rdfResourceBad() { - checkForError("rdf-resource-node.rdf"); + @Test public void element_node_rdf_resource_bad() { + checkForErrorCompare("bad-rdf-resource-node.rdf"); + } + + @Test public void element_node_rdf_id_bad() { + checkForErrorCompare("bad-rdf-id-node.rdf"); + } + + @Test public void bad_unqualified_property() { + checkForError("bad-unqualified-property.rdf", false); + } + + @Test public void bad_unqualified_class() { + checkForError("bad-unqualified-class.rdf", false); } /** Parse with no base set by the parser */ private void noBase(String filename) { + trackFilename(filename); + ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang); String fn = DIR+filename; ErrorHandlerCollector errorHandler = new ErrorHandlerCollector(); @@ -152,6 +197,7 @@ public class TestRRX { } private void goodTest(String filename) { + trackFilename(filename); ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang); String fn = DIR+filename; RunTestRDFXML.runTestPlain(filename, factory, label, fn); @@ -159,6 +205,7 @@ public class TestRRX { } private void warningTest(String filename, int warnings) { + trackFilename(filename); ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang); String fn = DIR+filename; RunTestRDFXML.runTestExpectWarning(filename, factory, label, warnings, fn); @@ -167,20 +214,47 @@ public class TestRRX { /** * Run test, expecting an error. + * Compare to running ARP. */ - private void checkForError(String filename) { + private void checkForErrorCompare(String filename) { checkForError(filename, true); } + /** + * Run test, expecting an error. + * Do not compare to running ARP. + */ + private void checkForErroNoCompare(String filename) { + checkForError(filename, false); + } + + /** * Run test, expecting an error. If the second argument is true, also Compare to * make sure it is the same as ARP. 
*/ private void checkForError(String filename, boolean compare) { + trackFilename(filename); ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang); String fn = DIR+filename; RunTestRDFXML.runTestExpectFailure(filename, factory, label, fn); if ( compare ) RunTestRDFXML.runTestCompareARP(fn, factory, label, fn); } + + static Set<String> localTestFiles() { + Path LOCAL_DIR = Path.of(DIR); + Set<String> found; + try { + found = Files + .list(LOCAL_DIR) + // Directory relative name. + .map(path->path.getFileName().toString()) + .filter(fn->fn.endsWith(".rdf")) + .collect(Collectors.toSet()); + } catch (IOException e) { + throw IOX.exception(e); + } + return found; + } } diff --git a/jena-arq/testing/RIOT/rrx-files/rdf-resource-node.rdf b/jena-arq/testing/RIOT/rrx-files/bad-rdf-id-node.rdf similarity index 70% copy from jena-arq/testing/RIOT/rrx-files/rdf-resource-node.rdf copy to jena-arq/testing/RIOT/rrx-files/bad-rdf-id-node.rdf index ee3bbab131..db19f039ff 100644 --- a/jena-arq/testing/RIOT/rrx-files/rdf-resource-node.rdf +++ b/jena-arq/testing/RIOT/rrx-files/bad-rdf-id-node.rdf @@ -7,8 +7,8 @@ xmlns="http://local/" xmlns:ex="http://example/" > - <!-- rdf:resource on rdf:Description --> - <rdf:Description rdf:about="http://example/s" rdf:resource="http://example/foo"> + <!-- rdf:ID on rdf:Description --> + <rdf:Description rdf:about="http://example/s" rdf:ID="foo"> <ex:property>text</ex:property> </rdf:Description> </rdf:RDF> \ No newline at end of file diff --git a/jena-arq/testing/RIOT/rrx-files/rdf-resource-node.rdf b/jena-arq/testing/RIOT/rrx-files/bad-rdf-resource-node.rdf similarity index 100% rename from jena-arq/testing/RIOT/rrx-files/rdf-resource-node.rdf rename to jena-arq/testing/RIOT/rrx-files/bad-rdf-resource-node.rdf diff --git a/jena-arq/testing/RIOT/rrx-files/bad-unqualified-attribute1.rdf b/jena-arq/testing/RIOT/rrx-files/bad-unqualified-attribute1.rdf new file mode 100644 index 0000000000..dd1db8e7c3 --- /dev/null +++ 
b/jena-arq/testing/RIOT/rrx-files/bad-unqualified-attribute1.rdf @@ -0,0 +1,6 @@ +<?xml version='1.0'?> +<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 --> + +<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> + <rdf:Description name="NAME"/> +</rdf:RDF> \ No newline at end of file diff --git a/jena-arq/testing/RIOT/rrx-files/bad-unqualified-attribute2.rdf b/jena-arq/testing/RIOT/rrx-files/bad-unqualified-attribute2.rdf new file mode 100644 index 0000000000..21940b5d63 --- /dev/null +++ b/jena-arq/testing/RIOT/rrx-files/bad-unqualified-attribute2.rdf @@ -0,0 +1,9 @@ +<?xml version='1.0'?> +<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 --> + +<rdf:RDF + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns="http://example/"> + <!-- qnames do not apply to attributes unless ex:attr --> + <rdf:Description name="NAME"/> +</rdf:RDF> \ No newline at end of file diff --git a/jena-arq/testing/RIOT/rrx-files/bad-unqualified-class.rdf b/jena-arq/testing/RIOT/rrx-files/bad-unqualified-class.rdf new file mode 100644 index 0000000000..aa59cef963 --- /dev/null +++ b/jena-arq/testing/RIOT/rrx-files/bad-unqualified-class.rdf @@ -0,0 +1,6 @@ +<?xml version='1.0'?> +<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 --> + +<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> + <Class/> +</rdf:RDF> diff --git a/jena-arq/testing/RIOT/rrx-files/bad-unqualified-property.rdf b/jena-arq/testing/RIOT/rrx-files/bad-unqualified-property.rdf new file mode 100644 index 0000000000..46a054784e --- /dev/null +++ b/jena-arq/testing/RIOT/rrx-files/bad-unqualified-property.rdf @@ -0,0 +1,8 @@ +<?xml version='1.0'?> +<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0 --> + +<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> + <rdf:Description rdf:about="http://example/s"> + <property></property> + </rdf:Description> +</rdf:RDF> \ No 
newline at end of file
