This is an automated email from the ASF dual-hosted git repository.
andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git
The following commit(s) were added to refs/heads/main by this push:
new 48cd7f762a GH-2620: Striping error fix for RRX
48cd7f762a is described below
commit 48cd7f762afe721c9ab14af3d70ea88818366ca1
Author: Andy Seaborne <[email protected]>
AuthorDate: Tue Aug 6 20:37:39 2024 +0100
GH-2620: Striping error fix for RRX
---
.../org/apache/jena/riot/lang/rdfxml/SysRRX.java | 1 -
.../riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java | 103 +++++++++++++--------
.../rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java | 36 +++++--
.../rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java | 31 +++++--
.../jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java | 6 +-
.../rdfxml/converted_legacy/TS_ConvertedARP1.java | 3 +
.../converted_legacy/TestARP1_W3C_Pending.java | 4 +-
.../rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java | 3 +-
...Xev.java => TestManifest_RDF11_RRX_StAXev.java} | 2 +-
.../lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java | 2 +-
.../jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java | 27 +++---
.../apache/jena/riot/lang/rdfxml/rrx/TestRRX.java | 61 ++++++++++--
jena-arq/testing/RIOT/rrx-files/README | 10 +-
.../{error01.rdf => multiple_objects_lex_node.rdf} | 15 ++-
.../{error01.rdf => multiple_objects_node_lex.rdf} | 15 ++-
...{error01.rdf => multiple_objects_node_node.rdf} | 15 ++-
jena-arq/testing/RIOT/rrx-files/nested_object.rdf | 17 ++++
.../{error01.rdf => parseType-unknown.rdf} | 0
jena-arq/testing/RIOT/rrx/README | 4 +
19 files changed, 245 insertions(+), 110 deletions(-)
diff --git
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/SysRRX.java
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/SysRRX.java
index 0fc87e881f..17a6173b26 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/SysRRX.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/SysRRX.java
@@ -38,5 +38,4 @@ public class SysRRX {
xmlInputFactory.setProperty(XMLInputFactory.IS_COALESCING,
Boolean.FALSE);
return xmlInputFactory;
}
-
}
diff --git
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
index 27447c3524..e34ccc4329 100644
---
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
+++
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx/ParserRDFXML_SAX.java
@@ -28,6 +28,7 @@ import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import org.apache.jena.atlas.io.IndentedWriter;
+import org.apache.jena.atlas.lib.EscapeStr;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
import org.apache.jena.graph.Node;
@@ -235,13 +236,18 @@ class ParserRDFXML_SAX
ObjectLex,
// The node implied by rdf:parseType=Resource
- ObjectParserTypeResource,
+ ObjectParseTypeResource,
// The object is rdf:parseType=Literal. Collecting characters of a RDF
XML Literal
ObjectParseTypeLiteral,
// The object is rdf:parseType=Collection (RDF List)
- ObjectParseTypeCollection
+ ObjectParseTypeCollection,
+
+ // The object is a nested element.
+ // Unlike NodeElement, there is only one ObjectNode inside one
property.
+ // ObjectLex becomes ObjectNode if a startElement is found.
+ ObjectNode
}
/** Integer holder for rdf:li */
@@ -314,7 +320,7 @@ class ParserRDFXML_SAX
// If this frame is ParserMode.ObjectResource , then it is an implicit
frame
// inserted for the implied node. Pop the stack again to balance the
push of
// the implicit node element.
- if ( parserMode == ParserMode.ObjectParserTypeResource ) {
+ if ( parserMode == ParserMode.ObjectParseTypeResource ) {
popParserFrame();
decIndent();
}
@@ -485,6 +491,7 @@ class ParserRDFXML_SAX
}
trace.printf(") mode = %s\n", parserMode);
}
+
incIndent();
Position position = position();
@@ -497,16 +504,20 @@ class ParserRDFXML_SAX
switch (parserMode) {
case ObjectLex -> {
// While processing ObjectLex, we found a startElement.
- // The "ObjectLex" decision needs updating. This is a
ParserMode.NodeElement.
+ // The "ObjectLex" decision needs updating. This is a
ParserMode.NodeElement.
// This is not parseType=Resource.
if ( !isWhitespace(accCharacters) )
throw RDFXMLparseError("XML content before nested
element", position);
accCharacters.setLength(0);
- // Declare that the containing frame is expecting a node
element mode.
- // Leave in parserMode=ObjectLex
- pushParserFrame(ParserMode.NodeElement);
+ // Declare that the containing frame is expecting a node
element as the object.
+ // There can be only one object.
+ pushParserFrame(ParserMode.ObjectNode);
processBaseAndLang(attributes, position);
}
+ case ObjectNode -> {
+ // Already in ObjectNode so a second startElement is an error.
+ throw RDFXMLparseError("Start tag after inner node element
(only one node element permitted): got "+qName, position);
+ }
default -> {
// For everything else.
pushParserFrame();
@@ -528,7 +539,7 @@ class ParserRDFXML_SAX
// The top element can be a single nodeElement.
startNodeElement(namespaceURI, localName, qName, attributes,
position);
}
- case NodeElement ->
+ case NodeElement, ObjectNode ->
startNodeElement(namespaceURI, localName, qName, attributes,
position);
case PropertyElement ->
startPropertyElement(namespaceURI, localName, qName,
attributes, position);
@@ -574,30 +585,33 @@ class ParserRDFXML_SAX
return;
}
endXMLLiteral(position);
- if ( ReaderRDFXML_SAX.TRACE )
- trace.printf("**** End XML Literal[%s]: elementDepth=%d /
xmlLiteralStartDepth=%s\n", qName, elementDepth, xmlLiteralStartDepth);
// Keep going to finish the end tag.
}
switch (parserMode) {
- case NodeElement ->
- endNodeElement(position);
+ case NodeElement, ObjectNode ->
+ endNodeElement(position);
case PropertyElement -> {
if ( isEndNodeElement() )
// Possible next property but it's a node element so no
property
- // and it's end of node, with two "end property" tags seen
in a row.
+ // and it is end of node, with two "end property" tags
seen in a row.
+ // This occurs for
+ // <rdf:Description> and no properties *maybe some
attribute properties.
+ // <Class></Class>
endNodeElement(position);
else
endPropertyElement(position);
}
- case ObjectLex ->
+ case ObjectLex -> {
endObjectLexical(position);
- case ObjectParseTypeLiteral ->
+ }
+ case ObjectParseTypeLiteral -> {
endObjectXMLLiteral(position);
- case ObjectParseTypeCollection ->
+ }
+ case ObjectParseTypeCollection -> {
endCollectionItem(position);
- default ->
- throw RDFXMLparseError("Inconsistent parserMode:" +
parserMode, position);
+ }
+ default -> throw RDFXMLparseError("Inconsistent parserMode:" +
parserMode, position);
}
popParserFrame();
@@ -783,7 +797,7 @@ class ParserRDFXML_SAX
// Push a frame here as an implicit node frame because the
subject is changing.
// The companion "end frame" is handled in "popParserFrame"
which
// checks for parserMode=ImplicitNode
- parserMode(ParserMode.ObjectParserTypeResource);
+ parserMode(ParserMode.ObjectParseTypeResource);
pushParserFrame();
// ... expect a property element start or an end element.
parserMode(ParserMode.PropertyElement);
@@ -808,12 +822,12 @@ class ParserRDFXML_SAX
return currentProperty == null;
}
- // private String xmlBaseStr(Attributes attributes, Position position) {
- // String baseStr = attributes.getValue(xmlNS, xmlBaseLN);
- // if ( baseStr == null )
- // return null;
- // return IRIs.resolve(currentBase, baseStr);
- // }
+// private String xmlBaseStr(Attributes attributes, Position position) {
+// String baseStr = attributes.getValue(xmlNS, xmlBaseLN);
+// if ( baseStr == null )
+// return null;
+// return IRIs.resolve(currentBase, baseStr);
+// }
// Start element encountered when expecting a ObjectCollection
private void startCollectionItem(String namespaceURI, String
localName, String qName, Attributes attributes, Position position) {
@@ -958,8 +972,9 @@ class ParserRDFXML_SAX
String qName = attributes.getQName(index);
if ( namespace == null || namespace.isEmpty() ) {
- if ( outputWarnings ) {
- // In SAX, xmlns: is a qname, but namespace and local name are
"".
+ // In SAX, xmlns: is qname, but namespace and local name are "".
+ //RDFXMLparseError("XML attribute '"+qName+"' used for RDF
property attribute (no namespace)", position);
+ if ( outputWarnings ){
if ( ! localName.isEmpty() ) // Skip XML namespace
declarations.
RDFXMLparseWarning("XML attribute '"+qName+"' used for RDF
property attribute - ignored", position);
}
@@ -1040,6 +1055,7 @@ class ParserRDFXML_SAX
return ObjectParseType.Plain;
try {
String parseTypeName = parseTypeStr;
+ // Extensions - some names that appear in the wild
switch(parseTypeName) {
case "literal" -> {
RDFXMLparseWarning("Encountered rdf:parseType='literal'.
Treated as rdf:parseType='Literal'", position);
@@ -1138,29 +1154,42 @@ class ParserRDFXML_SAX
return;
}
// Allow whitespace only
- case ObjectParserTypeResource, NodeElement, PropertyElement,
ObjectParseTypeCollection -> {
- if ( !isWhitespace(ch, start, length) )
- throw RDFXMLparseError("Non-whitespace text content
between element tags: "
- +
nonWhitespaceForMsg(ch, start, length), position());
+ case NodeElement, PropertyElement, ObjectParseTypeResource,
ObjectParseTypeCollection, ObjectNode -> {
+ if ( !isWhitespace(ch, start, length) ) {
+ String text = nonWhitespaceMsg(ch, start, length);
+ throw RDFXMLparseError("Non-whitespace text content
between element tags: '"+text+"'", position());
+ }
}
case TOP -> {
if ( !isWhitespace(ch, start, length) ) {
- throw RDFXMLparseError("Non-whitespace text content
outside element tags: "
- +
nonWhitespaceForMsg(ch, start, length), position());
+ String text = nonWhitespaceMsg(ch, start, length);
+ throw RDFXMLparseError("Non-whitespace text content
outside element tags: '"+text+"'", position());
}
}
}
}
/** The string for the first non-whitespace index. */
- private static String nonWhitespaceForMsg(char[] ch, int start, int
length) {
+ private static String nonWhitespaceMsg(char[] ch, int start, int length) {
+ final int MaxLen = 10; // Short - this is for error messages
+ // Find the start of non-whitespace.
+ // Slice, truncate if necessary.
+ // Make safe.
for ( int i = start ; i < start + length ; i++ ) {
if ( !Character.isWhitespace(ch[i]) ) {
- int len = Math.min(20, start - i);
- return new String(ch, i, len);
+ // Slight overshoot
+ int remaindingLength = length - (i-start);
+ int len = Math.min(MaxLen, remaindingLength);
+ String x = new String(ch, i, len);
+ if ( remaindingLength > MaxLen )
+ x = x+"...";
+ // Escape characters, especially newlines and backspaces.
+ x = EscapeStr.stringEsc(x);
+ x = x.stripTrailing();
+ return x;
}
}
- throw new RDFXMLParseException("Failed to find any non-whitespace
characters");
+ throw new RDFXMLParseException("Internal error: Failed to find any
non-whitespace characters");
}
@Override
diff --git
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
index e4a0a3a80e..cbbaac3b34 100644
---
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
+++
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_ev/ParserRDFXML_StAX_EV.java
@@ -31,6 +31,7 @@ import javax.xml.stream.events.*;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.atlas.io.IndentedWriter;
+import org.apache.jena.atlas.lib.EscapeStr;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
import org.apache.jena.graph.Node;
@@ -509,6 +510,8 @@ class ParserRDFXML_StAX_EV {
if ( namespace == null || namespace.isEmpty() ) {
// SAX passes xmlns as attributes with namespace and local name of
"". The qname is "xmlns:"/"xmlns"
// StAX, does not pass namespaces.
+
+ //RDFXMLparseError("XML attribute '"+localName+"' used for RDF
property attribute (no namespace)", event);
if ( outputWarnings )
RDFXMLparseWarning("XML attribute '"+localName+"' used for RDF
property attribute - ignored", event);
return false;
@@ -692,10 +695,9 @@ class ParserRDFXML_StAX_EV {
event = nextEventAny();
}
if ( event.isStartElement() ) {
- // DRY!
// Striped - inner node element.
if ( ! isWhitespace(sBuff) ) {
- String msg = nonWhitespaceForMsg(sBuff.toString());
+ String msg = nonWhitespaceMsg(sBuff.toString());
throw RDFXMLparseError("Content before node element.
'"+msg+"'", event);
}
event = processNestedNodeElement(event, subject, property,
emitter);
@@ -1196,8 +1198,11 @@ class ParserRDFXML_StAX_EV {
}
case CHARACTERS, CDATA -> {
Characters chars = ev.asCharacters();
- if ( ! isWhitespace(ev) )
- throw RDFXMLparseError("Read "+str(ev)+" when
expecting a start or end element.", ev);
+ if ( ! isWhitespace(ev) ) {
+ String str = ev.asCharacters().getData();
+ String text = nonWhitespaceMsg(str);
+ throw RDFXMLparseError("Expecting a start or end
element. Got characters '"+text+"'", ev);
+ }
}
case COMMENT, DTD -> { } // Skip
//case SPACE ->
@@ -1488,7 +1493,7 @@ class ParserRDFXML_StAX_EV {
private void noContentAllowed(XMLEvent event) {
if ( event.isCharacters() ) {
String content = event.asCharacters().getData();
- content = nonWhitespaceForMsg(content);
+ content = nonWhitespaceMsg(content);
throw RDFXMLparseError("Expected XML start tag or end tag. Found
text content (possible striping error): \""+content+"\"", event);
}
}
@@ -1628,12 +1633,23 @@ class ParserRDFXML_StAX_EV {
};
}
- /** The string for the first non-whitespace index. */
- private static String nonWhitespaceForMsg(String string) {
- for ( int i = 0 ; i < string.length() ; i++ ) {
+ /** The string for the first non-whitespace */
+ private static String nonWhitespaceMsg(String string) {
+ final int MaxLen = 10; // Short - this is for error messages
+ // Find the start of non-whitespace.
+ // Slice, truncate if necessary.
+ // Make safe.
+ int length = string.length();
+ for ( int i = 0 ; i < length ; i++ ) {
if ( !Character.isWhitespace(string.charAt(i)) ) {
- int index = Math.min(20, string.length()-i);
- return string.substring(index);
+ int len = Math.min(MaxLen, length - i);
+ String x = string.substring(i, i+len);
+ if ( length > MaxLen )
+ x = x+"...";
+ // Escape characters, especially newlines and backspaces.
+ x = EscapeStr.stringEsc(x);
+ x = x.stripTrailing();
+ return x;
}
}
throw new RDFXMLParseException("Failed to find any non-whitespace
characters");
diff --git
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
index 17a1aebafe..32b1024056 100644
---
a/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
+++
b/jena-arq/src/main/java/org/apache/jena/riot/lang/rdfxml/rrx_stax_sr/ParserRDFXML_StAX_SR.java
@@ -35,6 +35,7 @@ import javax.xml.stream.events.XMLEvent;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.atlas.io.IndentedWriter;
+import org.apache.jena.atlas.lib.EscapeStr;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
import org.apache.jena.graph.Node;
@@ -485,6 +486,7 @@ class ParserRDFXML_StAX_SR {
if ( namespace == null || namespace.isEmpty() ) {
// SAX passes xmlns as attributes with namespace and local name of
"". The qname is "xmlns:"/"xmlns"
// StAX, does not pass namespaces.
+ //RDFXMLparseError("XML attribute '"+qName.getLocalPart()+"' used
for RDF property attribute (no namespace)", event);
if ( outputWarnings )
RDFXMLparseWarning("XML attribute '"+qName.getLocalPart()+"'
used for RDF property attribute - ignored");
return false;
@@ -666,7 +668,7 @@ class ParserRDFXML_StAX_SR {
}
if ( lookingAt(event, START_ELEMENT) ) {
if ( ! isWhitespace(accCharacters) ) {
- String msg = nonWhitespaceForMsg(accCharacters.toString());
+ String msg = nonWhitespaceMsg(accCharacters.toString());
throw RDFXMLparseError("Content before node element.
'"+msg+"'");
}
event = processNestedNodeElement(event, subject, property,
emitter);
@@ -1163,8 +1165,10 @@ class ParserRDFXML_StAX_SR {
}
case CHARACTERS, CDATA -> {
String chars = xmlSource.getText();
- if ( ! isWhitespace(chars) )
- throw RDFXMLparseError("Read
"+nonWhitespaceForMsg(chars)+" when expecting a start or end element.");
+ if ( ! isWhitespace(chars) ) {
+ String text = nonWhitespaceMsg(chars);
+ throw RDFXMLparseError("Expecting a start or end
element. Got characters '"+text+"'");
+ }
// Skip
break;
}
@@ -1472,7 +1476,7 @@ class ParserRDFXML_StAX_SR {
private void noContentAllowed(XMLEvent event) {
if ( event.isCharacters() ) {
String content = event.asCharacters().getData();
- content = nonWhitespaceForMsg(content);
+ content = nonWhitespaceMsg(content);
throw RDFXMLparseError("Expected XML start tag or end tag. Found
text content (possible striping error): \""+content+"\"");
}
}
@@ -1622,11 +1626,22 @@ class ParserRDFXML_StAX_SR {
}
/** The string for the first non-whitespace index. */
- private static String nonWhitespaceForMsg(String string) {
- for ( int i = 0 ; i < string.length() ; i++ ) {
+ private static String nonWhitespaceMsg(String string) {
+ final int MaxLen = 10; // Short - this is for error messages
+ // Find the start of non-whitespace.
+ // Slice, truncate if necessary.
+ // Make safe.
+ int length = string.length();
+ for ( int i = 0 ; i < length ; i++ ) {
if ( !Character.isWhitespace(string.charAt(i)) ) {
- int index = Math.min(20, string.length()-i);
- return string.substring(index);
+ int len = Math.min(MaxLen, length - i);
+ String x = string.substring(i, i+len);
+ if ( length > MaxLen )
+ x = x+"...";
+ // Escape characters, especially newlines and backspaces.
+ x = EscapeStr.stringEsc(x);
+ x = x.stripTrailing();
+ return x;
}
}
throw new RDFXMLParseException("Failed to find any non-whitespace
characters");
diff --git
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java
index 9c268d5abd..230105e600 100644
---
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java
+++
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/TC_RIOT_RDFXML.java
@@ -26,10 +26,10 @@ import org.junit.runners.Suite;
@RunWith(Suite.class)
@Suite.SuiteClasses( {
- // Local file and rdf11-xml - detailed checking.
+ // Local tests, extensions and error reports.
TS_RRX.class,
- // Manifest-driven rdf11-xml - all parsers
+ // Manifest-driven RDF 1.1 rdf-xml test suite - all parsers
Scripts_RRX_RDFXML.class,
// jena-core legacy test (RDF 1.0)
@@ -43,7 +43,7 @@ import org.junit.runners.Suite;
* converted to run as RIOT tests.
* <p>
* {@linkplain TS_RRX} runs local RRX tests by comparing the different RRX
parsers
- * to ARP1. tese test check for the same number of warnig as well.
+ * to ARP1. These tests check for the same number of warnings as well.
* The {@code TestRDFXML_RRX_*} are running on extra local files. The
* TestRDFXML_W3C_* are running on the RDF 1.0 test suite that ARP1 has used.
* <p>
diff --git
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TS_ConvertedARP1.java
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TS_ConvertedARP1.java
index 8379d47d78..e479ff7414 100644
---
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TS_ConvertedARP1.java
+++
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TS_ConvertedARP1.java
@@ -32,5 +32,8 @@ import org.junit.runners.Suite.SuiteClasses;
/**
* The ARP (final) tests from jena-core, so related to RDF 1.0 test suite,
* then converted to run as RIOT tests.
+ *
+ * The files used are in "testing/RIOT/rdf11-xml/"
+ * which have been updated for RDF 1.1 if necessary.
*/
public class TS_ConvertedARP1 {}
diff --git
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TestARP1_W3C_Pending.java
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TestARP1_W3C_Pending.java
index 305d11409f..b5cd53e162 100644
---
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TestARP1_W3C_Pending.java
+++
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/converted_legacy/TestARP1_W3C_Pending.java
@@ -24,7 +24,9 @@ import org.junit.Test;
/**
* The ARP test suite run on a local legacy copy of the RDF 1.0 WG test suite
- * (updated for RDF 1.1). Tests marked pending.
+ * (updated for RDF 1.1).
+ *
+ * Tests marked pending.
*/
public class TestARP1_W3C_Pending {
@Test
diff --git
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java
index 15bcfca775..afe97d071b 100644
---
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java
+++
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/Scripts_RRX_RDFXML.java
@@ -24,6 +24,7 @@ import org.junit.runners.Suite.SuiteClasses;
/**
* Run all the parsers on the rdf-test CG RDF/XML manifest files (RDF 1.1).
+ * Files in "testing/RIOT/rdf11-xml/".
*/
@RunWith(Suite.class)
@SuiteClasses( {
@@ -32,7 +33,7 @@ import org.junit.runners.Suite.SuiteClasses;
TestManifest_RDF11_ARP1.class,
TestManifest_RDF11_RRX_SAX.class,
TestManifest_RDF11_RRX_StAXsr.class,
- TestManifest_RDF_RRX_StAXev.class
+ TestManifest_RDF11_RRX_StAXev.class
})
public class Scripts_RRX_RDFXML {}
diff --git
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF_RRX_StAXev.java
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF11_RRX_StAXev.java
similarity index 97%
rename from
jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF_RRX_StAXev.java
rename to
jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF11_RRX_StAXev.java
index 859c92bbb8..fb904943f6 100644
---
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF_RRX_StAXev.java
+++
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/manifest_rdf11/TestManifest_RDF11_RRX_StAXev.java
@@ -36,7 +36,7 @@ import org.junit.runner.RunWith;
"testing/RIOT/rdf11-xml/manifest.ttl"
})
-public class TestManifest_RDF_RRX_StAXev {
+public class TestManifest_RDF11_RRX_StAXev {
static ReaderRIOTFactory systemReaderfactory;
@BeforeClass
diff --git
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java
index b25bb22968..02c3162121 100644
---
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java
+++
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/AbstractTestRDFXML_RRX.java
@@ -44,6 +44,6 @@ public abstract class AbstractTestRDFXML_RRX {
}
@Test public void test() {
- RunTestRDFXML.runTest(testLabel, factory, implName, filename);
+ RunTestRDFXML.runTestCompareARP(testLabel, factory, implName,
filename);
}
}
diff --git
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
index 09aaea80ec..da6648a05b 100644
---
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
+++
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/RunTestRDFXML.java
@@ -72,10 +72,12 @@ public class RunTestRDFXML {
}
/**
- * Manifest-like in that the test files in a specific order.
+ * Manifest-like in that the test files are run in a specific order.
* The local files cover all the features of RDF/XML parsing
* but not in great depth.
* These tests more easily highlight problems and the grouping helps.
+ *
+ * Check the files on disk agree with the built-in order list.
*/
static List<String> localTestFiles() {
Path LOCAL_DIR = Path.of("testing/RIOT/rrx/");
@@ -184,7 +186,6 @@ public class RunTestRDFXML {
);
for ( String fn : testfiles ) {
-
if ( ! found.contains(fn) )
output.printf("Not found in file area: %s\n", fn);
}
@@ -216,17 +217,6 @@ public class RunTestRDFXML {
return x;
}
- static void runTest(String label, ReaderRIOTFactory factory, String
implLabel, String filename) {
- try {
- runTestCompareARP(label, factory, implLabel, filename);
- } catch(Throwable ex) {
- throw new RuntimeException(filename, ex) {
- @Override
- public Throwable fillInStackTrace() { return this; }
- };
- }
- }
-
static class ErrorHandlerCollector implements ErrorHandler {
List<String> warnings = new ArrayList<>();
List<String> errors = new ArrayList<>();
@@ -295,9 +285,9 @@ public class RunTestRDFXML {
String testFullLabel = format("-- Test : %-4s : %s", testLabel,
filename);
Graph expectedGraph;
- // -- "Reference" implementation
ErrorHandlerCollector errorHandlerReference = new
ErrorHandlerCollector();
try {
+ // Reference expectation
expectedGraph = parseFile(referenceFactory, errorHandlerReference,
filename);
} catch (RiotException ex) {
// Exception expected. Run as "failure test"
@@ -310,6 +300,15 @@ public class RunTestRDFXML {
}
+ /**
+ * Run a test: a single parse using the given reader factory.
+ */
+ public static void runTestPlain(String label, ReaderRIOTFactory
testSubjectFactory, String implLabel, String filename) {
+ String testLabel = format("-- Test : %-4s : %s", implLabel, filename);
+ ErrorHandlerCollector errorHandlerReference = new
ErrorHandlerCollector();
+ parseFile(testSubjectFactory, errorHandlerReference, filename);
+ }
+
/**
* Run a test, expecting a graph as the result.
*/
diff --git
a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
index 00eac3a12b..8c16eec860 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/rdfxml/rrx/TestRRX.java
@@ -39,10 +39,20 @@ import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;
-/** Cases where the RRX parsers differ from ARP */
+/**
+ * Additional tests for RRX:
+ * <ul>
+ * <li>errors and warnings not in the W3C manifest files</li>
+ * <li>additional reports</li>
+ * <li>extensions to RDF/XML</li>
+ * </ul>
+ */
@RunWith(Parameterized.class)
public class TestRRX {
+
+ private static String DIR = "testing/RIOT/rrx-files/";
+
@Parameters(name = "{index}: {0} {1}")
public static Iterable<Object[]> data() {
List<Object[]> x = new ArrayList<>();
@@ -60,17 +70,38 @@ public class TestRRX {
this.lang = lang;
}
- @Test public void error01() {
- errorTest("error01.rdf");
+ // Tests for more than one object in RDF/XML striping.
+ @Test public void error_multiple_objects_lex_node() {
+ errorTest("multiple_objects_lex_node.rdf");
+ }
+
+ @Test public void error_multiple_objects_node_lex() {
+ errorTest("multiple_objects_node_lex.rdf");
+ }
+
+ @Test public void error_multiple_objects_node_node() {
+ errorTest("multiple_objects_node_node.rdf");
+ }
+
+ // Check that the "one object" parse state does not impact deeper
structures.
+ @Test public void nested_object() {
+ goodTest("nested_object.rdf");
}
- @Test public void warn_literal() {
+ // rdf:parseType=
+ @Test public void error_parseType_unknown() {
+ // This is only a warning in ARP.
+ errorTest("parseType-unknown.rdf", false);
+ }
+
+ @Test public void warn_parseType_extension_1() {
// Now valid. parseType="literal" -> parseType="Literal"
// because ARP behaved that way.
// Warning issued.
warningTest("warn01.rdf", 1);
}
+ // CIM
@Test public void cim_statements01() {
// parseType="Statements"
// because ARP behaved that way.
@@ -79,6 +110,7 @@ public class TestRRX {
warningTest("cim_statements01.rdf", 2);
}
+ // misc
@Test public void noBase01() {
// Call with no base; no base needed.
noBase("file-no-base.rdf");
@@ -92,7 +124,7 @@ public class TestRRX {
private void noBase(String filename) {
ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang);
- String fn = "testing/RIOT/rrx-files/"+filename;
+ String fn = DIR+filename;
ErrorHandlerCollector errorHandler = new ErrorHandlerCollector();
ParserProfile parserProfile =
RiotLib.createParserProfile(RiotLib.factoryRDF(), errorHandler, true);
ReaderRIOT reader = factory.create(lang, parserProfile);
@@ -105,16 +137,29 @@ public class TestRRX {
}
}
+ private void goodTest(String filename) {
+ ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang);
+ String fn = DIR+filename;
+ RunTestRDFXML.runTestPlain(filename, factory, label, fn);
+ RunTestRDFXML.runTestCompareARP(fn, factory, label, fn);
+ }
+
private void warningTest(String filename, int warnings) {
ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang);
- String fn = "testing/RIOT/rrx-files/"+filename;
+ String fn = DIR+filename;
RunTestRDFXML.runTestExpectWarning(filename, factory, label, warnings,
fn);
+ RunTestRDFXML.runTestCompareARP(fn, factory, label, fn);
}
private void errorTest(String filename) {
+ errorTest(filename, true);
+ }
+
+ private void errorTest(String filename, boolean compare) {
ReaderRIOTFactory factory = RDFParserRegistry.getFactory(lang);
- String fn = "testing/RIOT/rrx-files/"+filename;
+ String fn = DIR+filename;
RunTestRDFXML.runTestExpectFailure(filename, factory, label, fn);
+ if ( compare )
+ RunTestRDFXML.runTestCompareARP(fn, factory, label, fn);
}
-
}
diff --git a/jena-arq/testing/RIOT/rrx-files/README
b/jena-arq/testing/RIOT/rrx-files/README
index 79eefb4d90..2f50a97add 100644
--- a/jena-arq/testing/RIOT/rrx-files/README
+++ b/jena-arq/testing/RIOT/rrx-files/README
@@ -1,5 +1,13 @@
-# Extensions to RDF/XML
+## Files for specific tests of RRX.
+
+This directory contains files used for specific tests of RRX to supplement the
+basic positive tests (in testing/RIOT/rrx/)
+
+- errors and extensions.
+See TestRRX.java
+
+# Extensions to RDF/XML
CIM - uses rdf:parseType="Statements" for rdf:parseType="Literal"
https://github.com/apache/jena/issues/2473
diff --git a/jena-arq/testing/RIOT/rrx-files/error01.rdf
b/jena-arq/testing/RIOT/rrx-files/multiple_objects_lex_node.rdf
similarity index 51%
copy from jena-arq/testing/RIOT/rrx-files/error01.rdf
copy to jena-arq/testing/RIOT/rrx-files/multiple_objects_lex_node.rdf
index 1ac33893fb..7e39b6b036 100644
--- a/jena-arq/testing/RIOT/rrx-files/error01.rdf
+++ b/jena-arq/testing/RIOT/rrx-files/multiple_objects_lex_node.rdf
@@ -1,14 +1,13 @@
<?xml version='1.0'?>
<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0
-->
-<rdf:RDF
+<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:ex="http://example/"
- >
-
- <!-- rdf:parseType not recognized -->
- <rdf:Description rdf:about="http://host/subject">
- <ex:property rdf:parseType="unknown"/>
+ xmlns:ex="http://example/">
+ <rdf:Description rdf:about="http://example/s">
+ <ex:property>
+ TEXT
+ <ex:object2/>
+ </ex:property>
</rdf:Description>
-
</rdf:RDF>
diff --git a/jena-arq/testing/RIOT/rrx-files/error01.rdf
b/jena-arq/testing/RIOT/rrx-files/multiple_objects_node_lex.rdf
similarity index 51%
copy from jena-arq/testing/RIOT/rrx-files/error01.rdf
copy to jena-arq/testing/RIOT/rrx-files/multiple_objects_node_lex.rdf
index 1ac33893fb..4336b6eb4b 100644
--- a/jena-arq/testing/RIOT/rrx-files/error01.rdf
+++ b/jena-arq/testing/RIOT/rrx-files/multiple_objects_node_lex.rdf
@@ -1,14 +1,13 @@
<?xml version='1.0'?>
<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0
-->
-<rdf:RDF
+<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:ex="http://example/"
- >
-
- <!-- rdf:parseType not recognized -->
- <rdf:Description rdf:about="http://host/subject">
- <ex:property rdf:parseType="unknown"/>
+ xmlns:ex="http://example/">
+ <rdf:Description rdf:about="http://example/s">
+ <ex:property>
+ <ex:object1/>
+ TEXT
+ </ex:property>
</rdf:Description>
-
</rdf:RDF>
diff --git a/jena-arq/testing/RIOT/rrx-files/error01.rdf
b/jena-arq/testing/RIOT/rrx-files/multiple_objects_node_node.rdf
similarity index 51%
copy from jena-arq/testing/RIOT/rrx-files/error01.rdf
copy to jena-arq/testing/RIOT/rrx-files/multiple_objects_node_node.rdf
index 1ac33893fb..63dc5d5785 100644
--- a/jena-arq/testing/RIOT/rrx-files/error01.rdf
+++ b/jena-arq/testing/RIOT/rrx-files/multiple_objects_node_node.rdf
@@ -1,14 +1,13 @@
<?xml version='1.0'?>
<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0
-->
-<rdf:RDF
+<rdf:RDF
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
- xmlns:ex="http://example/"
- >
-
- <!-- rdf:parseType not recognized -->
- <rdf:Description rdf:about="http://host/subject">
- <ex:property rdf:parseType="unknown"/>
+ xmlns:ex="http://example/">
+ <rdf:Description rdf:about="http://example/s">
+ <ex:property>
+ <ex:object1/>
+ <ex:object2/>
+ </ex:property>
</rdf:Description>
-
</rdf:RDF>
diff --git a/jena-arq/testing/RIOT/rrx-files/nested_object.rdf
b/jena-arq/testing/RIOT/rrx-files/nested_object.rdf
new file mode 100644
index 0000000000..bde480de20
--- /dev/null
+++ b/jena-arq/testing/RIOT/rrx-files/nested_object.rdf
@@ -0,0 +1,17 @@
+<?xml version='1.0'?>
+<!-- Licensed under the terms of https://www.apache.org/licenses/LICENSE-2.0
-->
+
+<rdf:RDF
+ xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+ xmlns:ex="http://example/">
+ <rdf:Description rdf:about="http://example/s">
+ <ex:property1>
+ <rdf:Description rdf:about="http://example/o1">
+ <ex:property2>NESTED1</ex:property2>
+ <ex:property3>NESTED2</ex:property3>
+ <!-- white space -->
+
+ </rdf:Description>
+ </ex:property1>
+ </rdf:Description>
+</rdf:RDF>
diff --git a/jena-arq/testing/RIOT/rrx-files/error01.rdf
b/jena-arq/testing/RIOT/rrx-files/parseType-unknown.rdf
similarity index 100%
rename from jena-arq/testing/RIOT/rrx-files/error01.rdf
rename to jena-arq/testing/RIOT/rrx-files/parseType-unknown.rdf
diff --git a/jena-arq/testing/RIOT/rrx/README b/jena-arq/testing/RIOT/rrx/README
new file mode 100644
index 0000000000..399d3bf1ca
--- /dev/null
+++ b/jena-arq/testing/RIOT/rrx/README
@@ -0,0 +1,4 @@
+== Basic positive tests for RRX
+
+The tests in this directory cover RDF/XML features.
+See RunTestRDFXML.java.