This is an automated email from the ASF dual-hosted git repository.
andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git
The following commit(s) were added to refs/heads/main by this push:
new 070203ea72 GH-1551: IRI with tab - don't return null after error
new efb53021cf Merge pull request #1571 from afs/ttl-tab
070203ea72 is described below
commit 070203ea72cc17c8da59f24be293e4ed878233a6
Author: Andy Seaborne <[email protected]>
AuthorDate: Sat Oct 1 21:09:02 2022 +0100
GH-1551: IRI with tab - don't return null after error
---
.../src/main/java/org/apache/jena/riot/tokens/TokenizerText.java | 2 +-
.../src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java | 7 +++++++
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java
index bdb66badf1..9c02bf0b24 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java
@@ -527,7 +527,7 @@ public final class TokenizerText implements Tokenizer
// Probably a corrupt file so treat as fatal.
fatal("Bad character in IRI (bad character: '<'): <%s[<]...>", stringBuilder.toString()); return null;
case TAB:
- error("Bad character in IRI (Tab character): <%s[tab]...>", stringBuilder.toString()); return null;
+ error("Bad character in IRI (tab character): <%s[tab]...>", stringBuilder.toString()); break;
case '{': case '}': case '"': case '|': case '^': case '`' :
if ( ! VeryVeryLaxIRI )
warning("Illegal character in IRI (codepoint 0x%02X, '%c'): <%s[%c]...>", ch, (char)ch, stringBuilder.toString(), (char)ch);
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java
index e0c6d57d1a..0ebe54a8a8 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java
@@ -1044,6 +1044,13 @@ public class TestTokenizer {
// and no escaped characters for blank node labels.
}
+ @Test(expected=RiotException.class)
+ public void tokenIRI_tab() {
+ // Raw tab in a IRI string. Illegal - this is an error.
+ Tokenizer tokenizer = tokenizer("<http://example/invalid/iri/with_\t_tab>") ;
+ testNextToken(tokenizer, TokenType.IRI) ;
+ }
+
private static Token testExpectWarning(String input, TokenType expectedTokenType, int warningCount) {
PeekReader r = PeekReader.readString(input);
return testExpectWarning(r, expectedTokenType, warningCount);