This is an automated email from the ASF dual-hosted git repository.
andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git
The following commit(s) were added to refs/heads/main by this push:
new 452c8d287a GH-2715: Update TokenizerText. Fix signed number tokenizing; add RDF 1.2 tokens.
452c8d287a is described below
commit 452c8d287a930bb15474f3c30071aa572b27e87d
Author: Andy Seaborne <[email protected]>
AuthorDate: Fri Sep 20 16:28:18 2024 +0100
GH-2715: Update TokenizerText. Fix signed number tokenizing; add RDF 1.2 tokens.
---
.../java/org/apache/jena/riot/tokens/Token.java | 52 +-
.../org/apache/jena/riot/tokens/TokenChecker.java | 2 +-
.../apache/jena/riot/tokens/TokenCheckerBase.java | 2 +-
.../org/apache/jena/riot/tokens/TokenType.java | 11 +-
.../org/apache/jena/riot/tokens/TokenizerText.java | 299 ++++---
...eTextBuilder.java => TokenizerTextBuilder.java} | 18 +-
.../jena/riot/lang/AbstractTestLangNTuples.java | 4 +-
.../org/apache/jena/riot/tokens/TS_Tokens.java | 2 +-
.../{TestTokenizer.java => TestTokenizerText.java} | 970 ++++++++++++---------
.../main/java/org/apache/jena/atlas/lib/Chars.java | 9 +-
10 files changed, 767 insertions(+), 602 deletions(-)
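
For orientation, a minimal sketch (not part of the commit) of the two behaviours this change covers, using the public TokenizerText API that appears in the diff below; the class name is hypothetical and the expected token types follow the new tests:

    import org.apache.jena.riot.tokens.Tokenizer;
    import org.apache.jena.riot.tokens.TokenizerText;

    public class TokenizerSketch {
        public static void main(String[] args) {
            // Signed numbers: "+.5" now tokenizes as a single DECIMAL,
            // while "+" not followed by a number stays a PLUS token.
            Tokenizer numbers = TokenizerText.fromString("+.5 -.1e0 123.");
            while ( numbers.hasNext() )
                System.out.println(numbers.next());   // DECIMAL, DOUBLE, INTEGER, DOT

            // RDF 1.2 triple terms: "<<(" and ")>>" become L_TRIPLE and R_TRIPLE.
            Tokenizer tripleTerm = TokenizerText.fromString("<<( :s :p :o )>>");
            while ( tripleTerm.hasNext() )
                System.out.println(tripleTerm.next());
        }
    }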
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java
index 3328111f9a..f65daa183b 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java
@@ -18,7 +18,6 @@
package org.apache.jena.riot.tokens;
-import static org.apache.jena.atlas.lib.Chars.*;
import static org.apache.jena.atlas.lib.Lib.hashCodeObject;
import static org.apache.jena.riot.tokens.TokenType.*;
@@ -56,7 +55,6 @@ public final class Token
private Token subToken2 = null; // A related token (used for datatype literals and language tags)
private StringType stringType = null;
- public int cntrlCode = 0;
private long column;
private long line;
@@ -70,7 +68,6 @@ public final class Token
//public final String getImage1() { return tokenImage1; }
public final String getImage2() { return tokenImage2; }
- public final int getCntrlCode() { return cntrlCode; }
public final Token getSubToken1() { return subToken1; }
public final Token getSubToken2() { return subToken2; }
public final StringType getStringType() { return stringType; }
@@ -91,8 +88,6 @@ public final class Token
public final Token setImage2(String tokenImage2) { this.tokenImage2 = tokenImage2; return this; }
- public final Token setCntrlCode(int cntrlCode) { this.cntrlCode = cntrlCode; return this; }
-
public final Token setSubToken1(Token subToken) { this.subToken1 = subToken; return this; }
public final Token setSubToken2(Token subToken) { this.subToken2 = subToken; return this; }
@@ -157,7 +152,6 @@ public final class Token
this(token.tokenType,
token.tokenImage, token.tokenImage2,
token.subToken1, token.subToken2);
- this.cntrlCode = token.cntrlCode;
this.line = token.line;
this.column = token.column;
}
@@ -221,43 +215,34 @@ public final class Token
else
sb.append(getType().toString());
- if ( getImage() != null )
- {
+ if ( getImage() != null ) {
sb.append(":");
sb.append(delim1);
sb.append(getImage());
sb.append(delim1);
}
- if ( getImage2() != null )
- {
+ if ( getImage2() != null ) {
sb.append(":");
sb.append(delim2);
sb.append(getImage2());
sb.append(delim2);
}
- if ( getSubToken1() != null )
- {
+ if ( getSubToken1() != null ) {
sb.append(";");
sb.append(delim2);
sb.append(getSubToken1().toString());
sb.append(delim2);
}
- if ( getSubToken2() != null )
- {
+ if ( getSubToken2() != null ) {
sb.append(";");
sb.append(delim2);
sb.append(getSubToken2().toString());
sb.append(delim2);
}
- if ( getCntrlCode() != 0 )
- {
- sb.append(":");
- sb.append(getCntrlCode());
- }
sb.append("]");
return sb.toString();
}
@@ -427,7 +412,7 @@ public final class Token
@Override
public int hashCode() {
- return hashCodeObject(tokenType) ^ hashCodeObject(tokenImage) ^ hashCodeObject(tokenImage2) ^ hashCodeObject(cntrlCode);
+ return hashCodeObject(tokenType) ^ hashCodeObject(tokenImage) ^ hashCodeObject(tokenImage2);
}
@Override
@@ -435,32 +420,7 @@ public final class Token
if ( !(other instanceof Token t) )
return false;
return Objects.equals(tokenType, t.tokenType) && Objects.equals(tokenImage, t.tokenImage)
- && Objects.equals(tokenImage2, t.tokenImage2) && Objects.equals(cntrlCode, t.cntrlCode);
- }
-
- public static Token tokenForChar(char character) {
- switch (character) {
- case CH_DOT :
- return new Token(TokenType.DOT);
- case CH_SEMICOLON :
- return new Token(TokenType.SEMICOLON);
- case CH_COMMA :
- return new Token(TokenType.COMMA);
- case CH_LBRACE :
- return new Token(TokenType.LBRACE);
- case CH_RBRACE :
- return new Token(TokenType.RBRACE);
- case CH_LPAREN :
- return new Token(TokenType.LPAREN);
- case CH_RPAREN :
- return new Token(TokenType.RPAREN);
- case CH_LBRACKET :
- return new Token(TokenType.LBRACKET);
- case CH_RBRACKET :
- return new Token(TokenType.RBRACKET);
- default :
- throw new RuntimeException("Token error: unrecognized character: " + character);
- }
+ && Objects.equals(tokenImage2, t.tokenImage2);
}
public static Token tokenForInteger(long value) {
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenChecker.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenChecker.java
index aa0e875b20..3b466fed5c 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenChecker.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenChecker.java
@@ -27,7 +27,7 @@ public interface TokenChecker
public void checkURI(String uriStr);
public void checkNumber(String lexical, String datatypeURI);
public void checkVariable(String tokenImage);
- public void checkDirective(int cntrlCode);
+ public void checkDirective(String directive);
public void checkKeyword(String lexical);
public void checkPrefixedName(String prefixName, String localName);
public void checkControl(int code);
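
As a hypothetical illustration of this signature change: a checker now receives the directive by name rather than as a control code. A sketch, extending the do-nothing TokenCheckerBase also updated in this commit (the accepted directive names are examples only):

    // Hypothetical checker: accept only @prefix and @base directives.
    public class DirectiveChecker extends TokenCheckerBase {
        @Override
        public void checkDirective(String directive) {
            if ( !directive.equals("prefix") && !directive.equals("base") )
                throw new IllegalArgumentException("Unexpected directive: @" + directive);
        }
    }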
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenCheckerBase.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenCheckerBase.java
index 6ffb5dda4c..11cb40d1eb 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenCheckerBase.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenCheckerBase.java
@@ -31,7 +31,7 @@ public class TokenCheckerBase implements TokenChecker
{}
@Override
- public void checkDirective(int cntrlCode)
+ public void checkDirective(String directive)
{}
@Override
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenType.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenType.java
index 0ef1062944..813c45d756 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenType.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenType.java
@@ -35,17 +35,20 @@ public enum TokenType {
DOT, COMMA, SEMICOLON, COLON, DIRECTIVE,
// LT, GT, LE, GE are only visible if IRI processing is not enabled.
LT, GT, LE, GE, LOGICAL_AND, LOGICAL_OR, // && and ||
- LT2, GT2, // << and >>
- // RDF-star annotation syntax {|...|}
+ // << and >> : Used in RDF 1.2
+ LT2, GT2,
+ // RDF 1.2 star triple terms <<( ... )>>
+ L_TRIPLE, R_TRIPLE,
+ // RDF 1.2 annotation syntax {|...|}
L_ANN, R_ANN,
VBAR, AMPERSAND,
LBRACE, RBRACE, // {}
LPAREN, RPAREN, // ()
LBRACKET, RBRACKET, // []
- // = == + - * / \ ! ?
- EQUALS, EQUIVALENT, PLUS, MINUS, STAR, SLASH, RSLASH, EMARK, QMARK,
+ // = == + - * / \ ! ? ~
+ EQUALS, EQUIVALENT, PLUS, MINUS, STAR, SLASH, RSLASH, EMARK, QMARK, TILDE,
// Whitespace, any comment, (one line comment, multiline comment)
NL, WS, COMMENT, COMMENT1, COMMENT2, EOF;
}
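
Illustrative use of the new constants (a sketch, not commit code, assuming the imports from the first sketch): the quoted-triple delimiters and the new triple-term delimiters tokenize differently.

    Tokenizer t1 = TokenizerText.fromString("<< :s :p :o >>");
    System.out.println(t1.next().getType());   // LT2, i.e. "<<"

    Tokenizer t2 = TokenizerText.fromString("<<( :s :p :o )>>");
    System.out.println(t2.next().getType());   // L_TRIPLE, i.e. "<<("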
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java
index 2f8bd3a5a5..90767fe044 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java
@@ -35,37 +35,35 @@ import org.apache.jena.sparql.ARQInternalErrorException;
/**
* Tokenizer for the Turtle family of syntaxes.
- * Supports addition tokens.
+ * Supports additional tokens.
*/
public final class TokenizerText implements Tokenizer
{
- // Drop through to final general symbol/keyword reader, including <=, !=
- // Care with <=
- // Policy driven for CURIES?
-
- private static final int CTRL_CHAR = CH_STAR;
-
// The code has the call points for checking tokens but it is generally better to
// do the check later in the parsing process. In case a need arises, the code
// remains, all compiled away by "if ( false )" (javac does not generate any
// bytecodes and even if it did, JIT will remove dead branches).
private static final boolean Checking = false;
+ // Optional checker.
+ private final TokenChecker checker = null;
+ // Workspace for building token images.
+ // Reusing a StringBuilder is faster than allocating a fresh one each time.
private final StringBuilder stringBuilder = new StringBuilder(200);
+
+ // Character source
private final PeekReader reader;
// Whether whitespace between tokens includes newlines (in various forms).
private final boolean singleLineMode;
// Indicator. The PeekReader should throw java.nio.charset.MalformedInputException
private final boolean isASCII;
- private final TokenChecker checker = null;
// The code assumes that errors throw exception and so stop parsing.
private final ErrorHandler errorHandler;
private Token token = null;
private boolean finished = false;
-
- public static TokenizeTextBuilder create() { return new TokenizeTextBuilder(); }
+ public static TokenizerTextBuilder create() { return new TokenizerTextBuilder(); }
public static Tokenizer fromString(String string) { return create().fromString(string).build(); }
@@ -133,22 +131,6 @@ public final class TokenizerText implements Tokenizer
return token;
}
-// private TokenChecker getChecker() {
-// return checker;
-// }
-//
-// private void setChecker(TokenChecker checker) {
-// this.checker = checker;
-// }
-//
-// private ErrorHandler getErrorHandler() {
-// return errorHandler;
-// }
-//
-// private void setErrorHandler(ErrorHandler handler) {
-// this.errorHandler = handler;
-// }
-
@Override
public void close() {
IO.close(reader);
@@ -191,26 +173,33 @@ public final class TokenizerText implements Tokenizer
int ch = reader.peekChar();
- // ---- IRI, unless it's <<.
+ // ---- IRI, unless it's << or <<(
// [spc] check is for LT.
if ( ch == CH_LT ) {
// Look ahead on char
reader.readChar();
- int chPeek = reader.peekChar();
- if ( chPeek != '<' ) {
+ int chPeek2 = reader.peekChar();
+ if ( chPeek2 != '<' ) {
+ // '<' not '<<'
token.setImage(readIRI());
token.setType(TokenType.IRI);
if ( Checking )
checkURI(token.getImage());
return token;
}
- if ( chPeek == '<' ) {
- reader.readChar();
+ reader.readChar();
+ // '<<' so far - maybe '<<('
+ int chPeek3 = reader.peekChar();
+ if ( chPeek3 != '(' ) {
token.setType(TokenType.LT2);
//token.setImage("<<");
return token;
}
- fatal("Internal error - parsed '%c' after '<'", chPeek);
+ // It is <<(
+ reader.readChar();
+ token.setType(TokenType.L_TRIPLE);
+ //token.setImage("<<(");
+ return token;
}
// ---- Literal
@@ -318,7 +307,7 @@ public final class TokenizerText implements Tokenizer
token.setType(TokenType.DIRECTIVE);
token.setImage(readWord(false));
if ( Checking )
- checkDirective(token.cntrlCode);
+ checkDirective(token.getImage());
return token;
}
@@ -337,18 +326,23 @@ public final class TokenizerText implements Tokenizer
switch(ch)
{
- // DOT can start a decimal. Check for digit.
+ // DOT can start a decimal.
case CH_DOT:
reader.readChar();
ch = reader.peekChar();
if ( range(ch, '0', '9') ) {
- // Not a DOT after all.
+ // DOT DIGIT - it's a number.
+ // Reload the DOT.
reader.pushbackChar(CH_DOT);
- readNumber();
- if ( Checking )
- checkNumber(token.getImage(), token.getImage2());
- return token;
+ boolean charactersConsumed = readNumber(CH_ZERO, false);
+ if ( charactersConsumed ) {
+ if ( Checking )
+ checkNumber(token.getImage(), token.getImage2());
+ return token;
+ }
+ // else it's DOT - drop through.
}
+ // It's DOT.
token.setType(TokenType.DOT);
return token;
@@ -384,7 +378,31 @@ public final class TokenizerText implements Tokenizer
case CH_RBRACE: reader.readChar(); token.setType(TokenType.RBRACE); /*token.setImage(CH_RBRACE);*/ return token;
case CH_LPAREN: reader.readChar(); token.setType(TokenType.LPAREN); /*token.setImage(CH_LPAREN);*/ return token;
- case CH_RPAREN: reader.readChar(); token.setType(TokenType.RPAREN); /*token.setImage(CH_RPAREN);*/ return token;
+
+ // Can be ')' or ')>>'
+ case CH_RPAREN: {
+ // The ')'
+ reader.readChar();
+ int peek2 = reader.peekChar();
+ if ( peek2 != '>') {
+ // Includes EOF.
+ token.setType(TokenType.RPAREN);
+ return token;
+ }
+ reader.readChar();
+ int peek3 = reader.peekChar();
+ if ( peek3 != '>') {
+ reader.pushbackChar(peek2);
+ token.setType(TokenType.RPAREN);
+ return token;
+ }
+ // It is ')>>'
+ reader.readChar();
+ token.setType(TokenType.R_TRIPLE);
+ /*token.setImage(")>>");*/
+ return token;
+ }
+
case CH_LBRACKET: reader.readChar(); token.setType(TokenType.LBRACKET); /*token.setImage(CH_LBRACKET);*/ return token;
case CH_RBRACKET: reader.readChar(); token.setType(TokenType.RBRACKET); /*token.setImage(CH_RBRACKET);*/ return token;
case CH_EQUALS: reader.readChar(); token.setType(TokenType.EQUALS); /*token.setImage(CH_EQUALS);*/ return token;
@@ -414,10 +432,11 @@ public final class TokenizerText implements Tokenizer
case CH_STAR: reader.readChar(); token.setType(TokenType.STAR); /*token.setImage(CH_STAR);*/ return token;
case CH_EMARK: reader.readChar(); token.setType(TokenType.EMARK); /*token.setImage(CH_EMARK);*/ return token;
+ case CH_TILDE: reader.readChar(); token.setType(TokenType.TILDE); /*token.setImage(CH_TILDE);*/ return token;
+
// VAR overrides
- //case CH_QMARK: reader.readChar(); token.setType(TokenType.QMARK); /*token.setImage(CH_EMARK);*/ return token;
+ //case CH_QMARK: reader.readChar(); token.setType(TokenType.QMARK); /*token.setImage(CH_EMARK);*/ return token;
- // XXX Multi-character symbols
// Two character tokens && || GE >= , LE <=
//TokenType.LE
//TokenType.GE
@@ -426,7 +445,7 @@ public final class TokenizerText implements Tokenizer
}
// ---- Numbers.
- // A plain "+" and "-", not followed by a digit, are symbols.
+ // A plain "+" and "-", not followed by an unsigned number are symbols.
/*
[16] integer ::= ('-' | '+') ? [0-9]+
@@ -439,37 +458,42 @@ public final class TokenizerText implements Tokenizer
*/
- // TODO extract readNumberNoSign
-
- int signCh = 0;
-
if ( ch == CH_PLUS || ch == CH_MINUS ) {
reader.readChar();
int ch2 = reader.peekChar();
-
- if ( !range(ch2, '0', '9') ) {
- // ch was end of symbol.
- // reader.readChar();
+ if ( !range(ch2, '0', '9') && ch2 != CH_DOT ) {
+ // Not a number.
if ( ch == CH_PLUS )
token.setType(TokenType.PLUS);
else
token.setType(TokenType.MINUS);
return token;
}
-
- // Already got a + or - ...
- // readNumberNoSign
- // Because next, old code processes signs.
- reader.pushbackChar(ch);
- signCh = ch;
- // Drop to next "if"
+ // ch2 not consumed.
+ boolean charactersConsumed = readNumber(ch, false);
+ if ( ! charactersConsumed ) {
+ if ( ch == CH_PLUS )
+ token.setType(TokenType.PLUS);
+ else
+ token.setType(TokenType.MINUS);
+ }
+ return token;
}
- if ( ch == CH_PLUS || ch == CH_MINUS || range(ch, '0', '9') ) {
- // readNumberNoSign
- readNumber();
- if ( Checking )
- checkNumber(token.getImage(), token.getImage2());
+ if ( range(ch, '0', '9') ) {
+ reader.readChar();
+ if ( ch == '0' ) {
+ // Is it "hex" -- 0x/0X ?
+ boolean isHex = readPossibleHex();
+ if ( isHex )
+ return token;
+ }
+ // Not hex.
+ boolean charactersConsumed = readNumber(ch, true);
+ if ( ! charactersConsumed ) {
+ // Impossible.
+ throw new InternalError("Seen digit but no number produced");
+ }
return token;
}
@@ -976,90 +1000,117 @@ public final class TokenizerText implements Tokenizer
}
/*
+ * Number, no sign.
* [146] INTEGER ::= [0-9]+
* [147] DECIMAL ::= [0-9]* '.' [0-9]+
* [148] DOUBLE ::= [0-9]+ '.' [0-9]* EXPONENT | '.' ([0-9])+ EXPONENT | ([0-9])+ EXPONENT
- * [] hex ::= 0x0123456789ABCDEFG
*/
- private void readNumber() {
- // One entry, definitely a number.
- // Beware of '.' as a (non) decimal.
- /*
- maybeSign()
- digits()
- if dot ==> decimal, digits
- if e ==> double, maybeSign, digits
- else
- check not "." for decimal.
- */
+ /**
+ * Read a number.
+ * <p>
+ * On entry, {@code initialChar} is a seen and consumed character, or {@code CH_ZERO} (char 0x0000).
+ * <p>
+ * It parses {@code [0-9]* '.' [0-9]*}, then checks the outcome is not a single DOT, then adds an exponent.
+ * If the number/significand is exactly '.', set the token to be DOT.
+ * Note special code in sign processing for this.
+ * <p>
+ * HEX has already been handled.
+ *
+ * @return true if the function consumed any characters.
+ */
+ private boolean readNumber(int initialChar, boolean isDigit) {
+ // initial character is a +/- sign or 0.
boolean isDouble = false;
- boolean isDecimal = false;
- stringBuilder.setLength(0);
+ boolean hasDecimalPoint = false;
+ boolean hasDigitsBeforeDot = false;
+ boolean hasDigitsAfterDot = false;
+ // DP = Decimal Point.
+ int numDigitsBeforeDP = 0;
+ int numDigitsAfterDP = 0;
- /*
- readPossibleSign(stringBuilder);
- readDigits may be hex
- readDot
- readDigits
- readExponent.
- */
-
- int x = 0; // Digits before a dot.
- int ch = reader.peekChar();
- if ( ch == '0' ) {
- x++;
- reader.readChar();
- insertCodepointDirect(stringBuilder, ch);
- ch = reader.peekChar();
- if ( ch == 'x' || ch == 'X' ) {
- reader.readChar();
- insertCodepointDirect(stringBuilder, ch);
- readHex(reader, stringBuilder);
- token.setImage(stringBuilder.toString());
- token.setType(TokenType.HEX);
- return;
+ stringBuilder.setLength(0);
+ if ( initialChar != CH_ZERO ) { // char U+0000
+ if ( initialChar == CH_PLUS || initialChar == CH_MINUS )
+ insertCodepointDirect(stringBuilder, initialChar);
+ else if ( isDigit ) {
+ insertCodepointDirect(stringBuilder, initialChar);
+ numDigitsBeforeDP = 1;
}
- } else if ( ch == '-' || ch == '+' ) {
- readPossibleSign(stringBuilder);
}
- x += readDigits(stringBuilder);
-// if ( x == 0 ) {}
+ int ch = reader.peekChar();
+ numDigitsBeforeDP += readDigits(stringBuilder);
+ if ( numDigitsBeforeDP > 0 )
+ hasDigitsBeforeDot = true;
+
+ // DOT or integer.
ch = reader.peekChar();
if ( ch == CH_DOT ) {
reader.readChar();
stringBuilder.append(CH_DOT);
- isDecimal = true; // Includes things that will be doubles.
- readDigits(stringBuilder);
+ hasDecimalPoint = true;
+ numDigitsAfterDP += readDigits(stringBuilder);
+ if ( numDigitsAfterDP > 0 )
+ hasDigitsAfterDot = true;
}
- if ( x == 0 && !isDecimal )
+ if ( numDigitsBeforeDP == 0 && !hasDecimalPoint )
// Possibly a tokenizer error - should not have entered readNumber in the first place.
fatal("Unrecognized as number");
- if ( exponent(stringBuilder) ) {
- isDouble = true;
- isDecimal = false;
+ if ( ! hasDigitsBeforeDot & ! hasDigitsAfterDot ) {
+ // The number/significand/mantissa is exactly '.'
+ // Don't do anything - there might be a preceding sign.
+ if ( hasDecimalPoint )
+ reader.pushbackChar(CH_DOT);
+ return false;
}
- // Final part - "decimal" 123. is an integer 123 and a DOT.
- if ( isDecimal ) {
- int len = stringBuilder.length();
- if ( stringBuilder.charAt(len - 1) == CH_DOT ) {
- stringBuilder.setLength(len - 1);
+ if ( exponent(stringBuilder) ) {
+ isDouble = true;
+ } else {
+ // Final part - "decimal" 123. is an integer 123 and a DOT.
+ if ( hasDecimalPoint && ! hasDigitsAfterDot ) {
+ int N = stringBuilder.length();
+ stringBuilder.deleteCharAt(N-1); // A DOT
+ // Reject the DOT which will be picked up next time.
reader.pushbackChar(CH_DOT);
- isDecimal = false;
+ hasDecimalPoint = false;
+// int len = stringBuilder.length();
+// if ( stringBuilder.charAt(len - 1) == CH_DOT ) {
+// stringBuilder.setLength(len - 1);
+// reader.pushbackChar(CH_DOT);
+// hasDecimalPoint = false;
+// }
}
}
token.setImage(stringBuilder.toString());
if ( isDouble )
token.setType(TokenType.DOUBLE);
- else if ( isDecimal )
+ else if ( hasDecimalPoint )
token.setType(TokenType.DECIMAL);
else
token.setType(TokenType.INTEGER);
+ return true;
+ }
+
+ // On entry, have seen and consumed a digit '0'
+ private boolean readPossibleHex() {
+ int ch2 = reader.peekChar();
+ if ( ch2 != 'x' && ch2 != 'X' )
+ return false;
+ // It's HEX
+ reader.readChar();
+ stringBuilder.setLength(0);
+ insertCodepointDirect(stringBuilder, '0');
+ insertCodepointDirect(stringBuilder, ch2);
+ // Error if no hex digits.
+ readHex(reader, stringBuilder);
+ token.setImage(stringBuilder.toString());
+ token.setType(TokenType.HEX);
+ return true;
}
private void readHex(PeekReader reader, StringBuilder sb) {
@@ -1141,11 +1192,23 @@ public final class TokenizerText implements Tokenizer
a2z(stringBuilder);
if ( stringBuilder.length() == 0 )
fatal("Bad language tag");
+
+ boolean seenTextDirection = false;
+
for (;;) {
int ch = reader.peekChar();
if ( ch == '-' ) {
+ if ( seenTextDirection )
+ fatal("Bad language tag with text direction");
reader.readChar();
insertCodepointDirect(stringBuilder, ch);
+ int ch2 = reader.peekChar();
+ if ( ch2 == '-' ) {
+ reader.readChar();
+ // Initial text direction
+ insertCodepointDirect(stringBuilder, ch2);
+ seenTextDirection = true;
+ }
int x = stringBuilder.length();
a2zN(stringBuilder);
if ( stringBuilder.length() == x )
@@ -1244,9 +1307,9 @@ public final class TokenizerText implements Tokenizer
checker.checkVariable(tokenImage);
}
- private void checkDirective(int cntrlCode) {
+ private void checkDirective(String directive) {
if ( checker != null )
- checker.checkDirective(cntrlCode);
+ checker.checkDirective(directive);
}
private void checkKeyword(String tokenImage) {
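
Taken together, the readNumber rework and the language-tag change give behaviour along these lines (a sketch assuming the imports from the first sketch; the expected tokens are inferred from the new tests, and "fr--ltr" is an RDF 1.2-style initial-text-direction example):

    Tokenizer t = TokenizerText.fromString("+. 0xBEEF 'chat'@fr--ltr");
    System.out.println(t.next().getType());   // PLUS - sign not followed by a number
    System.out.println(t.next().getType());   // DOT
    System.out.println(t.next().getType());   // HEX, image "0xBEEF"
    System.out.println(t.next().getType());   // LITERAL_LANG, image "chat", image2 "fr--ltr"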
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizeTextBuilder.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerTextBuilder.java
similarity index 87%
rename from jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizeTextBuilder.java
rename to jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerTextBuilder.java
index b4811e6d14..bb564415d5 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizeTextBuilder.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerTextBuilder.java
@@ -29,7 +29,7 @@ import org.apache.jena.riot.system.ErrorHandlerFactory;
import org.slf4j.Logger;
/** Builder for TokenizerText */
-public class TokenizeTextBuilder {
+public class TokenizerTextBuilder {
// One of these.
private PeekReader peekReader = null;
@@ -41,7 +41,7 @@ public class TokenizeTextBuilder {
private boolean utf8 = true;
private ErrorHandler errorHandler = null;
- TokenizeTextBuilder() {}
+ TokenizerTextBuilder() {}
private void clearInput() {
this.peekReader = null;
@@ -50,41 +50,41 @@ public class TokenizeTextBuilder {
this.string = null;
}
- public TokenizeTextBuilder source(InputStream input) {
+ public TokenizerTextBuilder source(InputStream input) {
clearInput();
this.input = input;
return this;
}
- public TokenizeTextBuilder source(Reader reader) {
+ public TokenizerTextBuilder source(Reader reader) {
clearInput();
this.reader = reader;
return this;
}
- public TokenizeTextBuilder source(PeekReader peekReader) {
+ public TokenizerTextBuilder source(PeekReader peekReader) {
clearInput();
this.peekReader = peekReader;
return this;
}
- public TokenizeTextBuilder fromString(String string) {
+ public TokenizerTextBuilder fromString(String string) {
clearInput();
this.string = string;
return this;
}
- public TokenizeTextBuilder lineMode(boolean lineMode) {
+ public TokenizerTextBuilder lineMode(boolean lineMode) {
this.singleLineMode = lineMode;
return this;
}
- public TokenizeTextBuilder asciiOnly(boolean asciiOnly) {
+ public TokenizerTextBuilder asciiOnly(boolean asciiOnly) {
this.utf8 = !asciiOnly;
return this;
}
- public TokenizeTextBuilder errorHandler(ErrorHandler errorHandler) {
+ public TokenizerTextBuilder errorHandler(ErrorHandler errorHandler) {
this.errorHandler = errorHandler;
return this;
}
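
Typical use of the renamed builder looks like this (a sketch; the input string is arbitrary and the error handler choice is just an example):

    // Assuming the imports from the first sketch, plus:
    import org.apache.jena.riot.system.ErrorHandlerFactory;

    Tokenizer tok = TokenizerText.create()
            .fromString("<http://example/s> <http://example/p> 123 .")
            .errorHandler(ErrorHandlerFactory.errorHandlerExceptions())
            .build();
    while ( tok.hasNext() )
        System.out.println(tok.next());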
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/AbstractTestLangNTuples.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/AbstractTestLangNTuples.java
index cff62fce4e..267537a1c5 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/lang/AbstractTestLangNTuples.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/AbstractTestLangNTuples.java
@@ -35,7 +35,7 @@ import org.apache.jena.riot.ErrorHandlerTestLib.ExWarning;
import org.apache.jena.riot.Lang ;
import org.apache.jena.riot.RIOT;
import org.apache.jena.riot.system.*;
-import org.apache.jena.riot.tokens.TokenizeTextBuilder;
+import org.apache.jena.riot.tokens.TokenizerTextBuilder;
import org.apache.jena.riot.tokens.Tokenizer ;
import org.apache.jena.riot.tokens.TokenizerText;
import org.junit.AfterClass ;
@@ -177,7 +177,7 @@ abstract public class AbstractTestLangNTuples
byte b[] = StrUtils.asUTF8bytes(string);
ByteArrayInputStream in = new ByteArrayInputStream(b);
- TokenizeTextBuilder builder = TokenizerText.create()
+ TokenizerTextBuilder builder = TokenizerText.create()
.source(in)
.errorHandler(ErrorHandlerFactory.errorHandlerExceptions());
if ( charSpace == CharSpace.ASCII )
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TS_Tokens.java b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TS_Tokens.java
index c787fdc990..6624603590 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TS_Tokens.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TS_Tokens.java
@@ -23,7 +23,7 @@ import org.junit.runners.Suite ;
@RunWith(Suite.class)
@Suite.SuiteClasses( {
- TestTokenizer.class
+ TestTokenizerText.class
, TestTokenForNode.class
})
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizerText.java
similarity index 56%
rename from jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java
rename to jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizerText.java
index 1fabb8de70..4bd7751067 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizerText.java
@@ -16,7 +16,7 @@
* limitations under the License.
*/
-package org.apache.jena.riot.tokens ;
+package org.apache.jena.riot.tokens;
import static org.apache.jena.riot.system.ErrorHandlerFactory.errorHandlerExceptions;
import static org.apache.jena.riot.system.ErrorHandlerFactory.errorHandlerSimple;
@@ -25,132 +25,132 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
-import java.io.ByteArrayInputStream ;
+import java.io.ByteArrayInputStream;
import java.io.Reader;
import org.apache.jena.atlas.io.IO;
-import org.apache.jena.atlas.io.PeekReader ;
-import org.apache.jena.atlas.lib.StrUtils ;
-import org.apache.jena.riot.RiotException ;
-import org.apache.jena.riot.RiotParseException ;
+import org.apache.jena.atlas.io.PeekReader;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.riot.RiotException;
+import org.apache.jena.riot.RiotParseException;
import org.apache.jena.riot.system.ErrorHandlerFactory.ErrorHandlerRecorder;
-import org.apache.jena.sparql.ARQConstants ;
-import org.junit.Test ;
+import org.apache.jena.sparql.ARQConstants;
+import org.junit.Test;
-public class TestTokenizer {
+public class TestTokenizerText {
private static Tokenizer tokenizer(String string) {
- return tokenizer(string, false) ;
+ return tokenizer(string, false);
}
private static Tokenizer tokenizer(String string, boolean lineMode) {
- PeekReader r = PeekReader.readString(string) ;
+ PeekReader r = PeekReader.readString(string);
Tokenizer tokenizer = TokenizerText.create()
.errorHandler(errorHandlerExceptions())
.source(r)
.lineMode(lineMode)
.build();
- return tokenizer ;
+ return tokenizer;
}
private static void tokenFirst(String string) {
- Tokenizer tokenizer = tokenizer(string) ;
- assertTrue(tokenizer.hasNext()) ;
- assertNotNull(tokenizer.next()) ;
+ Tokenizer tokenizer = tokenizer(string);
+ assertTrue(tokenizer.hasNext());
+ assertNotNull(tokenizer.next());
// Maybe more.
- // assertFalse(tokenizer.hasNext()) ;
+ // assertFalse(tokenizer.hasNext());
}
private static Token tokenFor(String string) {
- Tokenizer tokenizer = tokenizer(string) ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertFalse(tokenizer.hasNext()) ;
- return token ;
+ Tokenizer tokenizer = tokenizer(string);
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertFalse(tokenizer.hasNext());
+ return token;
}
private static Token tokenizeAndTestExact(String input, TokenType tokenType, String tokenImage) {
- return tokenizeAndTestExact(input, tokenType, tokenImage, null) ;
+ return tokenizeAndTestExact(input, tokenType, tokenImage, null);
}
private static Token tokenizeAndTestExact(String input, StringType stringType, String tokenImage) {
- Token token = tokenizeAndTestExact(input, TokenType.STRING, tokenImage, null) ;
+ Token token = tokenizeAndTestExact(input, TokenType.STRING, tokenImage, null);
assertEquals(stringType, token.getStringType());
return token;
}
private static Token tokenizeAndTestExact(String input, TokenType tokenType, String tokenImage1, String tokenImage2) {
- Tokenizer tokenizer = tokenizer(input) ;
- Token token = testNextToken(tokenizer, tokenType, tokenImage1, tokenImage2) ;
- assertFalse("Excess tokens", tokenizer.hasNext()) ;
- return token ;
+ Tokenizer tokenizer = tokenizer(input);
+ Token token = testNextToken(tokenizer, tokenType, tokenImage1, tokenImage2);
+ assertFalse("Excess tokens", tokenizer.hasNext());
+ return token;
}
private static Token tokenizeAndTestExact(String input, TokenType tokenType, String tokenImage1, String tokenImage2, Token subToken1, Token subToken2) {
- Token token = tokenFor(input) ;
- assertEquals(tokenType, token.getType()) ;
- assertEquals(tokenImage1, token.getImage()) ;
- assertEquals(tokenImage2, token.getImage2()) ;
- assertEquals(subToken1, token.getSubToken1()) ;
- assertEquals(subToken2, token.getSubToken2()) ;
- return token ;
+ Token token = tokenFor(input);
+ assertEquals(tokenType, token.getType());
+ assertEquals(tokenImage1, token.getImage());
+ assertEquals(tokenImage2, token.getImage2());
+ assertEquals(subToken1, token.getSubToken1());
+ assertEquals(subToken2, token.getSubToken2());
+ return token;
}
private static Tokenizer tokenizeAndTestFirst(String input, TokenType tokenType) {
- return tokenizeAndTestFirst(input, tokenType, null, null) ;
+ return tokenizeAndTestFirst(input, tokenType, null, null);
}
private static Tokenizer tokenizeAndTestFirst(String input, TokenType tokenType, String tokenImage) {
- return tokenizeAndTestFirst(input, tokenType, tokenImage, null) ;
+ return tokenizeAndTestFirst(input, tokenType, tokenImage, null);
}
private static Tokenizer tokenizeAndTestFirst(String input, TokenType tokenType, String tokenImage1, String tokenImage2) {
- Tokenizer tokenizer = tokenizer(input) ;
- testNextToken(tokenizer, tokenType, tokenImage1, tokenImage2) ;
- return tokenizer ;
+ Tokenizer tokenizer = tokenizer(input);
+ testNextToken(tokenizer, tokenType, tokenImage1, tokenImage2);
+ return tokenizer;
}
private static Token testNextToken(Tokenizer tokenizer, TokenType tokenType) {
- return testNextToken(tokenizer, tokenType, null, null) ;
+ return testNextToken(tokenizer, tokenType, null, null);
}
private static Token testNextToken(Tokenizer tokenizer, TokenType tokenType, String tokenImage1) {
- return testNextToken(tokenizer, tokenType, tokenImage1, null) ;
+ return testNextToken(tokenizer, tokenType, tokenImage1, null);
}
private static Token testNextToken(Tokenizer tokenizer, TokenType tokenType, String tokenImage1, String tokenImage2) {
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertNotNull(token) ;
- assertEquals(tokenType, token.getType()) ;
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertNotNull(token);
+ assertEquals(tokenType, token.getType());
if ( tokenImage1 != null )
- assertEquals(tokenImage1, token.getImage()) ;
+ assertEquals(tokenImage1, token.getImage());
if ( tokenImage2 != null )
- assertEquals(tokenImage1, token.getImage()) ;
- assertEquals(tokenImage2, token.getImage2()) ;
- return token ;
+ assertEquals(tokenImage1, token.getImage());
+ assertEquals(tokenImage2, token.getImage2());
+ return token;
}
private static Token tokenizeAndTest(String input, TokenType tokenType, String tokenImage1, String tokenImage2, Token subToken1, Token subToken2) {
- Token token = tokenFor(input) ;
- assertNotNull(token) ;
- assertEquals(tokenType, token.getType()) ;
- assertEquals(tokenImage1, token.getImage()) ;
- assertEquals(tokenImage2, token.getImage2()) ;
- assertEquals(subToken1, token.getSubToken1()) ;
- assertEquals(subToken2, token.getSubToken2()) ;
- return token ;
+ Token token = tokenFor(input);
+ assertNotNull(token);
+ assertEquals(tokenType, token.getType());
+ assertEquals(tokenImage1, token.getImage());
+ assertEquals(tokenImage2, token.getImage2());
+ assertEquals(subToken1, token.getSubToken1());
+ assertEquals(subToken2, token.getSubToken2());
+ return token;
}
@Test
public void tokenUnit_iri1() {
- tokenizeAndTestExact("<x>", TokenType.IRI, "x") ;
+ tokenizeAndTestExact("<x>", TokenType.IRI, "x");
}
@Test
public void tokenUnit_iri2() {
- tokenizeAndTestExact(" <> ", TokenType.IRI, "") ;
+ tokenizeAndTestExact(" <> ", TokenType.IRI, "");
}
@Test
@@ -158,778 +158,805 @@ public class TestTokenizer {
public void tokenUnit_iri3() {
try {
// That's one \
- tokenFirst("<abc\\>def>") ;
+ tokenFirst("<abc\\>def>");
} catch (RiotParseException ex) {
- String x = ex.getMessage() ;
- assertTrue(x.contains("Illegal")) ;
+ String x = ex.getMessage();
+ assertTrue(x.contains("Illegal"));
}
}
@Test
public void tokenUnit_iri4() {
// \\\\ is a double \\ in the data. 0x41 is 'A'
- tokenizeAndTestFirst("<abc\\u0041def> 123", TokenType.IRI,
"abcAdef") ;
+ tokenizeAndTestFirst("<abc\\u0041def> 123", TokenType.IRI,
"abcAdef");
}
@Test
public void tokenUnit_iri5() {
// \\\\ is a double \\ in the data. 0x41 is 'A'
- tokenizeAndTestFirst("<\\u0041def> 123", TokenType.IRI, "Adef") ;
+ tokenizeAndTestFirst("<\\u0041def> 123", TokenType.IRI, "Adef");
}
@Test
public void tokenUnit_iri6() {
// \\\\ is a double \\ in the data. 0x41 is 'A'
- tokenizeAndTestFirst("<abc\\u0041> 123", TokenType.IRI, "abcA") ;
+ tokenizeAndTestFirst("<abc\\u0041> 123", TokenType.IRI, "abcA");
}
// Bad IRIs
@Test(expected=RiotException.class)
public void tokenUnit_iri10() {
- tokenFirst("<abc def>") ;
+ tokenFirst("<abc def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri11() {
- tokenFirst("<abc<def>") ;
+ tokenFirst("<abc<def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri12() {
- tokenFirst("<abc{def>") ;
+ tokenFirst("<abc{def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri13() {
- tokenFirst("<abc}def>") ;
+ tokenFirst("<abc}def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri14() {
- tokenFirst("<abc|def>") ;
+ tokenFirst("<abc|def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri15() {
- tokenFirst("<abc^def>") ;
+ tokenFirst("<abc^def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri16() {
- tokenFirst("<abc`def>") ;
+ tokenFirst("<abc`def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri17() {
- tokenFirst("<abc\tdef>") ; // Java escae - real tab
+ tokenFirst("<abc\tdef>"); // Java escae - real tab
}
@Test(expected=RiotException.class)
public void tokenUnit_iri18() {
- tokenFirst("<abc\u0007def>") ; // Java escape - codepoint 7
+ tokenFirst("<abc\u0007def>"); // Java escape - codepoint 7
}
@Test(expected=RiotException.class)
public void tokenUnit_iri19() {
- tokenFirst("<abc\\>") ;
+ tokenFirst("<abc\\>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri20() {
- tokenFirst("<abc\\def>") ;
+ tokenFirst("<abc\\def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri21() {
// \\\\ is a double \\ in the data.
// RDF 1.1 - \\ is not legal in a IRIREF
- tokenFirst("<abc\\\\def>") ;
+ tokenFirst("<abc\\\\def>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri22() {
- tokenFirst("<abc\\u00ZZdef>") ;
+ tokenFirst("<abc\\u00ZZdef>");
}
@Test(expected=RiotException.class)
public void tokenUnit_iri23() {
- tokenFirst("<abc\\uZZ20def>") ;
+ tokenFirst("<abc\\uZZ20def>");
}
@Test
public void tokenUnit_str1() {
- tokenizeAndTestExact(" 'abc' ", StringType.STRING1, "abc") ;
+ tokenizeAndTestExact(" 'abc' ", StringType.STRING1, "abc");
}
@Test
public void tokenUnit_str2() {
- tokenizeAndTestExact(" '' ", StringType.STRING1, "") ;
+ tokenizeAndTestExact(" '' ", StringType.STRING1, "");
}
@Test
public void tokenUnit_str3() {
- tokenizeAndTestExact("'\\u0020'", StringType.STRING1, " ") ;
+ tokenizeAndTestExact("'\\u0020'", StringType.STRING1, " ");
}
@Test
public void tokenUnit_str4() {
- tokenizeAndTestExact("'a\\'\\\"\\n\\t\\r\\f'", StringType.STRING1,
"a'\"\n\t\r\f") ;
+ tokenizeAndTestExact("'a\\'\\\"\\n\\t\\r\\f'", StringType.STRING1,
"a'\"\n\t\r\f");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str5() {
// This is a raw newline. \n is a Java string escape.
- tokenizeAndTestExact("'\n'", StringType.STRING1, "\n") ;
+ tokenizeAndTestExact("'\n'", StringType.STRING1, "\n");
}
@Test
public void tokenUnit_str6() {
- tokenizeAndTestExact(" \"abc\" ", StringType.STRING2, "abc") ;
+ tokenizeAndTestExact(" \"abc\" ", StringType.STRING2, "abc");
}
@Test
public void tokenUnit_str7() {
- tokenizeAndTestExact("\"\"", StringType.STRING2, "") ;
+ tokenizeAndTestExact("\"\"", StringType.STRING2, "");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str8() {
- Tokenizer tokenizer = tokenizer("\"") ;
- assertTrue(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer("\"");
+ assertTrue(tokenizer.hasNext());
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str9() {
- tokenFirst("'abc") ;
+ tokenFirst("'abc");
}
@Test
public void tokenUnit_str10() {
- tokenizeAndTestExact("'\\'abc'", StringType.STRING1, "'abc") ;
+ tokenizeAndTestExact("'\\'abc'", StringType.STRING1, "'abc");
}
@Test
public void tokenUnit_str11() {
- tokenizeAndTestExact("'\\U00000020'", StringType.STRING1, " ") ;
+ tokenizeAndTestExact("'\\U00000020'", StringType.STRING1, " ");
}
@Test
public void tokenUnit_str_long1() {
- tokenizeAndTestExact("'''aaa'''", StringType.LONG_STRING1, "aaa") ;
+ tokenizeAndTestExact("'''aaa'''", StringType.LONG_STRING1, "aaa");
}
@Test
public void tokenUnit_str_long2() {
- tokenizeAndTestExact("\"\"\"aaa\"\"\"", StringType.LONG_STRING2,
"aaa") ;
+ tokenizeAndTestExact("\"\"\"aaa\"\"\"", StringType.LONG_STRING2,
"aaa");
}
@Test
public void tokenUnit_str_long3() {
- tokenizeAndTestExact("''''1234'''", StringType.LONG_STRING1, "'1234") ;
+ tokenizeAndTestExact("''''1234'''", StringType.LONG_STRING1, "'1234");
}
@Test
public void tokenUnit_str_long4() {
- tokenizeAndTestExact("'''''1234'''", StringType.LONG_STRING1,
"''1234") ;
+ tokenizeAndTestExact("'''''1234'''", StringType.LONG_STRING1,
"''1234");
}
@Test
public void tokenUnit_str_long5() {
- tokenizeAndTestExact("'''\\'''1234'''", StringType.LONG_STRING1,
"'''1234") ;
+ tokenizeAndTestExact("'''\\'''1234'''", StringType.LONG_STRING1,
"'''1234");
}
@Test
public void tokenUnit_str_long6() {
- tokenizeAndTestExact("\"\"\"\"1234\"\"\"", StringType.LONG_STRING2,
"\"1234") ;
+ tokenizeAndTestExact("\"\"\"\"1234\"\"\"", StringType.LONG_STRING2,
"\"1234");
}
@Test
public void tokenUnit_str_long7() {
- tokenizeAndTestExact("\"\"\"\"\"1234\"\"\"", StringType.LONG_STRING2,
"\"\"1234") ;
+ tokenizeAndTestExact("\"\"\"\"\"1234\"\"\"", StringType.LONG_STRING2,
"\"\"1234");
}
@Test
public void tokenUnit_str_long8() {
- tokenizeAndTestExact("''''''", StringType.LONG_STRING1, "") ;
+ tokenizeAndTestExact("''''''", StringType.LONG_STRING1, "");
}
@Test
public void tokenUnit_str_long9() {
- tokenizeAndTestExact("\"\"\"'''''''''''''''''\"\"\"",
StringType.LONG_STRING2, "'''''''''''''''''") ;
+ tokenizeAndTestExact("\"\"\"'''''''''''''''''\"\"\"",
StringType.LONG_STRING2, "'''''''''''''''''");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str_long10() {
- tokenFirst("\"\"\"abcdef") ;
+ tokenFirst("\"\"\"abcdef");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str_long11() {
- tokenFirst("'''") ;
+ tokenFirst("'''");
}
@Test
public void tokenUnit_str_long12() {
- tokenizeAndTestExact("'''x'''@en", TokenType.LITERAL_LANG, "x", "en") ;
+ tokenizeAndTestExact("'''x'''@en", TokenType.LITERAL_LANG, "x", "en");
}
@Test
public void tokenUnit_bNode1() {
- tokenizeAndTestExact("_:abc", TokenType.BNODE, "abc") ;
+ tokenizeAndTestExact("_:abc", TokenType.BNODE, "abc");
}
@Test
public void tokenUnit_bNode2() {
- tokenizeAndTestExact("_:123 ", TokenType.BNODE, "123") ;
+ tokenizeAndTestExact("_:123 ", TokenType.BNODE, "123");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_bNode3() {
- Tokenizer tokenizer = tokenizer("_:") ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertNotNull(token) ;
+ Tokenizer tokenizer = tokenizer("_:");
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertNotNull(token);
}
@Test
public void tokenUnit_bNode4() {
- tokenizeAndTestExact("_:1-2-Z ", TokenType.BNODE, "1-2-Z") ;
+ tokenizeAndTestExact("_:1-2-Z ", TokenType.BNODE, "1-2-Z");
}
@Test
public void tokenUnit_bNode5() {
- Tokenizer tokenizer = tokenizeAndTestFirst("_:x. ",
TokenType.BNODE, "x") ;
- testNextToken(tokenizer, TokenType.DOT) ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("_:x. ",
TokenType.BNODE, "x");
+ testNextToken(tokenizer, TokenType.DOT);
+ assertFalse(tokenizer.hasNext());
}
@Test
public void tokenUnit_bNode6() {
- Tokenizer tokenizer = tokenizeAndTestFirst("_:x:a. ",
TokenType.BNODE, "x") ;
- testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "a") ;
- testNextToken(tokenizer, TokenType.DOT) ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("_:x:a. ",
TokenType.BNODE, "x");
+ testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "a");
+ testNextToken(tokenizer, TokenType.DOT);
+ assertFalse(tokenizer.hasNext());
}
-// @Test
-// public void tokenUnit_cntrl1() {
-// tokenizeAndTestExact("*S", TokenType.CNTRL, "S");
-// }
-//
-// @Test
-// public void tokenUnit_cntr2() {
-// tokenizeAndTestExact("*SXYZ", TokenType.CNTRL, "SXYZ");
-// }
-//
-// @Test
-// public void tokenUnit_cntrl3() {
-// Tokenizer tokenizer = tokenizer("*S<x>");
-// assertTrue(tokenizer.hasNext());
-// Token token = tokenizer.next();
-// assertNotNull(token);
-// assertEquals(TokenType.CNTRL, token.getType());
-// assertEquals('S', token.getCntrlCode());
-// assertNull(token.getImage());
-// assertNull(token.getImage2());
-//
-// assertTrue(tokenizer.hasNext());
-// Token token2 = tokenizer.next();
-// assertNotNull(token2);
-// assertEquals(TokenType.IRI, token2.getType());
-// assertEquals("x", token2.getImage());
-// assertNull(token2.getImage2());
-// assertFalse(tokenizer.hasNext());
-// }
-
@Test
public void tokenUnit_syntax1() {
- tokenizeAndTestExact(".", TokenType.DOT, null, null) ;
+ tokenizeAndTestExact(".", TokenType.DOT, null, null);
}
@Test
public void tokenUnit_syntax2() {
- Tokenizer tokenizer = tokenizer(".;,") ;
- testNextToken(tokenizer, TokenType.DOT) ;
- testNextToken(tokenizer, TokenType.SEMICOLON) ;
- testNextToken(tokenizer, TokenType.COMMA) ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer(".;,");
+ testNextToken(tokenizer, TokenType.DOT);
+ testNextToken(tokenizer, TokenType.SEMICOLON);
+ testNextToken(tokenizer, TokenType.COMMA);
+ assertFalse(tokenizer.hasNext());
}
@Test
public void tokenUnit_pname1() {
- tokenizeAndTestExact("a:b.c", TokenType.PREFIXED_NAME, "a", "b.c") ;
+ tokenizeAndTestExact("a:b.c", TokenType.PREFIXED_NAME, "a", "b.c");
}
@Test
public void tokenUnit_pname2() {
- Tokenizer tokenizer = tokenizeAndTestFirst("a:b.",
TokenType.PREFIXED_NAME, "a", "b") ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertEquals(TokenType.DOT, token.getType()) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("a:b.",
TokenType.PREFIXED_NAME, "a", "b");
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertEquals(TokenType.DOT, token.getType());
}
@Test
public void tokenUnit_pname3() {
- tokenizeAndTestExact("a:b123", TokenType.PREFIXED_NAME, "a", "b123") ;
+ tokenizeAndTestExact("a:b123", TokenType.PREFIXED_NAME, "a", "b123");
}
@Test
public void tokenUnit_pname4() {
- tokenizeAndTestExact("a:", TokenType.PREFIXED_NAME, "a", "") ;
+ tokenizeAndTestExact("a:", TokenType.PREFIXED_NAME, "a", "");
}
@Test
public void tokenUnit_pname5() {
- tokenizeAndTestExact(":", TokenType.PREFIXED_NAME, "", "") ;
+ tokenizeAndTestExact(":", TokenType.PREFIXED_NAME, "", "");
}
@Test
public void tokenUnit_pname6() {
- tokenizeAndTestExact(":a", TokenType.PREFIXED_NAME, "", "a") ;
+ tokenizeAndTestExact(":a", TokenType.PREFIXED_NAME, "", "a");
}
@Test
public void tokenUnit_pname7() {
- tokenizeAndTestExact(":123", TokenType.PREFIXED_NAME, "", "123") ;
+ tokenizeAndTestExact(":123", TokenType.PREFIXED_NAME, "", "123");
}
@Test
public void tokenUnit_pname8() {
- tokenizeAndTestExact("a123:456", TokenType.PREFIXED_NAME, "a123",
"456") ;
+ tokenizeAndTestExact("a123:456", TokenType.PREFIXED_NAME, "a123",
"456");
}
@Test
public void tokenUnit_pname9() {
- Tokenizer tokenizer = tokenizeAndTestFirst("a123:-456",
TokenType.PREFIXED_NAME, "a123", "") ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertEquals(TokenType.INTEGER, token.getType()) ;
- assertEquals("-456", token.getImage()) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("a123:-456",
TokenType.PREFIXED_NAME, "a123", "");
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertEquals(TokenType.INTEGER, token.getType());
+ assertEquals("-456", token.getImage());
}
@Test
public void tokenUnit_pname10() {
- tokenizeAndTestExact("a:a.b", TokenType.PREFIXED_NAME, "a", "a.b") ;
+ tokenizeAndTestExact("a:a.b", TokenType.PREFIXED_NAME, "a", "a.b");
}
@Test
public void tokenUnit_pname11() {
- tokenizeAndTestExact("a:0.b", TokenType.PREFIXED_NAME, "a", "0.b") ;
+ tokenizeAndTestExact("a:0.b", TokenType.PREFIXED_NAME, "a", "0.b");
}
@Test
public void tokenUnit_pname12() {
- tokenizeAndTestFirst("a:0. b", TokenType.PREFIXED_NAME, "a", "0") ;
+ tokenizeAndTestFirst("a:0. b", TokenType.PREFIXED_NAME, "a", "0");
}
@Test
public void tokenUnit_pname13() {
// x00e9 é
// x0065 e and x0301 ́
- tokenizeAndTestExact("a:xyzé", TokenType.PREFIXED_NAME, "a",
"xyz\u00e9") ;
+ tokenizeAndTestExact("a:xyzé", TokenType.PREFIXED_NAME, "a",
"xyz\u00e9");
}
@Test
public void tokenUnit_pname14() {
// x0065 e and x0301 ́
- tokenizeAndTestExact("a:xyze\u0301", TokenType.PREFIXED_NAME, "a",
"xyze\u0301") ;
+ tokenizeAndTestExact("a:xyze\u0301", TokenType.PREFIXED_NAME, "a",
"xyze\u0301");
}
@Test
public void tokenUnit_pname15() {
// x0065 e and x0301 ́
- tokenizeAndTestExact("a:xe\u0301y", TokenType.PREFIXED_NAME, "a",
"xe\u0301y") ;
+ tokenizeAndTestExact("a:xe\u0301y", TokenType.PREFIXED_NAME, "a",
"xe\u0301y");
}
@Test
public void tokenUnit_pname16() {
- tokenizeAndTestExact("a:b\\#c", TokenType.PREFIXED_NAME, "a", "b#c") ;
+ tokenizeAndTestExact("a:b\\#c", TokenType.PREFIXED_NAME, "a", "b#c");
}
@Test
public void tokenUnit_pname17() {
- tokenizeAndTestExact("a:b\\/c", TokenType.PREFIXED_NAME, "a", "b/c") ;
+ tokenizeAndTestExact("a:b\\/c", TokenType.PREFIXED_NAME, "a", "b/c");
}
@Test
public void tokenUnit_pname18() {
- tokenizeAndTestExact("a:b:c", TokenType.PREFIXED_NAME, "a", "b:c") ;
+ tokenizeAndTestExact("a:b:c", TokenType.PREFIXED_NAME, "a", "b:c");
}
@Test
public void tokenUnit_pname19() {
- tokenizeAndTestExact("a:b%AAc", TokenType.PREFIXED_NAME, "a", "b%AAc")
;
+ tokenizeAndTestExact("a:b%AAc", TokenType.PREFIXED_NAME, "a", "b%AAc");
}
@Test
public void tokenUnit_pname20() {
- Tokenizer tokenizer = tokenizeAndTestFirst("123:", TokenType.INTEGER,
"123") ;
- testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "") ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("123:", TokenType.INTEGER,
"123");
+ testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "");
}
// Generic: parse first token from ...
- // tokenTest(str, TokenType, TokenImage) ;
+ // tokenTest(str, TokenType, TokenImage);
@Test
- public void tokenUnit_num1() {
- tokenizeAndTestExact("123", TokenType.INTEGER, "123") ;
+ public void tokenUnit_num01() {
+ tokenizeAndTestExact("123", TokenType.INTEGER, "123");
}
@Test
- public void tokenUnit_num2() {
+ public void tokenUnit_num02() {
// This is a change in Turtle (and SPARQL 1.1)
- tokenizeAndTestFirst("123.", TokenType.INTEGER, "123") ;
+ tokenizeAndTestFirst("123.", TokenType.INTEGER, "123");
}
@Test
- public void tokenUnit_num3() {
- tokenizeAndTestExact("+123.456", TokenType.DECIMAL, "+123.456") ;
+ public void tokenUnit_num03() {
+ tokenizeAndTestExact("+123.456", TokenType.DECIMAL, "+123.456");
}
@Test
- public void tokenUnit_num4() {
- tokenizeAndTestExact("-1", TokenType.INTEGER, "-1") ;
+ public void tokenUnit_num04() {
+ tokenizeAndTestExact("-1", TokenType.INTEGER, "-1");
}
@Test
- public void tokenUnit_num5() {
- tokenizeAndTestExact("-1e0", TokenType.DOUBLE, "-1e0") ;
+ public void tokenUnit_num05() {
+ tokenizeAndTestExact("-1e0", TokenType.DOUBLE, "-1e0");
}
@Test
- public void tokenUnit_num6() {
- tokenizeAndTestExact("1e+1", TokenType.DOUBLE, "1e+1") ;
+ public void tokenUnit_num06() {
+ tokenizeAndTestExact("1e+1", TokenType.DOUBLE, "1e+1");
}
@Test
- public void tokenUnit_num7() {
- tokenizeAndTestExact("1.3e+1", TokenType.DOUBLE, "1.3e+1") ;
+ public void tokenUnit_num07() {
+ tokenizeAndTestExact("1.3e+1", TokenType.DOUBLE, "1.3e+1");
}
@Test
- public void tokenUnit_num8() {
- tokenizeAndTestFirst("1.3.4", TokenType.DECIMAL, "1.3") ;
+ public void tokenUnit_num08() {
+ tokenizeAndTestFirst("1.3.4", TokenType.DECIMAL, "1.3");
}
@Test
- public void tokenUnit_num9() {
- tokenizeAndTestFirst("1.3e67.7", TokenType.DOUBLE, "1.3e67") ;
+ public void tokenUnit_num09() {
+ tokenizeAndTestFirst("1.3e67.7", TokenType.DOUBLE, "1.3e67");
}
@Test
public void tokenUnit_num10() {
- tokenizeAndTestExact(".1", TokenType.DECIMAL, ".1") ;
+ tokenizeAndTestExact(".1", TokenType.DECIMAL, ".1");
}
@Test
public void tokenUnit_num11() {
- tokenizeAndTestExact(".1e0", TokenType.DOUBLE, ".1e0") ;
+ tokenizeAndTestExact(".1e0", TokenType.DOUBLE, ".1e0");
}
@Test
public void tokenUnit_num12() {
- // This is not a hex number.
+ tokenizeAndTestExact("+.1", TokenType.DECIMAL, "+.1");
+ }
- Tokenizer tokenizer = tokenizeAndTestFirst("000A .",
TokenType.INTEGER, "000") ;
- testNextToken(tokenizer, TokenType.KEYWORD, "A") ;
+ @Test
+ public void tokenUnit_num13() {
+ tokenizeAndTestExact("-.1", TokenType.DECIMAL, "-.1");
+ }
+
+ @Test
+ public void tokenUnit_num14() {
+ tokenizeAndTestExact("+.1e0", TokenType.DOUBLE, "+.1e0");
+ }
+
+ @Test
+ public void tokenUnit_num15() {
+ tokenizeAndTestExact("-.1e0", TokenType.DOUBLE, "-.1e0");
+ }
+
+ @Test
+ public void tokenUnit_num20() {
+ Tokenizer tokenizer = tokenizer("+.123.");
+ testNextToken(tokenizer, TokenType.DECIMAL, "+.123");
+ testNextToken(tokenizer, TokenType.DOT);
+ assertTrue(tokenizer.eof());
+ }
+
+ @Test
+ public void tokenUnit_num21() {
+ Tokenizer tokenizer = tokenizer("++.123.");
+ testNextToken(tokenizer, TokenType.PLUS);
+ testNextToken(tokenizer, TokenType.DECIMAL, "+.123");
+ testNextToken(tokenizer, TokenType.DOT);
+ assertTrue(tokenizer.eof());
+ }
+
+ @Test
+ public void tokenUnit_num22() {
+ Tokenizer tokenizer = tokenizer("123.");
+ testNextToken(tokenizer, TokenType.INTEGER, "123");
+ testNextToken(tokenizer, TokenType.DOT);
+ assertTrue(tokenizer.eof());
+ }
+
+ @Test
+ public void tokenUnit_dot1() {
+ Tokenizer tokenizer = tokenizer("+.");
+ testNextToken(tokenizer, TokenType.PLUS);
+ testNextToken(tokenizer, TokenType.DOT);
+ assertTrue(tokenizer.eof());
+ }
+
+ @Test
+ public void tokenUnit_dot2() {
+ Tokenizer tokenizer = tokenizer("++.");
+ testNextToken(tokenizer, TokenType.PLUS);
+ testNextToken(tokenizer, TokenType.PLUS);
+ testNextToken(tokenizer, TokenType.DOT);
+ assertTrue(tokenizer.eof());
+ }
+
+ @Test
+ public void tokenUnit_dot3() {
+ Tokenizer tokenizer = tokenizer("++..");
+ testNextToken(tokenizer, TokenType.PLUS);
+ testNextToken(tokenizer, TokenType.PLUS);
+ testNextToken(tokenizer, TokenType.DOT);
+ testNextToken(tokenizer, TokenType.DOT);
+ assertTrue(tokenizer.eof());
+ }
+
+ @Test
+ public void tokenUnit_dot4() {
+ Tokenizer tokenizer = tokenizer("..");
+ testNextToken(tokenizer, TokenType.DOT);
+ testNextToken(tokenizer, TokenType.DOT);
+ assertTrue(tokenizer.eof());
+ }
+
+ // This is not a hex number.
+ @Test
+ public void tokenUnit_num99() {
+ Tokenizer tokenizer = tokenizeAndTestFirst("000A .",
TokenType.INTEGER, "000");
+ testNextToken(tokenizer, TokenType.KEYWORD, "A");
}
@Test
public void tokenUnit_var1() {
- tokenizeAndTestFirst("?x ?y", TokenType.VAR, "x") ;
+ tokenizeAndTestFirst("?x ?y", TokenType.VAR, "x");
}
@Test
public void tokenUnit_var2() {
- tokenizeAndTestFirst("? x", TokenType.VAR, "") ;
+ tokenizeAndTestFirst("? x", TokenType.VAR, "");
}
@Test
public void tokenUnit_var3() {
- tokenizeAndTestExact("??x", TokenType.VAR, "?x") ;
+ tokenizeAndTestExact("??x", TokenType.VAR, "?x");
}
@Test
public void tokenUnit_var4() {
- tokenizeAndTestExact("?.1", TokenType.VAR, ".1") ;
+ tokenizeAndTestExact("?.1", TokenType.VAR, ".1");
}
@Test
public void tokenUnit_var5() {
- tokenizeAndTestExact("?" + ARQConstants.allocVarMarker, TokenType.VAR,
ARQConstants.allocVarMarker) ;
+ tokenizeAndTestExact("?" + ARQConstants.allocVarMarker, TokenType.VAR,
ARQConstants.allocVarMarker);
}
@Test
public void tokenUnit_var6() {
- tokenizeAndTestExact("?" + ARQConstants.allocVarMarker + "0",
TokenType.VAR, ARQConstants.allocVarMarker + "0") ;
+ tokenizeAndTestExact("?" + ARQConstants.allocVarMarker + "0",
TokenType.VAR, ARQConstants.allocVarMarker + "0");
}
@Test
public void tokenUnit_var7() {
- tokenizeAndTestExact("?" + ARQConstants.allocVarScopeHiding + "0",
TokenType.VAR, ARQConstants.allocVarScopeHiding + "0") ;
+ tokenizeAndTestExact("?" + ARQConstants.allocVarScopeHiding + "0",
TokenType.VAR, ARQConstants.allocVarScopeHiding + "0");
}
@Test
public void tokenUnit_var8() {
- tokenizeAndTestExact("?" + ARQConstants.allocVarAnonMarker + "0",
TokenType.VAR, ARQConstants.allocVarAnonMarker + "0") ;
+ tokenizeAndTestExact("?" + ARQConstants.allocVarAnonMarker + "0",
TokenType.VAR, ARQConstants.allocVarAnonMarker + "0");
}
@Test
public void tokenUnit_var9() {
- tokenizeAndTestExact("?" + ARQConstants.allocVarTripleTerm + "9",
TokenType.VAR, ARQConstants.allocVarTripleTerm + "9") ;
+ tokenizeAndTestExact("?" + ARQConstants.allocVarTripleTerm + "9",
TokenType.VAR, ARQConstants.allocVarTripleTerm + "9");
}
@Test
public void tokenUnit_hex1() {
- tokenizeAndTestExact("0xABC", TokenType.HEX, "0xABC") ;
+ tokenizeAndTestExact("0xABC", TokenType.HEX, "0xABC");
}
@Test
public void tokenUnit_hex2() {
- tokenizeAndTestFirst("0xABCXYZ", TokenType.HEX, "0xABC") ;
+ tokenizeAndTestFirst("0xABCXYZ", TokenType.HEX, "0xABC");
}
@Test(expected = RiotParseException.class)
public void tokenUnit_hex3() {
- tokenFirst("0xXYZ") ;
+ tokenFirst("0xXYZ");
}
@Test
public void tokenUnit_hex4() {
- tokenizeAndTestExact("0Xabc", TokenType.HEX, "0Xabc") ;
+ tokenizeAndTestExact("0Xabc", TokenType.HEX, "0Xabc");
}
private static void tokenizeAndTestLiteralDT(String input, StringType lexType, String image, TokenType dt, String dtImage1, String dtImage2) {
- Token lexToken = new Token(TokenType.STRING, image) ;
+ Token lexToken = new Token(TokenType.STRING, image);
lexToken.setStringType(lexType);
- Token dtToken = new Token(dt, dtImage1, dtImage2) ;
- tokenizeAndTest(input, TokenType.LITERAL_DT, image, null, lexToken, dtToken) ;
+ Token dtToken = new Token(dt, dtImage1, dtImage2);
+ tokenizeAndTest(input, TokenType.LITERAL_DT, image, null, lexToken, dtToken);
- Token expectedToken = new Token(TokenType.LITERAL_DT) ;
- expectedToken.setImage(image) ;
- expectedToken.setImage2(null) ;
- expectedToken.setSubToken1(lexToken) ;
- expectedToken.setSubToken2(dtToken) ;
+ Token expectedToken = new Token(TokenType.LITERAL_DT);
+ expectedToken.setImage(image);
+ expectedToken.setImage2(null);
+ expectedToken.setSubToken1(lexToken);
+ expectedToken.setSubToken2(dtToken);
- Token token = tokenFor(input) ;
- assertEquals(expectedToken, token) ;
+ Token token = tokenFor(input);
+ assertEquals(expectedToken, token);
- Token token2 = tokenizeAndTestExact(input, TokenType.LITERAL_DT, image).getSubToken2() ;
- assertEquals(dt, token2.getType()) ;
- assertEquals(dtImage1, token2.getImage()) ;
- assertEquals(dtImage2, token2.getImage2()) ;
+ Token token2 = tokenizeAndTestExact(input, TokenType.LITERAL_DT, image).getSubToken2();
+ assertEquals(dt, token2.getType());
+ assertEquals(dtImage1, token2.getImage());
+ assertEquals(dtImage2, token2.getImage2());
}
@Test
public void tokenLiteralDT_0() {
- tokenizeAndTestLiteralDT("\"123\"^^<x> ", StringType.STRING2, "123",
TokenType.IRI, "x", null) ;
+ tokenizeAndTestLiteralDT("\"123\"^^<x> ", StringType.STRING2, "123",
TokenType.IRI, "x", null);
}
// literal test function.
@Test
public void tokenLiteralDT_1() {
- tokenizeAndTestLiteralDT("'123'^^x:y ", StringType.STRING1, "123",
TokenType.PREFIXED_NAME, "x", "y") ;
+ tokenizeAndTestLiteralDT("'123'^^x:y ", StringType.STRING1, "123",
TokenType.PREFIXED_NAME, "x", "y");
}
@Test
public void tokenLiteralDT_2() {
- tokenizeAndTestLiteralDT("'123'^^:y", StringType.STRING1, "123",
TokenType.PREFIXED_NAME, "", "y") ;
+ tokenizeAndTestLiteralDT("'123'^^:y", StringType.STRING1, "123",
TokenType.PREFIXED_NAME, "", "y");
}
@Test
public void tokenLiteralDT_3() {
- tokenizeAndTestLiteralDT("'''123'''^^<xyz>", StringType.LONG_STRING1,
"123", TokenType.IRI, "xyz", null) ;
+ tokenizeAndTestLiteralDT("'''123'''^^<xyz>", StringType.LONG_STRING1,
"123", TokenType.IRI, "xyz", null);
}
-// @Test(expected = RiotParseException.class)
-// public void tokenLiteralDT_bad_1() {
-// Tokenizer tokenizer = tokenizer("'123'^^ <x> ") ;
-// assertTrue(tokenizer.hasNext()) ;
-// Token token = tokenizer.next() ;
-// assertNotNull(token) ;
-// }
-
-// @Test(expected = RiotParseException.class)
-// public void tokenLiteralDT_bad_2() {
-// Tokenizer tokenizer = tokenizer("'123' ^^<x> ") ;
-// assertTrue(tokenizer.hasNext()) ;
-// Token token = tokenizer.next() ;
-// assertNotNull(token) ; // 123
-// assertEquals(TokenType.STRING1, token.getType()) ;
-// assertEquals("123", token.getImage()) ;
-//
-// assertTrue(tokenizer.hasNext()) ;
-// Token token2 = tokenizer.next() ;
-// assertNotNull(token2) ; // ^^
-// }
-
+ @Test
public void tokenLiteralDT_4() {
- tokenizeAndTestLiteralDT("'123' ^^<xyz>", StringType.STRING1, "123",
TokenType.IRI, "xyz", null) ;
- }
-
- public void tokenLiteralDT_5() {
- tokenizeAndTestLiteralDT("'123'^^ <xyz>", StringType.STRING1, "123",
TokenType.IRI, "xyz", null) ;
- }
-
- public void tokenLiteralDT_6() {
- tokenizeAndTestLiteralDT("'123' ^^ <xyz>", StringType.STRING1,
"123", TokenType.IRI, "xyz", null) ;
+ tokenizeAndTestLiteralDT("'123' ^^<xyz>", StringType.STRING1, "123",
TokenType.IRI, "xyz", null);
}
@Test(expected = RiotParseException.class)
public void tokenLiteralDT_bad_1() {
// Can't split ^^
- Tokenizer tokenizer = tokenizer("'123'^ ^<x> ") ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertNotNull(token) ;
- }
-
-// @Test(expected = RiotParseException.class)
-// public void tokenLiteralDT_bad_4() {
-// Tokenizer tokenizer = tokenizer("'123'^^ x:y") ;
-// assertTrue(tokenizer.hasNext()) ;
-// Token token = tokenizer.next() ;
-// assertNotNull(token) ;
-// }
+ Tokenizer tokenizer = tokenizer("'123'^ ^<x> ");
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertNotNull(token);
+ }
@Test
public void tokenLiteralLang_0() {
- tokenizeAndTestExact("'a'@en", TokenType.LITERAL_LANG, "a", "en") ;
+ tokenizeAndTestExact("'a'@en", TokenType.LITERAL_LANG, "a", "en");
}
@Test
public void tokenLiteralLang_1() {
- tokenizeAndTestExact("'a'@en-UK", TokenType.LITERAL_LANG, "a",
"en-UK") ;
+ tokenizeAndTestExact("'a'@en-UK", TokenType.LITERAL_LANG, "a",
"en-UK");
}
@Test
public void tokenLiteralLang_2() {
- Tokenizer tokenizer = tokenizeAndTestFirst("'' @lang ",
TokenType.LITERAL_LANG, "", "lang") ;
- //testNextToken(tokenizer, TokenType.LITERAL_LANG, "lang") ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("'' @lang ",
TokenType.LITERAL_LANG, "", "lang");
+ //testNextToken(tokenizer, TokenType.LITERAL_LANG, "lang");
}
@Test(expected = RiotParseException.class)
public void tokenLiteralLang_3() {
- tokenFirst("''@ lang ") ;
+ tokenFirst("''@ lang ");
}
@Test(expected = RiotParseException.class)
public void tokenLiteralLang_4() {
- tokenFirst("''@lang- ") ;
+ tokenFirst("''@lang- ");
}
@Test(expected = RiotParseException.class)
public void tokenLiteralLang_5() {
- tokenFirst("'abc'@- ") ;
+ tokenFirst("'abc'@- ");
}
@Test
public void tokenLiteralLang_6() {
- tokenizeAndTestExact("'XYZ'@a-b-c ", TokenType.LITERAL_LANG, "XYZ",
"a-b-c") ;
+ tokenizeAndTestExact("'XYZ'@a-b-c ", TokenType.LITERAL_LANG, "XYZ",
"a-b-c");
}
@Test
public void tokenLiteralLang_7() {
- tokenizeAndTestExact("'X'@a-b9z-c99 ", TokenType.LITERAL_LANG, "X",
"a-b9z-c99") ;
+ tokenizeAndTestExact("'X'@a-b9z-c99 ", TokenType.LITERAL_LANG, "X",
"a-b9z-c99");
}
@Test
public void tokenLiteralLang_8() {
- tokenizeAndTestExact("'X' @a", TokenType.LITERAL_LANG, "X", "a") ;
+ tokenizeAndTestExact("'X' @a", TokenType.LITERAL_LANG, "X", "a");
}
@Test(expected = RiotParseException.class)
public void tokenLiteralLang_bad_1() {
- tokenFirst("''@9-b") ;
+ tokenFirst("''@9-b");
}
@Test(expected = RiotParseException.class)
public void tokenLiteralLang_bad_2() {
- tokenFirst("''@ tag") ;
+ tokenFirst("''@ tag");
+ }
+
+ @Test
+ public void tokenLiteralLangDir_1() {
+ tokenizeAndTestExact("'X' @a--ltr", TokenType.LITERAL_LANG, "X",
"a--ltr");
+ }
+
+ @Test
+ public void tokenLiteralLangDir_2() {
+ // Not checked at this point.
+ tokenizeAndTestExact("'X' @a--AB", TokenType.LITERAL_LANG, "X",
"a--AB");
+ }
+
+ @Test
+ public void tokenLiteralLangDir_3() {
+ tokenizeAndTestExact("'X' @en-GB--ltr", TokenType.LITERAL_LANG, "X",
"en-GB--ltr");
}
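
The three tokenLiteralLangDir tests above cover the new RDF 1.2 surface: a language tag may carry an initial text direction after a "--" separator, and the tokenizer returns the whole tag as the second image of a LITERAL_LANG token (whether the direction value is valid is not checked at this layer). As a minimal, illustrative sketch of the same behaviour through the public builder API (the class name is hypothetical; the TokenizerText calls are the ones used elsewhere in this test class):

    // Illustrative sketch (not part of this commit): tokenizing an RDF 1.2
    // literal with an initial text direction.
    import java.io.ByteArrayInputStream;
    import java.nio.charset.StandardCharsets;
    import org.apache.jena.riot.tokens.Token;
    import org.apache.jena.riot.tokens.Tokenizer;
    import org.apache.jena.riot.tokens.TokenizerText;

    public class LangDirSketch {
        public static void main(String[] args) {
            byte[] bytes = "'hello' @en-GB--ltr".getBytes(StandardCharsets.UTF_8);
            Tokenizer tokenizer = TokenizerText.create()
                    .source(new ByteArrayInputStream(bytes))
                    .build();
            Token t = tokenizer.next();
            System.out.println(t.getType());    // LITERAL_LANG
            System.out.println(t.getImage());   // hello
            System.out.println(t.getImage2());  // en-GB--ltr
        }
    }
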
@Test
public void directive_1() {
- tokenizeAndTestExact("@prefix", TokenType.DIRECTIVE, "prefix") ;
+ tokenizeAndTestExact("@prefix", TokenType.DIRECTIVE, "prefix");
}
@Test
public void directive_2() {
- tokenizeAndTestExact("@base", TokenType.DIRECTIVE, "base") ;
+ tokenizeAndTestExact("@base", TokenType.DIRECTIVE, "base");
}
@Test
public void directive_3() {
- tokenizeAndTestExact("@whatever", TokenType.DIRECTIVE, "whatever") ;
+ tokenizeAndTestExact("@whatever", TokenType.DIRECTIVE, "whatever");
}
-
@Test
public void tokenComment_01() {
- tokenizeAndTestExact("_:123 # Comment", TokenType.BNODE, "123") ;
+ tokenizeAndTestExact("_:123 # Comment", TokenType.BNODE, "123");
}
@Test
public void tokenComment_02() {
- tokenizeAndTestExact("\"foo # Non-Comment\"", TokenType.STRING, "foo #
Non-Comment") ;
+ tokenizeAndTestExact("\"foo # Non-Comment\"", TokenType.STRING, "foo #
Non-Comment");
}
@Test
public void tokenComment_03() {
- Tokenizer tokenizer = tokenizeAndTestFirst("'foo' # Comment\n'bar'",
TokenType.STRING, "foo") ;
- testNextToken(tokenizer, TokenType.STRING, "bar") ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("'foo' # Comment\n'bar'",
TokenType.STRING, "foo");
+ testNextToken(tokenizer, TokenType.STRING, "bar");
}
@Test
public void tokenWord_01() {
- tokenizeAndTestExact("abc", TokenType.KEYWORD, "abc") ;
+ tokenizeAndTestExact("abc", TokenType.KEYWORD, "abc");
}
// Multiple terms
@Test
public void token_multiple() {
- Tokenizer tokenizer = tokenizer("<x><y>") ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertNotNull(token) ;
- assertEquals(TokenType.IRI, token.getType()) ;
- assertEquals("x", token.getImage()) ;
+ Tokenizer tokenizer = tokenizer("<x><y>");
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertNotNull(token);
+ assertEquals(TokenType.IRI, token.getType());
+ assertEquals("x", token.getImage());
+
+ assertTrue(tokenizer.hasNext());
+ Token token2 = tokenizer.next();
+ assertNotNull(token2);
+ assertEquals(TokenType.IRI, token2.getType());
+ assertEquals("y", token2.getImage());
- assertTrue(tokenizer.hasNext()) ;
- Token token2 = tokenizer.next() ;
- assertNotNull(token2) ;
- assertEquals(TokenType.IRI, token2.getType()) ;
- assertEquals("y", token2.getImage()) ;
-
- assertFalse(tokenizer.hasNext()) ;
+ assertFalse(tokenizer.hasNext());
}
// These tests convert some Java characters to UTF-8 and read them back
// as ASCII.
private static Tokenizer tokenizerASCII (String string) {
- ByteArrayInputStream in = bytes(string) ;
+ ByteArrayInputStream in = bytes(string);
Tokenizer tokenizer = TokenizerText.create()
.asciiOnly(true)
.errorHandler(errorHandlerExceptions())
.source(in)
- .build() ;
+ .build();
return tokenizer;
}
private static ByteArrayInputStream bytes(String string) {
- byte b[] = StrUtils.asUTF8bytes(string) ;
- return new ByteArrayInputStream(b) ;
+ byte b[] = StrUtils.asUTF8bytes(string);
+ return new ByteArrayInputStream(b);
}
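
These two helpers are the entire setup for the charset tests: bytes() re-encodes a Java string as UTF-8, and asciiOnly(true) makes the tokenizer reject any non-ASCII byte in that stream as a parse error. A hedged sketch of the behaviour the tests below exercise; errorHandlerExceptions() is statically imported in this test class, and placing it in ErrorHandlerFactory is an assumption here:

    // Illustrative sketch (not part of this commit): asciiOnly(true) turns
    // any non-ASCII input byte into a RiotParseException.
    import java.io.ByteArrayInputStream;
    import java.nio.charset.StandardCharsets;
    import org.apache.jena.riot.RiotParseException;
    // Assumption: errorHandlerExceptions() comes from ErrorHandlerFactory.
    import static org.apache.jena.riot.system.ErrorHandlerFactory.errorHandlerExceptions;
    import org.apache.jena.riot.tokens.Tokenizer;
    import org.apache.jena.riot.tokens.TokenizerText;

    public class AsciiOnlySketch {
        public static void main(String[] args) {
            byte[] bytes = "'abcdé'".getBytes(StandardCharsets.UTF_8);
            Tokenizer tokenizer = TokenizerText.create()
                    .asciiOnly(true)
                    .errorHandler(errorHandlerExceptions())
                    .source(new ByteArrayInputStream(bytes))
                    .build();
            try {
                tokenizer.next();               // 'é' is outside US-ASCII
            } catch (RiotParseException ex) {
                System.out.println("rejected: " + ex.getMessage());
            }
        }
    }
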
@Test
public void tokenizer_charset_str_1() {
- Tokenizer tokenizer = tokenizerASCII("'abc'") ;
- Token t = tokenizer.next() ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizerASCII("'abc'");
+ Token t = tokenizer.next();
+ assertFalse(tokenizer.hasNext());
}
// Check that the RiotParseException is about bad encoding.
@@ -945,54 +972,54 @@ public class TestTokenizer {
@Test(expected = RiotParseException.class)
public void tokenizer_charset_str_2() {
expectBadEncoding(()->{
- Tokenizer tokenizer = tokenizerASCII("'abcdé'") ;
+ Tokenizer tokenizer = tokenizerASCII("'abcdé'");
// ASCII only -> bad.
- Token t = tokenizer.next() ;
+ Token t = tokenizer.next();
});
}
@Test(expected = RiotParseException.class)
public void tokenizer_charset_str_3() {
expectBadEncoding(()->{
- Tokenizer tokenizer = tokenizerASCII("'α'") ;
+ Tokenizer tokenizer = tokenizerASCII("'α'");
// ASCII only -> bad.
- Token t = tokenizer.next() ;
+ Token t = tokenizer.next();
});
}
@Test(expected = RiotParseException.class)
public void tokenizer_charset_uri_1() {
expectBadEncoding(()->{
- Tokenizer tokenizer = tokenizerASCII("<http://example/abcdé>") ;
+ Tokenizer tokenizer = tokenizerASCII("<http://example/abcdé>");
// ASCII only -> bad.
- Token t = tokenizer.next() ;
+ Token t = tokenizer.next();
});
}
@Test(expected=RiotParseException.class)
public void token_replacmentChar_uri_1() {
- Tokenizer tokenizer = tokenizer("<a\uFFFDz>") ;
- testNextToken(tokenizer, TokenType.IRI) ;
+ Tokenizer tokenizer = tokenizer("<a\uFFFDz>");
+ testNextToken(tokenizer, TokenType.IRI);
}
@Test(expected=RiotParseException.class)
public void token_replacmentChar_uri_2() {
- Tokenizer tokenizer = tokenizer("<a\\uFFFDz>") ;
- testNextToken(tokenizer, TokenType.IRI) ;
+ Tokenizer tokenizer = tokenizer("<a\\uFFFDz>");
+ testNextToken(tokenizer, TokenType.IRI);
}
@Test(expected=RiotParseException.class)
public void token_replacmentChar_bnode_1() {
- Tokenizer tokenizer = tokenizer("ns\uFFFD:xyz") ;
- testNextToken(tokenizer, TokenType.PREFIXED_NAME) ;
- //assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer("ns\uFFFD:xyz");
+ testNextToken(tokenizer, TokenType.PREFIXED_NAME);
+ //assertFalse(tokenizer.hasNext());
}
@Test(expected=RiotParseException.class)
public void token_replacmentChar_bnode_2() {
- Tokenizer tokenizer = tokenizer("ns:\uFFFDabc") ;
- testNextToken(tokenizer, TokenType.PREFIXED_NAME) ;
- //assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer("ns:\uFFFDabc");
+ testNextToken(tokenizer, TokenType.PREFIXED_NAME);
+ //assertFalse(tokenizer.hasNext());
}
private final int CountWaringsOnReplacmeentChar = 0;
@@ -1072,8 +1099,8 @@ public class TestTokenizer {
@Test(expected=RiotException.class)
public void tokenIRI_tab() {
// Raw tab in an IRI string. Illegal - this is an error.
- Tokenizer tokenizer = tokenizer("<http://example/invalid/iri/with_\t_tab>") ;
- testNextToken(tokenizer, TokenType.IRI) ;
+ Tokenizer tokenizer = tokenizer("<http://example/invalid/iri/with_\t_tab>");
+ testNextToken(tokenizer, TokenType.IRI);
}
private static Token testExpectWarning(String input, TokenType expectedTokenType, int warningCount) {
@@ -1084,8 +1111,8 @@ public class TestTokenizer {
private static Token testExpectWarning(PeekReader r, TokenType expectedTokenType, int warningCount) {
ErrorHandlerRecorder errHandler = new ErrorHandlerRecorder(errorHandlerSimple());
Tokenizer tokenizer = TokenizerText.create().source(r).errorHandler(errHandler).build();
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
if ( expectedTokenType != null )
assertEquals(expectedTokenType, token.getType());
assertFalse("Expected one token", tokenizer.hasNext());
@@ -1098,46 +1125,46 @@ public class TestTokenizer {
@Test
public void tokenizer_BOM_1() {
// BOM
- ByteArrayInputStream in = bytes("\uFEFF'abc'") ;
- Tokenizer tokenizer = TokenizerText.create().source(in).build() ;
- assertTrue(tokenizer.hasNext()) ;
- Token token = tokenizer.next() ;
- assertNotNull(token) ;
- assertEquals(TokenType.STRING, token.getType()) ;
- assertEquals("abc", token.getImage()) ;
- assertFalse(tokenizer.hasNext()) ;
+ ByteArrayInputStream in = bytes("\uFEFF'abc'");
+ Tokenizer tokenizer = TokenizerText.create().source(in).build();
+ assertTrue(tokenizer.hasNext());
+ Token token = tokenizer.next();
+ assertNotNull(token);
+ assertEquals(TokenType.STRING, token.getType());
+ assertEquals("abc", token.getImage());
+ assertFalse(tokenizer.hasNext());
}
// First symbol from the stream.
private static void testSymbol(String string, TokenType expected) {
- Tokenizer tokenizer = tokenizeAndTestFirst(string, expected, null) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst(string, expected, null);
assertFalse(tokenizer.hasNext());
}
// -- Symbols
// CNTRL
// @Test public void tokenizer_symbol_01() { testSymbol("*",
TokenType.STAR)
- // ; }
+ //; }
@Test
public void tokenizer_symbol_02() {
- testSymbol("+", TokenType.PLUS) ;
+ testSymbol("+", TokenType.PLUS);
}
@Test
public void tokenizer_symbol_03() {
- testSymbol("-", TokenType.MINUS) ;
+ testSymbol("-", TokenType.MINUS);
}
- // @Test public void tokenizer_symbol_04() { testSymbol("<", TokenType.LT)
;
+ // @Test public void tokenizer_symbol_04() { testSymbol("<", TokenType.LT);
// }
@Test
public void tokenizer_symbol_05() {
- testSymbol(">", TokenType.GT) ;
+ testSymbol(">", TokenType.GT);
}
@Test
public void tokenizer_symbol_06() {
- testSymbol("=", TokenType.EQUALS) ;
+ testSymbol("=", TokenType.EQUALS);
}
// @Test
@@ -1172,25 +1199,25 @@ public class TestTokenizer {
@Test
public void tokenUnit_symbol_11() {
- Tokenizer tokenizer = tokenizeAndTestFirst("+A", TokenType.PLUS, null)
;
+ Tokenizer tokenizer = tokenizeAndTestFirst("+A", TokenType.PLUS, null);
}
@Test
public void tokenUnit_symbol_12() {
- Tokenizer tokenizer = tokenizeAndTestFirst("+-", TokenType.PLUS, null)
;
- testNextToken(tokenizer, TokenType.MINUS) ;
+ Tokenizer tokenizer = tokenizeAndTestFirst("+-", TokenType.PLUS, null);
+ testNextToken(tokenizer, TokenType.MINUS);
}
@Test
public void tokenUnit_symbol_13() {
- testSymbol(".", TokenType.DOT) ;
+ testSymbol(".", TokenType.DOT);
}
@Test
public void tokenUnit_symbol_14() {
- Tokenizer tokenizer = tokenizeAndTestFirst(".a", TokenType.DOT, null) ;
- testNextToken(tokenizer, TokenType.KEYWORD, "a") ;
+ Tokenizer tokenizer = tokenizeAndTestFirst(".a", TokenType.DOT, null);
+ testNextToken(tokenizer, TokenType.KEYWORD, "a");
}
@Test
@@ -1211,99 +1238,214 @@ public class TestTokenizer {
@Test
public void tokenUnit_symbol_17() {
- testSymbol("*", TokenType.STAR) ;
+ testSymbol("*", TokenType.STAR);
}
@Test
public void tokenUnit_symbol_18() {
- testSymbol("\\", TokenType.RSLASH) ;
+ testSymbol("\\", TokenType.RSLASH);
+ }
+
+ @Test
+ public void tokenUnit_symbol_19() {
+ testSymbol(";", TokenType.SEMICOLON);
}
@Test
public void token_newlines_1() {
- Tokenizer tokenizer = tokenizer("\n", true) ;
- testNextToken(tokenizer, TokenType.NL) ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer("\n", true);
+ testNextToken(tokenizer, TokenType.NL);
+ assertFalse(tokenizer.hasNext());
}
@Test
public void token_newlines_2() {
- Tokenizer tokenizer = tokenizer("abc\ndef", true) ;
- testNextToken(tokenizer, TokenType.KEYWORD, "abc") ;
- testNextToken(tokenizer, TokenType.NL) ;
- testNextToken(tokenizer, TokenType.KEYWORD, "def") ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer("abc\ndef", true);
+ testNextToken(tokenizer, TokenType.KEYWORD, "abc");
+ testNextToken(tokenizer, TokenType.NL);
+ testNextToken(tokenizer, TokenType.KEYWORD, "def");
+ assertFalse(tokenizer.hasNext());
}
@Test
public void token_newlines_3() {
- Tokenizer tokenizer = tokenizer("abc\n\ndef", true) ;
- testNextToken(tokenizer, TokenType.KEYWORD, "abc") ;
- testNextToken(tokenizer, TokenType.NL) ;
- testNextToken(tokenizer, TokenType.KEYWORD, "def") ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer("abc\n\ndef", true);
+ testNextToken(tokenizer, TokenType.KEYWORD, "abc");
+ testNextToken(tokenizer, TokenType.NL);
+ testNextToken(tokenizer, TokenType.KEYWORD, "def");
+ assertFalse(tokenizer.hasNext());
}
@Test
public void token_newlines_4() {
- Tokenizer tokenizer = tokenizer("abc\n\rdef", true) ;
- testNextToken(tokenizer, TokenType.KEYWORD, "abc") ;
- testNextToken(tokenizer, TokenType.NL) ;
- testNextToken(tokenizer, TokenType.KEYWORD, "def") ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer("abc\n\rdef", true);
+ testNextToken(tokenizer, TokenType.KEYWORD, "abc");
+ testNextToken(tokenizer, TokenType.NL);
+ testNextToken(tokenizer, TokenType.KEYWORD, "def");
+ assertFalse(tokenizer.hasNext());
}
@Test
public void token_newlines_5() {
- Tokenizer tokenizer = tokenizer("abc\n\n", true) ;
- testNextToken(tokenizer, TokenType.KEYWORD, "abc") ;
- testNextToken(tokenizer, TokenType.NL) ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer("abc\n\n", true);
+ testNextToken(tokenizer, TokenType.KEYWORD, "abc");
+ testNextToken(tokenizer, TokenType.NL);
+ assertFalse(tokenizer.hasNext());
}
@Test
public void token_newlines_6() {
- Tokenizer tokenizer = tokenizer("\n \n", true) ;
- testNextToken(tokenizer, TokenType.NL) ;
- testNextToken(tokenizer, TokenType.NL) ;
- assertFalse(tokenizer.hasNext()) ;
+ Tokenizer tokenizer = tokenizer("\n \n", true);
+ testNextToken(tokenizer, TokenType.NL);
+ testNextToken(tokenizer, TokenType.NL);
+ assertFalse(tokenizer.hasNext());
+ }
+
+ @Test
+ public void token_rdf_star_reified_1() {
+ Tokenizer tokenizer = tokenizer("<<");
+ testNextToken(tokenizer, TokenType.LT2);
+ assertFalse(tokenizer.hasNext());
+ }
+
+ @Test
+ public void token_rdf_star_reified_2() {
+ Tokenizer tokenizer = tokenizer("<<");
+ testNextToken(tokenizer, TokenType.LT2);
+ assertFalse(tokenizer.hasNext());
+ }
+
+ @Test
+ public void token_rdf_star_reified_3() {
+ Tokenizer tokenizer = tokenizer("<<>>");
+ testNextToken(tokenizer, TokenType.LT2);
+ testNextToken(tokenizer, TokenType.GT2);
+ assertFalse(tokenizer.hasNext());
+ }
+
+ @Test
+ public void token_rdf_star_reified_4() {
+ Tokenizer tokenizer = tokenizer("<< >>");
+ testNextToken(tokenizer, TokenType.LT2);
+ testNextToken(tokenizer, TokenType.GT2);
+ assertFalse(tokenizer.hasNext());
+ }
+
+ // LT not active.
+// @Test
+// public void token_rdf_star_reified_5() {
+// Tokenizer tokenizer = tokenizer("< <> >");
+// testNextToken(tokenizer, TokenType.LT);
+// testNextToken(tokenizer, TokenType.IRI);
+// testNextToken(tokenizer, TokenType.GT);
+// assertFalse(tokenizer.hasNext());
+// }
+
+ @Test
+ public void token_rdf_star_reified_6() {
+ Tokenizer tokenizer = tokenizer("<< <> >>");
+ testNextToken(tokenizer, TokenType.LT2);
+ testNextToken(tokenizer, TokenType.IRI);
+ testNextToken(tokenizer, TokenType.GT2);
+ assertFalse(tokenizer.hasNext());
+ }
+
+ @Test
+ public void token_rdf_star_reified_7() {
+ Tokenizer tokenizer = tokenizer("<< <>>>");
+ testNextToken(tokenizer, TokenType.LT2);
+ testNextToken(tokenizer, TokenType.IRI);
+ testNextToken(tokenizer, TokenType.GT2);
+ assertFalse(tokenizer.hasNext());
+ }
+
+ @Test
+ public void token_rdf_star_tripleTerm_1() {
+ Tokenizer tokenizer = tokenizer("<<(");
+ testNextToken(tokenizer, TokenType.L_TRIPLE);
+ assertFalse(tokenizer.hasNext());
+ }
+
+ @Test
+ public void token_rdf_star_tripleTerm_2() {
+ Tokenizer tokenizer = tokenizer(")>>");
+ testNextToken(tokenizer, TokenType.R_TRIPLE);
+ assertFalse(tokenizer.hasNext());
+ }
+
+ @Test
+ public void token_rdf_star_tripleTerm_3() {
+ Tokenizer tokenizer = tokenizer("<<( )>>");
+ testNextToken(tokenizer, TokenType.L_TRIPLE);
+ testNextToken(tokenizer, TokenType.R_TRIPLE);
+ assertFalse(tokenizer.hasNext());
}
@Test
- public void token_rdf_star_1() {
- Tokenizer tokenizer = tokenizer("<<>>") ;
- testNextToken(tokenizer, TokenType.LT2) ;
- testNextToken(tokenizer, TokenType.GT2) ;
- assertFalse(tokenizer.hasNext()) ;
+ public void token_rdf_star_tripleTerm_4() {
+ Tokenizer tokenizer = tokenizer("<<()>>");
+ testNextToken(tokenizer, TokenType.L_TRIPLE);
+ testNextToken(tokenizer, TokenType.R_TRIPLE);
+ assertFalse(tokenizer.hasNext());
}
+ // Not triple term syntax
@Test
- public void token_rdf_star_2() {
- Tokenizer tokenizer = tokenizer("<< >>") ;
- testNextToken(tokenizer, TokenType.LT2) ;
- testNextToken(tokenizer, TokenType.GT2) ;
- assertFalse(tokenizer.hasNext()) ;
+ public void token_rdf_star_tripleTerm_5() {
+ Tokenizer tokenizer = tokenizer("<< () >>");
+ testNextToken(tokenizer, TokenType.LT2);
+ testNextToken(tokenizer, TokenType.LPAREN);
+ testNextToken(tokenizer, TokenType.RPAREN);
+ testNextToken(tokenizer, TokenType.GT2);
+ assertFalse(tokenizer.hasNext());
}
@Test
- public void token_rdf_star_3() {
- Tokenizer tokenizer = tokenizer("<<:s x:p 123>> :q ") ;
- testNextToken(tokenizer, TokenType.LT2) ;
- testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "s") ;
- testNextToken(tokenizer, TokenType.PREFIXED_NAME, "x", "p") ;
+ public void token_rdf_star_tripleTerm_6() {
+ Tokenizer tokenizer = tokenizer(")>> .");
+ testNextToken(tokenizer, TokenType.R_TRIPLE);
+ testNextToken(tokenizer, TokenType.DOT);
+ assertFalse(tokenizer.hasNext());
+ }
+
+ @Test
+ public void token_rdf_star_tripleTerm_7() {
+ Tokenizer tokenizer = tokenizer("().");
+ testNextToken(tokenizer, TokenType.LPAREN);
+ testNextToken(tokenizer, TokenType.RPAREN);
+ testNextToken(tokenizer, TokenType.DOT);
+ assertFalse(tokenizer.hasNext());
+ }
+
+ @Test
+ public void token_rdf_star_occurence1() {
+ Tokenizer tokenizer = tokenizer("<<:s x:p 123>> :q ");
+ testNextToken(tokenizer, TokenType.LT2);
+ testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "s");
+ testNextToken(tokenizer, TokenType.PREFIXED_NAME, "x", "p");
testNextToken(tokenizer, TokenType.INTEGER, "123", null);
- testNextToken(tokenizer, TokenType.GT2) ;
- testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "q") ;
- assertFalse(tokenizer.hasNext()) ;
+ testNextToken(tokenizer, TokenType.GT2);
+ testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "q");
+ assertFalse(tokenizer.hasNext());
}
@Test
- public void token_rdf_star_4() {
- Tokenizer tokenizer = tokenizer("<<<>>>") ;
- testNextToken(tokenizer, TokenType.LT2) ;
- Token t = testNextToken(tokenizer, TokenType.IRI) ;
+ public void token_rdf_star_inner_IRI() {
+ Tokenizer tokenizer = tokenizer("<<<>>>"); // which is << <>
>>
+ testNextToken(tokenizer, TokenType.LT2);
+ Token t = testNextToken(tokenizer, TokenType.IRI);
assertEquals("", t.getImage());
- testNextToken(tokenizer, TokenType.GT2) ;
- assertFalse(tokenizer.hasNext()) ;
+ testNextToken(tokenizer, TokenType.GT2);
+ assertFalse(tokenizer.hasNext());
+ }
+
+ @Test
+ public void token_rdf_star_inner_triple_term() {
+ Tokenizer tokenizer = tokenizer("<<<<()>>>>"); // which is << <<(
)>> >>
+ testNextToken(tokenizer, TokenType.LT2);
+ testNextToken(tokenizer, TokenType.L_TRIPLE);
+ testNextToken(tokenizer, TokenType.R_TRIPLE);
+ testNextToken(tokenizer, TokenType.GT2);
+ assertFalse(tokenizer.hasNext());
}
}
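
Taken together, the new cases pin down the RDF 1.2 token layer: << and >> (LT2/GT2) delimit reified triples, while the compound delimiters <<( and )>> come back as single L_TRIPLE/R_TRIPLE tokens for triple terms, matched in preference to << when the "(" follows directly. A standalone sketch walking the tokens of a triple term (class name hypothetical; the tokenizer calls are those used in the tests above):

    // Illustrative sketch (not part of this commit): list the tokens of an
    // RDF 1.2 triple term.
    import java.io.ByteArrayInputStream;
    import java.nio.charset.StandardCharsets;
    import org.apache.jena.riot.tokens.Token;
    import org.apache.jena.riot.tokens.Tokenizer;
    import org.apache.jena.riot.tokens.TokenizerText;

    public class TripleTermSketch {
        public static void main(String[] args) {
            byte[] bytes = "<<( :s :p :o )>>".getBytes(StandardCharsets.UTF_8);
            Tokenizer tokenizer = TokenizerText.create()
                    .source(new ByteArrayInputStream(bytes))
                    .build();
            while (tokenizer.hasNext()) {
                Token t = tokenizer.next();
                // Prints L_TRIPLE, then PREFIXED_NAME three times, then R_TRIPLE.
                System.out.println(t.getType());
            }
        }
    }
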
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/lib/Chars.java b/jena-base/src/main/java/org/apache/jena/atlas/lib/Chars.java
index 9ad32031ec..e9db62c855 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/lib/Chars.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/lib/Chars.java
@@ -40,11 +40,6 @@ public class Chars
'9' , 'a' , 'b' , 'c' , 'd' , 'e' , 'f' };
-// , 'g' , 'h' ,
-// 'i' , 'j' , 'k' , 'l' , 'm' , 'n' ,
-// 'o' , 'p' , 'q' , 'r' , 's' , 't' ,
-// 'u' , 'v' , 'w' , 'x' , 'y' , 'z'
-
public static final Charset charsetUTF8 = StandardCharsets.UTF_8;
public static final Charset charsetASCII = StandardCharsets.US_ASCII;
@@ -234,6 +229,7 @@ public class Chars
public static final char CH_RSLASH = '\\' ;
public static final char CH_PERCENT = '%' ;
public static final char CH_VBAR = '|' ;
+ public static final char CH_TILDE = '~' ;
// Byte versions of the above
public static final byte B_NL = NL ;
@@ -272,6 +268,7 @@ public class Chars
public static final byte B_RSLASH = '\\' ;
public static final byte B_PERCENT = '%' ;
public static final byte B_VBAR = '|' ;
+ public static final byte B_TILDE = '~' ;
// String versions - a few compound "chars" as well.
public static final String S_LBRACKET = "[" ;
@@ -309,5 +306,5 @@ public class Chars
public static final String S_RSLASH = "\\" ;
public static final String S_PERCENT = "%" ;
public static final String S_VBAR = "|" ;
-
+ public static final String S_TILDE = "~" ;
}