One kind of tokenized string, with string type recorded separately. Project: http://git-wip-us.apache.org/repos/asf/jena/repo Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/0e2fad4f Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/0e2fad4f Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/0e2fad4f
Branch: refs/heads/master Commit: 0e2fad4f2e7e4237d27847839fa3a7b8eb1941d8 Parents: 0141994 Author: Andy Seaborne <[email protected]> Authored: Tue Jan 31 21:20:30 2017 +0000 Committer: Andy Seaborne <[email protected]> Committed: Tue Jan 31 21:20:30 2017 +0000 ---------------------------------------------------------------------- .../apache/jena/atlas/json/io/parser/JSONP.java | 9 +-- .../atlas/json/io/parser/JSONParserBase.java | 8 +-- .../atlas/json/io/parser/TokenizerJSON.java | 36 +++++++--- .../org/apache/jena/riot/lang/LangNTuple.java | 28 ++++---- .../org/apache/jena/riot/lang/LangRDFJSON.java | 4 +- .../jena/riot/system/ParserProfileBase.java | 4 -- .../org/apache/jena/riot/tokens/StringType.java | 22 ++++++ .../java/org/apache/jena/riot/tokens/Token.java | 72 +++++++++----------- .../org/apache/jena/riot/tokens/TokenType.java | 8 +-- .../apache/jena/riot/tokens/TokenizerText.java | 22 +++--- .../sparql/resultset/JSONInputIterator.java | 10 +-- .../apache/jena/riot/tokens/TestTokenizer.java | 69 ++++++++++--------- 12 files changed, 163 insertions(+), 129 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/jena/blob/0e2fad4f/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/JSONP.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/JSONP.java b/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/JSONP.java index 701accf..f9d81fa 100644 --- a/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/JSONP.java +++ b/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/JSONP.java @@ -65,7 +65,7 @@ public class JSONP extends JSONParserBase for(;;) { handler.startPair(currLine, currCol) ; - if ( ! lookingAt(KEYWORD) && ! lookingAtString() ) + if ( ! lookingAt(KEYWORD) && ! lookingAtKeyString() ) exception("Not a key for a JSON object: "+peekToken()) ; String key = peekToken().getImage() ; nextToken() ; @@ -102,11 +102,8 @@ public class JSONP extends JSONParserBase case DECIMAL: { handler.valueDecimal(peekToken().getImage(), currLine, currCol) ; nextToken() ; return ; } case DOUBLE: { handler.valueDouble(peekToken().getImage(), currLine, currCol) ; nextToken() ; return ; } - // String - liberal - case STRING1: - case STRING2: - case LONG_STRING1: - case LONG_STRING2: + // String - liberal - includes single quotes and triple quoted forms if the tokenizer supports them. + case STRING: { handler.valueString(peekToken().getImage(), currLine, currCol) ; nextToken() ; http://git-wip-us.apache.org/repos/asf/jena/blob/0e2fad4f/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/JSONParserBase.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/JSONParserBase.java b/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/JSONParserBase.java index 6d5ade1..c46a8b1 100644 --- a/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/JSONParserBase.java +++ b/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/JSONParserBase.java @@ -68,16 +68,12 @@ class JSONParserBase return t.hasType(tokenType) ; } - final protected boolean lookingAtString() + final protected boolean lookingAtKeyString() { Token t = peekTokens.peek() ; if ( t == null ) return false ; - if ( t.hasType(TokenType.STRING1) ) return true ; - if ( t.hasType(TokenType.STRING2) ) return true ; - if ( t.hasType(TokenType.LONG_STRING1) ) return true ; - if ( t.hasType(TokenType.LONG_STRING2) ) return true ; - return false ; + return t.hasType(TokenType.STRING) && ! t.isLongString(); } final protected boolean lookingAtNumber() http://git-wip-us.apache.org/repos/asf/jena/blob/0e2fad4f/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/TokenizerJSON.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/TokenizerJSON.java b/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/TokenizerJSON.java index 9c98cc6..6b1f66e 100644 --- a/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/TokenizerJSON.java +++ b/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/TokenizerJSON.java @@ -18,7 +18,26 @@ package org.apache.jena.atlas.json.io.parser; -import static org.apache.jena.atlas.lib.Chars.* ; +import static org.apache.jena.atlas.lib.Chars.CH_COLON; +import static org.apache.jena.atlas.lib.Chars.CH_COMMA; +import static org.apache.jena.atlas.lib.Chars.CH_DOT; +import static org.apache.jena.atlas.lib.Chars.CH_GT; +import static org.apache.jena.atlas.lib.Chars.CH_HASH; +import static org.apache.jena.atlas.lib.Chars.CH_LBRACE; +import static org.apache.jena.atlas.lib.Chars.CH_LBRACKET; +import static org.apache.jena.atlas.lib.Chars.CH_LPAREN; +import static org.apache.jena.atlas.lib.Chars.CH_LT; +import static org.apache.jena.atlas.lib.Chars.CH_MINUS; +import static org.apache.jena.atlas.lib.Chars.CH_PLUS; +import static org.apache.jena.atlas.lib.Chars.CH_QUOTE1; +import static org.apache.jena.atlas.lib.Chars.CH_QUOTE2; +import static org.apache.jena.atlas.lib.Chars.CH_RBRACE; +import static org.apache.jena.atlas.lib.Chars.CH_RBRACKET; +import static org.apache.jena.atlas.lib.Chars.CH_RPAREN; +import static org.apache.jena.atlas.lib.Chars.CH_SEMICOLON; +import static org.apache.jena.atlas.lib.Chars.CR; +import static org.apache.jena.atlas.lib.Chars.EOF; +import static org.apache.jena.atlas.lib.Chars.NL; import java.io.IOException ; import java.util.NoSuchElementException ; @@ -26,6 +45,7 @@ import java.util.NoSuchElementException ; import org.apache.jena.atlas.io.IO ; import org.apache.jena.atlas.io.PeekReader ; import org.apache.jena.atlas.json.JsonParseException ; +import org.apache.jena.riot.tokens.StringType; import org.apache.jena.riot.tokens.Token ; import org.apache.jena.riot.tokens.TokenType ; import org.apache.jena.riot.tokens.Tokenizer ; @@ -102,6 +122,7 @@ public class TokenizerJSON implements Tokenizer // ---- String // Support both "" and '' strings (only "" is legal JSON) if ( ch == CH_QUOTE1 || ch == CH_QUOTE2 ) { + token.setType(TokenType.STRING); reader.readChar() ; int ch2 = reader.peekChar() ; if ( ch2 == ch ) { @@ -112,24 +133,19 @@ public class TokenizerJSON implements Tokenizer // """-strings/'''-strings reader.readChar() ; token.setImage(readLong(ch, false)) ; - TokenType tt = (ch == CH_QUOTE1) ? TokenType.LONG_STRING1 : TokenType.LONG_STRING2 ; - token.setType(tt) ; + StringType st = (ch == CH_QUOTE1) ? StringType.LONG_STRING1 : StringType.LONG_STRING2 ; + token.setStringType(st) ; return token ; } // Two quotes then a non-quote. // Must be '' or "" - - // No need to pushback characters as we know the lexical form is - // the empty string. - // if ( ch2 != EOF ) reader.pushbackChar(ch2) ; - // if ( ch1 != EOF ) reader.pushbackChar(ch1) ; // Must be '' or - // "" token.setImage("") ; } else // Single quote character. token.setImage(allBetween(ch, ch, true, false)) ; // Single quoted string. - token.setType((ch == CH_QUOTE1) ? TokenType.STRING1 : TokenType.STRING2) ; + StringType st = (ch == CH_QUOTE1) ? StringType.STRING1 : StringType.STRING2 ; + token.setStringType(st) ; return token ; } http://git-wip-us.apache.org/repos/asf/jena/blob/0e2fad4f/jena-arq/src/main/java/org/apache/jena/riot/lang/LangNTuple.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/LangNTuple.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/LangNTuple.java index aed16d3..40768fb 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/LangNTuple.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/LangNTuple.java @@ -18,13 +18,12 @@ package org.apache.jena.riot.lang; -import static org.apache.jena.riot.tokens.TokenType.STRING2 ; - import java.util.Iterator ; import org.apache.jena.graph.Node ; import org.apache.jena.riot.system.ParserProfile ; import org.apache.jena.riot.system.StreamRDF ; +import org.apache.jena.riot.tokens.StringType; import org.apache.jena.riot.tokens.Token ; import org.apache.jena.riot.tokens.TokenType ; import org.apache.jena.riot.tokens.Tokenizer ; @@ -100,25 +99,26 @@ public abstract class LangNTuple<X> extends LangBase implements Iterator<X> switch (token.getType()) { case IRI: case BNODE: - case STRING2: - return ; - case LITERAL_DT: - if ( profile.isStrictMode() && ! token.getSubToken1().hasType(STRING2) ) - exception(token, "Illegal single quoted string: %s", token) ; + return; + case STRING: + checkString(token); return ; case LITERAL_LANG: - if ( profile.isStrictMode() && ! token.getSubToken1().hasType(STRING2) ) - exception(token, "Illegal single quoted string: %s", token) ; + case LITERAL_DT: + checkString(token.getSubToken1()); return ; - case STRING1: - if ( profile.isStrictMode() ) - exception(token, "Illegal single quoted string: %s", token) ; - break ; default: exception(token, "Illegal object: %s", token) ; } } - + + private void checkString(Token token) { + if ( token.isLongString() ) + exception(token, "Triple quoted string not permitted: %s", token) ; + if ( profile.isStrictMode() && ! token.hasStringType(StringType.STRING2) ) + exception(token, "Not a \"\"-quoted string: %s", token); + } + /** SkipOnBadTerm - do not output tuples with bad RDF terms */ public boolean getSkipOnBadTerm() { return skipOnBadTerm ; } /** SkipOnBadTerm - do not output tuples with bad RDF terms */ http://git-wip-us.apache.org/repos/asf/jena/blob/0e2fad4f/jena-arq/src/main/java/org/apache/jena/riot/lang/LangRDFJSON.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/LangRDFJSON.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/LangRDFJSON.java index d495e07..cdf0cdc 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/lang/LangRDFJSON.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/LangRDFJSON.java @@ -441,13 +441,13 @@ public class LangRDFJSON extends LangBase private boolean isPropertyName() { - return lookingAt(TokenType.STRING1) || lookingAt(TokenType.STRING2); + return lookingAt(TokenType.STRING); } private Token checkValidForObjectProperty() { Token t = null; - if (lookingAt(TokenType.STRING1) || lookingAt(TokenType.STRING2)) + if (lookingAt(TokenType.STRING) ) t = nextToken(); else exception(peekToken(), "JSON Values given for properties for an Object must be Strings") ; http://git-wip-us.apache.org/repos/asf/jena/blob/0e2fad4f/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileBase.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileBase.java b/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileBase.java index 8ed68e3..e855494 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileBase.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/system/ParserProfileBase.java @@ -201,10 +201,6 @@ public class ParserProfileBase implements ParserProfile { return pp.createLangLiteral(str, token.getImage2(), line, col) ; case STRING : - case STRING1 : - case STRING2 : - case LONG_STRING1 : - case LONG_STRING2 : return pp.createStringLiteral(str, line, col) ; default : { Node x = pp.createNodeFromToken(currentGraph, token, line, col) ; http://git-wip-us.apache.org/repos/asf/jena/blob/0e2fad4f/jena-arq/src/main/java/org/apache/jena/riot/tokens/StringType.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/StringType.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/StringType.java new file mode 100644 index 0000000..9e45434 --- /dev/null +++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/StringType.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.riot.tokens; + +/** Seen form of a {@link TokenType#STRING} */ +public enum StringType { STRING1, STRING2, LONG_STRING1, LONG_STRING2 } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/jena/blob/0e2fad4f/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java index 27810e2..084584c 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java @@ -60,6 +60,7 @@ public final class Token private Token subToken1 = null ; // A related token (used for datatype literals and language tags) private Token subToken2 = null ; // A related token (used for datatype literals and language tags) + private StringType stringType = null ; public int cntrlCode = 0 ; private long column ; @@ -70,14 +71,25 @@ public final class Token public static final String ImageTrue = "true" ; public static final String ImageFalse = "false" ; - public final TokenType getType() { return tokenType ; } - public final String getImage() { return tokenImage ; } + public final TokenType getType() { return tokenType ; } + public final String getImage() { return tokenImage ; } //public final String getImage1() { return tokenImage1 ; } - public final String getImage2() { return tokenImage2 ; } - public final int getCntrlCode() { return cntrlCode ; } - public final Token getSubToken1() { return subToken1 ; } - public final Token getSubToken2() { return subToken2 ; } + public final String getImage2() { return tokenImage2 ; } + public final int getCntrlCode() { return cntrlCode ; } + public final Token getSubToken1() { return subToken1 ; } + public final Token getSubToken2() { return subToken2 ; } + public final StringType getStringType() { return stringType ; } + public final boolean hasStringType(StringType st) { return this.stringType == st ;} + public final boolean isLongString() { + switch(stringType) { + case LONG_STRING1: + case LONG_STRING2: + return true; + default: + return false; + } + } public final Token setType(TokenType tokenType) { this.tokenType = tokenType ; return this ; } public final Token setImage(String tokenImage) { this.tokenImage = tokenImage ; return this ; } @@ -90,6 +102,8 @@ public final class Token public final Token setSubToken1(Token subToken) { this.subToken1 = subToken ; return this ; } public final Token setSubToken2(Token subToken) { this.subToken2 = subToken ; return this ; } + public final Token setStringType(StringType st) { this.stringType = st ; return this ; } + static Token create(String s) { PeekReader pr = PeekReader.readString(s) ; @@ -176,8 +190,6 @@ public final class Token switch (tokenType) { case STRING: - case STRING1: case STRING2: - case LONG_STRING1: case LONG_STRING2: return getImage() ; default: return null ; @@ -277,24 +289,11 @@ public final class Token return sb.toString() ; } - public boolean isEOF() { return tokenType == TokenType.EOF ; } + public boolean isEOF() { return tokenType == TokenType.EOF ; } - public boolean isWord() { return tokenType == TokenType.KEYWORD ; } + public boolean isWord() { return tokenType == TokenType.KEYWORD ; } - public boolean isString() - { - switch(tokenType) - { - case STRING: - case STRING1: - case STRING2: - case LONG_STRING1: - case LONG_STRING2: - return true ; - default: - return false ; - } - } + public boolean isString() { return tokenType == TokenType.STRING ; } public boolean isNumber() { @@ -322,10 +321,6 @@ public final class Token case LITERAL_DT: case LITERAL_LANG: case STRING: - case STRING1: - case STRING2: - case LONG_STRING1: - case LONG_STRING2: return true ; case KEYWORD: if ( tokenImage.equals(ImageANY) ) @@ -346,9 +341,16 @@ public final class Token case PREFIXED_NAME : case LITERAL_DT: case LITERAL_LANG: - case STRING1: - case STRING2: - return true ; + return true; + case STRING : { + switch (stringType) { + case STRING1 : + case STRING2 : + return true; + default : + return false; + } + } default: return false ; } @@ -361,10 +363,6 @@ public final class Token case LITERAL_DT: case LITERAL_LANG: case STRING: - case STRING1: - case STRING2: - case LONG_STRING1: - case LONG_STRING2: return true ; default: return false ; @@ -439,10 +437,6 @@ public final class Token } case LITERAL_LANG : return NodeFactory.createLiteral(tokenImage, tokenImage2) ; case STRING: - case STRING1: - case STRING2: - case LONG_STRING1: - case LONG_STRING2: return NodeFactory.createLiteral(tokenImage) ; case VAR: return Var.alloc(tokenImage) ; http://git-wip-us.apache.org/repos/asf/jena/blob/0e2fad4f/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenType.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenType.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenType.java index 95b3e7a..2a42497 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenType.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenType.java @@ -19,11 +19,11 @@ package org.apache.jena.riot.tokens ; public enum TokenType { + NODE, IRI, PREFIXED_NAME, BNODE, // BOOLEAN, - // One kind of string? - STRING, // Token created programmatically and superclass of ... - STRING1, STRING2, LONG_STRING1, LONG_STRING2, + // One kind of string - the seen form is record + STRING, LITERAL_LANG, LITERAL_DT, INTEGER, DECIMAL, DOUBLE, @@ -44,5 +44,5 @@ public enum TokenType { // = == + - * / \ EQUALS, EQUIVALENT, PLUS, MINUS, STAR, SLASH, RSLASH, // Whitespace, any comment, (one line comment, multiline comment) - NL, WS, COMMENT, COMMENT1, COMMENT2, EOF + NL, WS, COMMENT, COMMENT1, COMMENT2, EOF; } http://git-wip-us.apache.org/repos/asf/jena/blob/0e2fad4f/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java index a13e5df..a56119e 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java @@ -217,16 +217,20 @@ public final class TokenizerText implements Tokenizer // ---- Literal if ( ch == CH_QUOTE1 || ch == CH_QUOTE2 ) { + // The token type is STRING. + // We incorporate this into a token for LITERAL_LANG or LITERAL_DT. + token.setType(TokenType.STRING) ; + reader.readChar() ; int ch2 = reader.peekChar() ; if ( ch2 == ch ) { reader.readChar() ; // Read potential second quote. int ch3 = reader.peekChar() ; if ( ch3 == ch ) { - reader.readChar() ; + reader.readChar() ; // Read potential third quote. token.setImage(readLongString(ch, false)) ; - TokenType tt = (ch == CH_QUOTE1) ? TokenType.LONG_STRING1 : TokenType.LONG_STRING2 ; - token.setType(tt) ; + StringType st = (ch == CH_QUOTE1) ? StringType.LONG_STRING1 : StringType.LONG_STRING2 ; + token.setStringType(st) ; } else { // Two quotes then a non-quote. // Must be '' or "" @@ -236,16 +240,18 @@ public final class TokenizerText implements Tokenizer // if ( ch1 != EOF ) reader.pushbackChar(ch1) ; // Must be // '' or "" token.setImage("") ; - token.setType((ch == CH_QUOTE1) ? TokenType.STRING1 : TokenType.STRING2) ; + StringType st = (ch == CH_QUOTE1) ? StringType.STRING1 : StringType.STRING2 ; + token.setStringType(st) ; } } else { - // Single quote character. + // One quote character. token.setImage(readString(ch, ch)) ; - // Single quoted string. - token.setType((ch == CH_QUOTE1) ? TokenType.STRING1 : TokenType.STRING2) ; + // Record exactly what form of STRING was seen. + StringType st = (ch == CH_QUOTE1) ? StringType.STRING1 : StringType.STRING2 ; + token.setStringType(st) ; } - // Whte space after lexical part of a literal. + // White space after lexical part of a literal. skip() ; // Literal. Is it @ or ^^ http://git-wip-us.apache.org/repos/asf/jena/blob/0e2fad4f/jena-arq/src/main/java/org/apache/jena/sparql/resultset/JSONInputIterator.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/resultset/JSONInputIterator.java b/jena-arq/src/main/java/org/apache/jena/sparql/resultset/JSONInputIterator.java index 26575af..c8f89f1 100644 --- a/jena-arq/src/main/java/org/apache/jena/sparql/resultset/JSONInputIterator.java +++ b/jena-arq/src/main/java/org/apache/jena/sparql/resultset/JSONInputIterator.java @@ -202,7 +202,7 @@ public class JSONInputIterator extends QueryIteratorBase { nextToken(); vars.clear(); do { - if ( lookingAt(TokenType.STRING1) || lookingAt(TokenType.STRING2) ) { + if ( lookingAt(TokenType.STRING) ) { Token t = nextToken(); String var = t.getImage(); vars.add(var); @@ -227,7 +227,7 @@ public class JSONInputIterator extends QueryIteratorBase { // End of links nextToken(); return; - } else if ( lookingAt(TokenType.STRING1) || lookingAt(TokenType.STRING2) ) { + } else if ( lookingAt(TokenType.STRING) ) { // Ignore link and continue nextToken(); } else { @@ -491,7 +491,7 @@ public class JSONInputIterator extends QueryIteratorBase { } private String parseNodeInfo(String name) { - if ( lookingAt(TokenType.STRING1) || lookingAt(TokenType.STRING2) ) { + if ( lookingAt(TokenType.STRING) ) { Token t = nextToken(); String value = t.getImage(); checkComma(TokenType.RBRACE); @@ -527,12 +527,12 @@ public class JSONInputIterator extends QueryIteratorBase { // JSON Parsing Helpers taken from LangRDFJSON private boolean isPropertyName() { - return lookingAt(TokenType.STRING1) || lookingAt(TokenType.STRING2); + return lookingAt(TokenType.STRING); } private Token checkValidForStringProperty(String property) { Token t = null; - if ( lookingAt(TokenType.STRING1) || lookingAt(TokenType.STRING2) ) { + if ( lookingAt(TokenType.STRING) ) { t = nextToken(); } else { exception(peekToken(), "JSON Values given for property " + property + " must be Strings"); http://git-wip-us.apache.org/repos/asf/jena/blob/0e2fad4f/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java index a568c52..4baf1f2 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java @@ -60,6 +60,12 @@ public class TestTokenizer extends BaseTest { return tokenizeAndTestExact(input, tokenType, tokenImage, null) ; } + private static Token tokenizeAndTestExact(String input, StringType stringType, String tokenImage) { + Token token = tokenizeAndTestExact(input, TokenType.STRING, tokenImage, null) ; + assertEquals(stringType, token.getStringType()); + return token; + } + private static Token tokenizeAndTestExact(String input, TokenType tokenType, String tokenImage1, String tokenImage2) { Tokenizer tokenizer = tokenizer(input) ; Token token = testNextToken(tokenizer, tokenType, tokenImage1, tokenImage2) ; @@ -239,38 +245,38 @@ public class TestTokenizer extends BaseTest { @Test public void tokenUnit_str1() { - tokenizeAndTestExact(" 'abc' ", TokenType.STRING1, "abc") ; + tokenizeAndTestExact(" 'abc' ", StringType.STRING1, "abc") ; } @Test public void tokenUnit_str2() { - tokenizeAndTestExact(" '' ", TokenType.STRING1, "") ; + tokenizeAndTestExact(" '' ", StringType.STRING1, "") ; } @Test public void tokenUnit_str3() { - tokenizeAndTestExact("'\\u0020'", TokenType.STRING1, " ") ; + tokenizeAndTestExact("'\\u0020'", StringType.STRING1, " ") ; } @Test public void tokenUnit_str4() { - tokenizeAndTestExact("'a\\'\\\"\\n\\t\\r\\f'", TokenType.STRING1, "a'\"\n\t\r\f") ; + tokenizeAndTestExact("'a\\'\\\"\\n\\t\\r\\f'", StringType.STRING1, "a'\"\n\t\r\f") ; } @Test(expected = RiotParseException.class) public void tokenUnit_str5() { // This is a raw newline. \n is a Java string escape. - tokenizeAndTestExact("'\n'", TokenType.STRING1, "\n") ; + tokenizeAndTestExact("'\n'", StringType.STRING1, "\n") ; } @Test public void tokenUnit_str6() { - tokenizeAndTestExact(" \"abc\" ", TokenType.STRING2, "abc") ; + tokenizeAndTestExact(" \"abc\" ", StringType.STRING2, "abc") ; } @Test public void tokenUnit_str7() { - tokenizeAndTestExact("\"\"", TokenType.STRING2, "") ; + tokenizeAndTestExact("\"\"", StringType.STRING2, "") ; } @Test(expected = RiotParseException.class) @@ -286,57 +292,57 @@ public class TestTokenizer extends BaseTest { @Test public void tokenUnit_str10() { - tokenizeAndTestExact("'\\'abc'", TokenType.STRING1, "'abc") ; + tokenizeAndTestExact("'\\'abc'", StringType.STRING1, "'abc") ; } @Test public void tokenUnit_str11() { - tokenizeAndTestExact("'\\U00000020'", TokenType.STRING1, " ") ; + tokenizeAndTestExact("'\\U00000020'", StringType.STRING1, " ") ; } @Test public void tokenUnit_str_long1() { - tokenizeAndTestExact("'''aaa'''", TokenType.LONG_STRING1, "aaa") ; + tokenizeAndTestExact("'''aaa'''", StringType.LONG_STRING1, "aaa") ; } @Test public void tokenUnit_str_long2() { - tokenizeAndTestExact("\"\"\"aaa\"\"\"", TokenType.LONG_STRING2, "aaa") ; + tokenizeAndTestExact("\"\"\"aaa\"\"\"", StringType.LONG_STRING2, "aaa") ; } @Test public void tokenUnit_str_long3() { - tokenizeAndTestExact("''''1234'''", TokenType.LONG_STRING1, "'1234") ; + tokenizeAndTestExact("''''1234'''", StringType.LONG_STRING1, "'1234") ; } @Test public void tokenUnit_str_long4() { - tokenizeAndTestExact("'''''1234'''", TokenType.LONG_STRING1, "''1234") ; + tokenizeAndTestExact("'''''1234'''", StringType.LONG_STRING1, "''1234") ; } @Test public void tokenUnit_str_long5() { - tokenizeAndTestExact("'''\\'''1234'''", TokenType.LONG_STRING1, "'''1234") ; + tokenizeAndTestExact("'''\\'''1234'''", StringType.LONG_STRING1, "'''1234") ; } @Test public void tokenUnit_str_long6() { - tokenizeAndTestExact("\"\"\"\"1234\"\"\"", TokenType.LONG_STRING2, "\"1234") ; + tokenizeAndTestExact("\"\"\"\"1234\"\"\"", StringType.LONG_STRING2, "\"1234") ; } @Test public void tokenUnit_str_long7() { - tokenizeAndTestExact("\"\"\"\"\"1234\"\"\"", TokenType.LONG_STRING2, "\"\"1234") ; + tokenizeAndTestExact("\"\"\"\"\"1234\"\"\"", StringType.LONG_STRING2, "\"\"1234") ; } @Test public void tokenUnit_str_long8() { - tokenizeAndTestExact("''''''", TokenType.LONG_STRING1, "") ; + tokenizeAndTestExact("''''''", StringType.LONG_STRING1, "") ; } @Test public void tokenUnit_str_long9() { - tokenizeAndTestExact("\"\"\"'''''''''''''''''\"\"\"", TokenType.LONG_STRING2, "'''''''''''''''''") ; + tokenizeAndTestExact("\"\"\"'''''''''''''''''\"\"\"", StringType.LONG_STRING2, "'''''''''''''''''") ; } @Test(expected = RiotParseException.class) @@ -668,9 +674,10 @@ public class TestTokenizer extends BaseTest { tokenizeAndTestExact("0Xabc", TokenType.HEX, "0Xabc") ; } - private static void tokenizeAndTestLiteralDT(String input, TokenType lexType, String image, TokenType dt, + private static void tokenizeAndTestLiteralDT(String input, StringType lexType, String image, TokenType dt, String dtImage1, String dtImage2) { - Token lexToken = new Token(lexType, image) ; + Token lexToken = new Token(TokenType.STRING, image) ; + lexToken.setStringType(lexType); Token dtToken = new Token(dt, dtImage1, dtImage2) ; tokenizeAndTest(input, TokenType.LITERAL_DT, image, null, lexToken, dtToken) ; @@ -691,24 +698,24 @@ public class TestTokenizer extends BaseTest { @Test public void tokenLiteralDT_0() { - tokenizeAndTestLiteralDT("\"123\"^^<x> ", TokenType.STRING2, "123", TokenType.IRI, "x", null) ; + tokenizeAndTestLiteralDT("\"123\"^^<x> ", StringType.STRING2, "123", TokenType.IRI, "x", null) ; } // literal test function. @Test public void tokenLiteralDT_1() { - tokenizeAndTestLiteralDT("'123'^^x:y ", TokenType.STRING1, "123", TokenType.PREFIXED_NAME, "x", "y") ; + tokenizeAndTestLiteralDT("'123'^^x:y ", StringType.STRING1, "123", TokenType.PREFIXED_NAME, "x", "y") ; } @Test public void tokenLiteralDT_2() { - tokenizeAndTestLiteralDT("'123'^^:y", TokenType.STRING1, "123", TokenType.PREFIXED_NAME, "", "y") ; + tokenizeAndTestLiteralDT("'123'^^:y", StringType.STRING1, "123", TokenType.PREFIXED_NAME, "", "y") ; } @Test public void tokenLiteralDT_3() { - tokenizeAndTestLiteralDT("'''123'''^^<xyz>", TokenType.LONG_STRING1, "123", TokenType.IRI, "xyz", null) ; + tokenizeAndTestLiteralDT("'''123'''^^<xyz>", StringType.LONG_STRING1, "123", TokenType.IRI, "xyz", null) ; } // @Test(expected = RiotParseException.class) @@ -734,15 +741,15 @@ public class TestTokenizer extends BaseTest { // } public void tokenLiteralDT_4() { - tokenizeAndTestLiteralDT("'123' ^^<xyz>", TokenType.STRING1, "123", TokenType.IRI, "xyz", null) ; + tokenizeAndTestLiteralDT("'123' ^^<xyz>", StringType.STRING1, "123", TokenType.IRI, "xyz", null) ; } public void tokenLiteralDT_5() { - tokenizeAndTestLiteralDT("'123'^^ <xyz>", TokenType.STRING1, "123", TokenType.IRI, "xyz", null) ; + tokenizeAndTestLiteralDT("'123'^^ <xyz>", StringType.STRING1, "123", TokenType.IRI, "xyz", null) ; } public void tokenLiteralDT_6() { - tokenizeAndTestLiteralDT("'123' ^^ <xyz>", TokenType.STRING1, "123", TokenType.IRI, "xyz", null) ; + tokenizeAndTestLiteralDT("'123' ^^ <xyz>", StringType.STRING1, "123", TokenType.IRI, "xyz", null) ; } @Test(expected = RiotParseException.class) @@ -841,13 +848,13 @@ public class TestTokenizer extends BaseTest { @Test public void tokenComment_02() { - tokenizeAndTestExact("\"foo # Non-Comment\"", TokenType.STRING2, "foo # Non-Comment") ; + tokenizeAndTestExact("\"foo # Non-Comment\"", TokenType.STRING, "foo # Non-Comment") ; } @Test public void tokenComment_03() { - Tokenizer tokenizer = tokenizeAndTestFirst("'foo' # Comment\n'bar'", TokenType.STRING1, "foo") ; - testNextToken(tokenizer, TokenType.STRING1, "bar") ; + Tokenizer tokenizer = tokenizeAndTestFirst("'foo' # Comment\n'bar'", TokenType.STRING, "foo") ; + testNextToken(tokenizer, TokenType.STRING, "bar") ; } @Test @@ -915,7 +922,7 @@ public class TestTokenizer extends BaseTest { assertTrue(tokenizer.hasNext()) ; Token token = tokenizer.next() ; assertNotNull(token) ; - assertEquals(TokenType.STRING1, token.getType()) ; + assertEquals(TokenType.STRING, token.getType()) ; assertEquals("abc", token.getImage()) ; assertFalse(tokenizer.hasNext()) ; }
