Repository: jena Updated Branches: refs/heads/master 18f0c3889 -> 1992b41e6
JENA-852 : Fix for whitespace around ^^ and before @lang. Project: http://git-wip-us.apache.org/repos/asf/jena/repo Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/1992b41e Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/1992b41e Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/1992b41e Branch: refs/heads/master Commit: 1992b41e65dc456f958876093385dac083bbe996 Parents: 18f0c38 Author: Andy Seaborne <[email protected]> Authored: Mon Jan 19 16:46:27 2015 +0000 Committer: Andy Seaborne <[email protected]> Committed: Mon Jan 19 16:46:27 2015 +0000 ---------------------------------------------------------------------- .../apache/jena/riot/tokens/TokenizerText.java | 18 ++-- .../apache/jena/riot/tokens/TestTokenizer.java | 91 +++++++++++++------- 2 files changed, 73 insertions(+), 36 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/jena/blob/1992b41e/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java index c688fa6..1c0c26d 100644 --- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java +++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java @@ -211,10 +211,14 @@ public final class TokenizerText implements Tokenizer token.setType((ch == CH_QUOTE1) ? TokenType.STRING1 : TokenType.STRING2) ; } + // White space after lexical part of a literal. + skip() ; + // Literal. Is it @ or ^^ if ( reader.peekChar() == CH_AT ) { reader.readChar() ; - + // White space is not legal here. + // The Turtle spec terminal is "LANGTAG" which includes the '@'. 
Token mainToken = new Token(token) ; mainToken.setType(TokenType.LITERAL_LANG) ; mainToken.setSubToken1(token) ; @@ -224,10 +228,14 @@ public final class TokenizerText implements Tokenizer checkLiteralLang(token.getImage(), token.getImage2()) ; } else if ( reader.peekChar() == '^' ) { expect("^^") ; - // Check no whitespace. - int nextCh = reader.peekChar() ; - if ( isWhitespace(nextCh) ) - exception("No whitespace after ^^ in literal with datatype") ; + // White space is legal after a ^^. + // It's not a good idea, but it is legal. +// // Check no whitespace. +// int nextCh = reader.peekChar() ; +// if ( isWhitespace(nextCh) ) +// exception("No whitespace after ^^ in literal with datatype") ; + skip() ; + // Stash current token. Token mainToken = new Token(token) ; mainToken.setSubToken1(token) ; http://git-wip-us.apache.org/repos/asf/jena/blob/1992b41e/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java ---------------------------------------------------------------------- diff --git a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java index a8e9602..d81e52e 100644 --- a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java +++ b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java @@ -627,43 +627,56 @@ public class TestTokenizer extends BaseTest { tokenizeAndTestLiteralDT("'''123'''^^<xyz>", TokenType.LONG_STRING1, "123", TokenType.IRI, "xyz", null) ; } - @Test(expected = RiotParseException.class) - public void tokenLiteralDT_bad_1() { - Tokenizer tokenizer = tokenizer("'123'^^ <x> ") ; - assertTrue(tokenizer.hasNext()) ; - Token token = tokenizer.next() ; - assertNotNull(token) ; - } - - @Test(expected = RiotParseException.class) - public void tokenLiteralDT_bad_2() { - Tokenizer tokenizer = tokenizer("'123' ^^<x> ") ; - assertTrue(tokenizer.hasNext()) ; - Token token = tokenizer.next() ; - assertNotNull(token) ; // 123 - 
assertEquals(TokenType.STRING1, token.getType()) ; - assertEquals("123", token.getImage()) ; - - assertTrue(tokenizer.hasNext()) ; - Token token2 = tokenizer.next() ; - assertNotNull(token2) ; // ^^ +// @Test(expected = RiotParseException.class) +// public void tokenLiteralDT_bad_1() { +// Tokenizer tokenizer = tokenizer("'123'^^ <x> ") ; +// assertTrue(tokenizer.hasNext()) ; +// Token token = tokenizer.next() ; +// assertNotNull(token) ; +// } + +// @Test(expected = RiotParseException.class) +// public void tokenLiteralDT_bad_2() { +// Tokenizer tokenizer = tokenizer("'123' ^^<x> ") ; +// assertTrue(tokenizer.hasNext()) ; +// Token token = tokenizer.next() ; +// assertNotNull(token) ; // 123 +// assertEquals(TokenType.STRING1, token.getType()) ; +// assertEquals("123", token.getImage()) ; +// +// assertTrue(tokenizer.hasNext()) ; +// Token token2 = tokenizer.next() ; +// assertNotNull(token2) ; // ^^ +// } + + public void tokenLiteralDT_4() { + tokenizeAndTestLiteralDT("'123' ^^<xyz>", TokenType.STRING1, "123", TokenType.IRI, "xyz", null) ; + } + + public void tokenLiteralDT_5() { + tokenizeAndTestLiteralDT("'123'^^ <xyz>", TokenType.STRING1, "123", TokenType.IRI, "xyz", null) ; + } + + public void tokenLiteralDT_6() { + tokenizeAndTestLiteralDT("'123' ^^ <xyz>", TokenType.STRING1, "123", TokenType.IRI, "xyz", null) ; } @Test(expected = RiotParseException.class) - public void tokenLiteralDT_bad_3() { + public void tokenLiteralDT_bad_1() { + // Can't split ^^ Tokenizer tokenizer = tokenizer("'123'^ ^<x> ") ; assertTrue(tokenizer.hasNext()) ; Token token = tokenizer.next() ; assertNotNull(token) ; } - @Test(expected = RiotParseException.class) - public void tokenLiteralDT_bad_4() { - Tokenizer tokenizer = tokenizer("'123'^^ x:y") ; - assertTrue(tokenizer.hasNext()) ; - Token token = tokenizer.next() ; - assertNotNull(token) ; - } +// @Test(expected = RiotParseException.class) +// public void tokenLiteralDT_bad_4() { +// Tokenizer tokenizer = tokenizer("'123'^^ x:y") 
; +// assertTrue(tokenizer.hasNext()) ; +// Token token = tokenizer.next() ; +// assertNotNull(token) ; +// } @Test public void tokenLiteralLang_0() { @@ -677,8 +690,8 @@ public class TestTokenizer extends BaseTest { @Test public void tokenLiteralLang_2() { - Tokenizer tokenizer = tokenizeAndTestFirst("'' @lang ", TokenType.STRING1, "") ; - testNextToken(tokenizer, TokenType.DIRECTIVE, "lang") ; + Tokenizer tokenizer = tokenizeAndTestFirst("'' @lang ", TokenType.LITERAL_LANG, "", "lang") ; + //testNextToken(tokenizer, TokenType.LITERAL_LANG, "lang") ; } @Test(expected = RiotParseException.class) @@ -706,11 +719,21 @@ public class TestTokenizer extends BaseTest { tokenizeAndTestExact("'X'@a-b9z-c99 ", TokenType.LITERAL_LANG, "X", "a-b9z-c99") ; } - @Test(expected = RiotParseException.class) + @Test public void tokenLiteralLang_8() { + tokenizeAndTestExact("'X' @a", TokenType.LITERAL_LANG, "X", "a") ; + } + + @Test(expected = RiotParseException.class) + public void tokenLiteralLang_bad_1() { tokenFirst("''@9-b") ; } + @Test(expected = RiotParseException.class) + public void tokenLiteralLang_bad_2() { + tokenFirst("''@ tag") ; + } + @Test public void directive_1() { tokenizeAndTestExact("@prefix", TokenType.DIRECTIVE, "prefix") ; @@ -722,6 +745,12 @@ public class TestTokenizer extends BaseTest { } @Test + public void directive_3() { + tokenizeAndTestExact("@whatever", TokenType.DIRECTIVE, "whatever") ; + } + + + @Test public void tokenComment_01() { tokenizeAndTestExact("_:123 # Comment", TokenType.BNODE, "123") ; }
