[OLINGO-568] More tests and fixes for Tokenizer
Project: http://git-wip-us.apache.org/repos/asf/olingo-odata4/repo Commit: http://git-wip-us.apache.org/repos/asf/olingo-odata4/commit/ca7059c7 Tree: http://git-wip-us.apache.org/repos/asf/olingo-odata4/tree/ca7059c7 Diff: http://git-wip-us.apache.org/repos/asf/olingo-odata4/diff/ca7059c7 Branch: refs/heads/OLINGO-811_CountForExpand Commit: ca7059c778ac57c9d1ec25c9bb3e9207e4775889 Parents: 326e177 Author: mibo <[email protected]> Authored: Wed Nov 18 20:14:50 2015 +0100 Committer: mibo <[email protected]> Committed: Wed Nov 18 20:14:50 2015 +0100 ---------------------------------------------------------------------- .../core/uri/parser/search/SearchTokenizer.java | 64 +++++++++++-- .../parser/search/SearchTokenizerException.java | 11 ++- .../uri/parser/search/SearchTokenizerTest.java | 99 +++++++++++--------- 3 files changed, 122 insertions(+), 52 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/ca7059c7/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java ---------------------------------------------------------------------- diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java index 9d37312..f5ee8f7 100644 --- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java +++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java @@ -65,14 +65,23 @@ public class SearchTokenizer { } public State forbidden(char c) throws SearchTokenizerException { - throw new SearchTokenizerException("Forbidden character for " + this.getClass().getName() + "->" + c, + throw new SearchTokenizerException("Forbidden character in state " + this.getToken() + "->" + c, SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER, "" + c); } + public State invalid() throws SearchTokenizerException { + throw new SearchTokenizerException("Token " + this.getToken() + " is in invalid state ", + SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE); + } + public State finish() { this.finished = true; return this; } + public State finishAs(Token token) { + this.finished = true; + return changeToken(token); + } public boolean isFinished() { return finished; @@ -86,6 +95,11 @@ public class SearchTokenizer { return this; } + protected State changeToken(Token token) { + this.token = token; + return this; + } + static boolean isAllowedWord(final char character) { // TODO mibo: add missing allowed characters int type = Character.getType(character); @@ -240,7 +254,7 @@ public class SearchTokenizer { @Override public String toString() { - return this.getToken().toString() + "=>{" + getLiteral() + "}"; + return this.getToken() + "=>{" + getLiteral() + "}"; } } @@ -361,12 +375,28 @@ public class SearchTokenizer { } @Override + public State finish() { + String tmpLiteral = literal.toString(); + if(tmpLiteral.length() == 3) { + if(Token.AND.name().equals(tmpLiteral)) { + return finishAs(Token.AND); + } else if(Token.NOT.name().equals(tmpLiteral)) { + return finishAs(Token.NOT); + } + } else if(tmpLiteral.length() == 2 && Token.OR.name().equals(tmpLiteral)) { + return finishAs(Token.OR); + } + return super.finish(); + } + + @Override public State close() { return finish(); } } private class SearchPhraseState extends LiteralState { + private boolean closed = false; public SearchPhraseState(char c) throws SearchTokenizerException { super(Token.PHRASE, c); if (c != QUOTATION_MARK) { @@ -376,19 +406,34 @@ public class SearchTokenizer { @Override public State nextChar(char c) throws SearchTokenizerException { - if (isAllowedPhrase(c)) { + if(closed) { + finish(); + if (c == CHAR_CLOSE) { + return new CloseState(); + } else if (isWhitespace(c)) { + return new RwsState(); + } + } else if (isAllowedPhrase(c)) { return allowed(c); } else if (isWhitespace(c)) { return allowed(c); } else if (c == QUOTATION_MARK) { - finish(); - allowed(c); - return new SearchExpressionState(); - } else if (isFinished()) { - return new SearchExpressionState().init(c); + if(literal.length() == 1) { + return invalid(); + } + closed = true; + return allowed(c); } return forbidden(c); } + + @Override + public State close() { + if(closed) { + return finish(); + } + return super.close(); + } } private class OpenState extends State { @@ -564,6 +609,9 @@ public class SearchTokenizer { if (state.close().isFinished()) { states.add(state); + } else { + throw new SearchTokenizerException("Last parsed state '" + state.toString() + "' is not finished.", + SearchTokenizerException.MessageKeys.NOT_FINISHED_QUERY); } return states; http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/ca7059c7/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerException.java ---------------------------------------------------------------------- diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerException.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerException.java index fb20efe..abdf84c 100644 --- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerException.java +++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerException.java @@ -24,9 +24,16 @@ public class SearchTokenizerException extends UriParserSyntaxException { private static final long serialVersionUID = -8295456415309640166L; - public static enum MessageKeys implements MessageKey { + public enum MessageKeys implements MessageKey { /** parameter: character */ - FORBIDDEN_CHARACTER, + FORBIDDEN_CHARACTER, + /** parameter: TOKEN */ + NOT_EXPECTED_TOKEN, + /** parameter: - */ + NOT_FINISHED_QUERY, + /** parameter: - */ + INVALID_TOKEN_STATE, + /** parameter: - */ ALREADY_FINISHED; @Override http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/ca7059c7/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java ---------------------------------------------------------------------- diff --git a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java index 8c2a05e..2fdfe1e 100644 --- a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java +++ b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java @@ -91,7 +91,7 @@ public class SearchTokenizerTest { SearchTokenizer tokenizer = new SearchTokenizer(); List<SearchQueryToken> result; - SearchValidator.init("abc AND \"x-y_z\" AND olingo").validate(); + TokenizerValidator.init("abc AND \"x-y_z\" AND olingo").validate(); // result = tokenizer.tokenize("\"abc\""); @@ -113,7 +113,7 @@ public class SearchTokenizerTest { Assert.assertEquals(PHRASE, result.get(0).getToken()); Assert.assertEquals("\"99_88.\"", result.get(0).getLiteral()); - SearchValidator.init("abc or \"xyz\"").addExpected(WORD, WORD, PHRASE).validate(); + TokenizerValidator.init("abc or \"xyz\"").validate(WORD, WORD, PHRASE); } /** @@ -124,22 +124,22 @@ public class SearchTokenizerTest { @Ignore("Test must be moved to SearchParserTest and SearchParserAndTokenizerTest") public void parsePhraseAbnfTestcases() throws Exception { // <TestCase Name="5.1.7 Search - simple phrase" Rule="queryOptions"> - SearchValidator.init("\"blue%20green\"").validate(); + TokenizerValidator.init("\"blue%20green\"").validate(); // <TestCase Name="5.1.7 Search - simple phrase" Rule="queryOptions"> - SearchValidator.init("\"blue%20green%22").validate(); + TokenizerValidator.init("\"blue%20green%22").validate(); // <TestCase Name="5.1.7 Search - phrase with escaped double-quote" Rule="queryOptions"> // <Input>$search="blue\"green"</Input> - SearchValidator.init("\"blue\\\"green\"").validate(); + TokenizerValidator.init("\"blue\\\"green\"").validate(); // <TestCase Name="5.1.7 Search - phrase with escaped backslash" Rule="queryOptions"> // <Input>$search="blue\\green"</Input> - SearchValidator.init("\"blue\\\\green\"").validate(); + TokenizerValidator.init("\"blue\\\\green\"").validate(); // <TestCase Name="5.1.7 Search - phrase with unescaped double-quote" Rule="queryOptions" FailAt="14"> - SearchValidator.init("\"blue\"green\"").validate(); + TokenizerValidator.init("\"blue\"green\"").validate(); // <TestCase Name="5.1.7 Search - phrase with unescaped double-quote" Rule="queryOptions" FailAt="16"> - SearchValidator.init("\"blue%22green\"").validate(); + TokenizerValidator.init("\"blue%22green\"").validate(); // <TestCase Name="5.1.7 Search - implicit AND" Rule="queryOptions"> // <Input>$search=blue green</Input> @@ -160,10 +160,10 @@ public class SearchTokenizerTest { Assert.assertEquals(NOT, result.get(0).getToken()); Assert.assertEquals(WORD, result.get(1).getToken()); - SearchValidator.init("not abc").addExpected(WORD, WORD).validate(); - SearchValidator.init("NOT abc").addExpected(NOT, WORD).validate(); - SearchValidator.init("NOT \"abc\"").addExpected(NOT, PHRASE).validate(); - SearchValidator.init("NOT (sdf)").validate(SearchTokenizerException.class); + TokenizerValidator.init("not abc").addExpected(WORD, WORD).validate(); + TokenizerValidator.init("NOT abc").addExpected(NOT, WORD).validate(); + TokenizerValidator.init("NOT \"abc\"").addExpected(NOT, PHRASE).validate(); + TokenizerValidator.init("NOT (sdf)").validate(SearchTokenizerException.class); } @Test @@ -187,16 +187,16 @@ public class SearchTokenizerTest { Assert.assertEquals(OR, result.get(3).getToken()); Assert.assertEquals(WORD, result.get(4).getToken()); - SearchValidator.init("abc or xyz").addExpected(WORD, WORD, WORD).validate(); + TokenizerValidator.init("abc or xyz").addExpected(WORD, WORD, WORD).validate(); } @Test public void parseImplicitAnd() throws SearchTokenizerException { - SearchValidator.init("a b").addExpected(WORD, WORD).validate(); - SearchValidator.init("a b OR c").addExpected(WORD, WORD, OR, WORD).validate(); - SearchValidator.init("a bc OR c").addExpected(WORD, WORD, OR, WORD).validate(); - SearchValidator.init("a bc c").addExpected(WORD, WORD, WORD).validate(); - SearchValidator.init("(a OR x) bc c").addExpected(OPEN, WORD, OR, WORD, CLOSE, WORD, WORD).validate(); + TokenizerValidator.init("a b").addExpected(WORD, WORD).validate(); + TokenizerValidator.init("a b OR c").addExpected(WORD, WORD, OR, WORD).validate(); + TokenizerValidator.init("a bc OR c").addExpected(WORD, WORD, OR, WORD).validate(); + TokenizerValidator.init("a bc c").addExpected(WORD, WORD, WORD).validate(); + TokenizerValidator.init("(a OR x) bc c").addExpected(OPEN, WORD, OR, WORD, CLOSE, WORD, WORD).validate(); } @Test @@ -261,7 +261,7 @@ public class SearchTokenizerTest { Assert.assertEquals(OR, result.get(3).getToken()); Assert.assertEquals(WORD, result.get(4).getToken()); - SearchValidator.init("abc AND ANDsomething") + TokenizerValidator.init("abc AND ANDsomething") .addExpected(WORD, AND, WORD).validate(); } @@ -282,7 +282,7 @@ public class SearchTokenizerTest { Assert.assertEquals(OR, it.next().getToken()); Assert.assertEquals(WORD, it.next().getToken()); - SearchValidator.init("foo AND bar OR foo AND baz OR that AND bar OR that AND baz") + TokenizerValidator.init("foo AND bar OR foo AND baz OR that AND bar OR that AND baz") .addExpected(WORD, "foo").addExpected(AND) .addExpected(WORD, "bar").addExpected(OR) .addExpected(WORD, "foo").addExpected(AND) @@ -294,7 +294,7 @@ public class SearchTokenizerTest { .validate(); - SearchValidator.init("(foo OR that) AND (bar OR baz)") + TokenizerValidator.init("(foo OR that) AND (bar OR baz)") .addExpected(OPEN) .addExpected(WORD, "foo").addExpected(OR).addExpected(WORD, "that") .addExpected(CLOSE).addExpected(AND).addExpected(OPEN) @@ -325,19 +325,19 @@ public class SearchTokenizerTest { Assert.assertEquals(AND, it.next().getToken()); Assert.assertEquals(WORD, it.next().getToken()); - SearchValidator.init("abc AND ANDsomething") + TokenizerValidator.init("abc AND ANDsomething") .addExpected(WORD, AND, WORD).validate(); - SearchValidator.init("abc ANDsomething") + TokenizerValidator.init("abc ANDsomething") .addExpected(WORD, WORD).validate(); - SearchValidator.init("abc ORsomething") + TokenizerValidator.init("abc ORsomething") .addExpected(WORD, WORD).validate(); - SearchValidator.init("abc OR orsomething") + TokenizerValidator.init("abc OR orsomething") .addExpected(WORD, OR, WORD).validate(); - SearchValidator.init("abc OR ORsomething") + TokenizerValidator.init("abc OR ORsomething") .addExpected(WORD, OR, WORD).validate(); } @@ -345,7 +345,7 @@ public class SearchTokenizerTest { @Test public void unicodeInWords() throws Exception { // Ll, Lm, Lo, Lt, Lu, Nl - SearchValidator.init("abc OR Ll\u01E3Lm\u02B5Lo\u1BE4Lt\u01F2Lu\u03D3Nl\u216F") + TokenizerValidator.init("abc OR Ll\u01E3Lm\u02B5Lo\u1BE4Lt\u01F2Lu\u03D3Nl\u216F") .addExpected(WORD, OR, WORD).validate(); } @@ -369,7 +369,7 @@ public class SearchTokenizerTest { */ @Test public void characterInPhrase() throws Exception { - SearchValidator.init("\"123\" OR \"ALPHA-._~\"") + TokenizerValidator.init("\"123\" OR \"ALPHA-._~\"") .addExpected(PHRASE, OR, PHRASE).validate(); } @@ -395,7 +395,7 @@ public class SearchTokenizerTest { validate("abc def ghi"); // mixed not - SearchValidator.init(" abc def AND ghi").validate(WORD, WORD, AND, WORD); + TokenizerValidator.init(" abc def AND ghi").validate(WORD, WORD, AND, WORD); validate("NOT abc NOT def OR NOT ghi", NOT, WORD, NOT, WORD, OR, NOT, WORD); validate(" abc def NOT ghi", WORD, WORD, NOT, WORD); @@ -409,26 +409,41 @@ public class SearchTokenizerTest { } @Test - public void parseInvalid() throws SearchTokenizerException { - SearchValidator.init("abc AND OR something").validate(); - SearchValidator.init("abc AND \"something\" )").validate(); + public void tokenizeInvalid() throws SearchTokenizerException { // - SearchValidator.init("( abc AND) OR something").validate(SearchTokenizerException.class); + TokenizerValidator.init("( abc AND) OR something").validate(SearchTokenizerException.class); + + TokenizerValidator.init("\"phrase\"word").validate(SearchTokenizerException.class); + TokenizerValidator.init("\"p\"w").validate(SearchTokenizerException.class); + TokenizerValidator.init("\"\"").validate(SearchTokenizerException.class); + } + + @Test + public void tokenizeInvalidQueryForParser() throws SearchTokenizerException { +// TokenizerValidator.init("NOT").validate(NOT); + + TokenizerValidator.init("AND").validate(AND); + TokenizerValidator.init("OR").validate(OR); + TokenizerValidator.init("NOT AND").validate(NOT, AND); + TokenizerValidator.init("NOT OR").validate(NOT, OR); + TokenizerValidator.init("NOT NOT").validate(NOT, NOT); + TokenizerValidator.init("abc AND OR something").validate(WORD, AND, OR, WORD); + TokenizerValidator.init("abc AND \"something\" )").validate(WORD, AND, PHRASE, CLOSE); } public void validate(String query) throws SearchTokenizerException { - new SearchValidator(query).validate(); + new TokenizerValidator(query).validate(); } public void validate(String query, SearchQueryToken.Token ... tokens) throws SearchTokenizerException { - SearchValidator sv = new SearchValidator(query); + TokenizerValidator sv = new TokenizerValidator(query); for (SearchQueryToken.Token token : tokens) { sv.addExpected(token); } sv.validate(); } - private static class SearchValidator { + private static class TokenizerValidator { private List<Tuple> validations = new ArrayList<Tuple>(); private boolean log; private final String searchQuery; @@ -450,24 +465,24 @@ public class SearchTokenizerTest { } } - private SearchValidator(String searchQuery) { + private TokenizerValidator(String searchQuery) { this.searchQuery = searchQuery; } - private static SearchValidator init(String searchQuery) { - return new SearchValidator(searchQuery); + private static TokenizerValidator init(String searchQuery) { + return new TokenizerValidator(searchQuery); } @SuppressWarnings("unused") - private SearchValidator enableLogging() { + private TokenizerValidator enableLogging() { log = true; return this; } - private SearchValidator addExpected(SearchQueryToken.Token token, String literal) { + private TokenizerValidator addExpected(SearchQueryToken.Token token, String literal) { validations.add(new Tuple(token, literal)); return this; } - private SearchValidator addExpected(SearchQueryToken.Token ... token) { + private TokenizerValidator addExpected(SearchQueryToken.Token ... token) { for (SearchQueryToken.Token t : token) { validations.add(new Tuple(t)); }
