[OLINGO-568] More tests and fixes for Tokenizer

Project: http://git-wip-us.apache.org/repos/asf/olingo-odata4/repo
Commit: http://git-wip-us.apache.org/repos/asf/olingo-odata4/commit/ca7059c7
Tree: http://git-wip-us.apache.org/repos/asf/olingo-odata4/tree/ca7059c7
Diff: http://git-wip-us.apache.org/repos/asf/olingo-odata4/diff/ca7059c7

Branch: refs/heads/OLINGO-811_CountForExpand
Commit: ca7059c778ac57c9d1ec25c9bb3e9207e4775889
Parents: 326e177
Author: mibo <[email protected]>
Authored: Wed Nov 18 20:14:50 2015 +0100
Committer: mibo <[email protected]>
Committed: Wed Nov 18 20:14:50 2015 +0100

----------------------------------------------------------------------
 .../core/uri/parser/search/SearchTokenizer.java | 64 +++++++++++--
 .../parser/search/SearchTokenizerException.java | 11 ++-
 .../uri/parser/search/SearchTokenizerTest.java  | 99 +++++++++++---------
 3 files changed, 122 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/ca7059c7/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
----------------------------------------------------------------------
diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
index 9d37312..f5ee8f7 100644
--- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
+++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
@@ -65,14 +65,23 @@ public class SearchTokenizer {
     }
 
     public State forbidden(char c) throws SearchTokenizerException {
-      throw new SearchTokenizerException("Forbidden character for " + 
this.getClass().getName() + "->" + c,
+      throw new SearchTokenizerException("Forbidden character in state " + 
this.getToken() + "->" + c,
           SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER, "" + c);
     }
 
+    public State invalid() throws SearchTokenizerException {
+      throw new SearchTokenizerException("Token " + this.getToken() + " is in 
invalid state ",
+          SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
+    }
+
     public State finish() {
       this.finished = true;
       return this;
     }
+    public State finishAs(Token token) {
+      this.finished = true;
+      return changeToken(token);
+    }
 
     public boolean isFinished() {
       return finished;
@@ -86,6 +95,11 @@ public class SearchTokenizer {
       return this;
     }
 
+    protected State changeToken(Token token) {
+      this.token = token;
+      return this;
+    }
+
     static boolean isAllowedWord(final char character) {
       // TODO mibo: add missing allowed characters
       int type = Character.getType(character);
@@ -240,7 +254,7 @@ public class SearchTokenizer {
 
     @Override
     public String toString() {
-      return this.getToken().toString() + "=>{" + getLiteral() + "}";
+      return this.getToken() + "=>{" + getLiteral() + "}";
     }
   }
 
@@ -361,12 +375,28 @@ public class SearchTokenizer {
     }
 
     @Override
+    public State finish() {
+      String tmpLiteral = literal.toString();
+      if(tmpLiteral.length() == 3) {
+        if(Token.AND.name().equals(tmpLiteral)) {
+          return finishAs(Token.AND);
+        } else if(Token.NOT.name().equals(tmpLiteral)) {
+          return finishAs(Token.NOT);
+        }
+      } else if(tmpLiteral.length() == 2 && Token.OR.name().equals(tmpLiteral)) {
+        return finishAs(Token.OR);
+      }
+      return super.finish();
+    }
+
+    @Override
     public State close() {
       return finish();
     }
   }
 
   private class SearchPhraseState extends LiteralState {
+    private boolean closed = false;
     public SearchPhraseState(char c) throws SearchTokenizerException {
       super(Token.PHRASE, c);
       if (c != QUOTATION_MARK) {
@@ -376,19 +406,34 @@ public class SearchTokenizer {
 
     @Override
     public State nextChar(char c) throws SearchTokenizerException {
-      if (isAllowedPhrase(c)) {
+      if(closed) {
+        finish();
+        if (c == CHAR_CLOSE) {
+          return new CloseState();
+        } else if (isWhitespace(c)) {
+          return new RwsState();
+        }
+      } else if (isAllowedPhrase(c)) {
         return allowed(c);
       } else if (isWhitespace(c)) {
         return allowed(c);
       } else if (c == QUOTATION_MARK) {
-        finish();
-        allowed(c);
-        return new SearchExpressionState();
-      } else if (isFinished()) {
-        return new SearchExpressionState().init(c);
+        if(literal.length() == 1) {
+          return invalid();
+        }
+        closed = true;
+        return allowed(c);
       }
       return forbidden(c);
     }
+
+    @Override
+    public State close() {
+      if(closed) {
+        return finish();
+      }
+      return super.close();
+    }
   }
 
   private class OpenState extends State {
@@ -564,6 +609,9 @@ public class SearchTokenizer {
 
     if (state.close().isFinished()) {
       states.add(state);
+    } else {
+      throw new SearchTokenizerException("Last parsed state '" + 
state.toString() + "' is not finished.",
+          SearchTokenizerException.MessageKeys.NOT_FINISHED_QUERY);
     }
 
     return states;
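
A minimal sketch of how the checks added above surface to callers, using only the tokenize(...) API and inputs exercised in SearchTokenizerTest below; the inline comments restate the state transitions above and are illustrative, not part of the commit:

    // Sketch only; mirrors the API used in SearchTokenizerTest.
    SearchTokenizer tokenizer = new SearchTokenizer();

    // A closed, non-empty phrase still tokenizes as before.
    List<SearchQueryToken> tokens = tokenizer.tokenize("\"abc\"");
    // tokens.get(0).getToken() == Token.PHRASE, getLiteral() == "\"abc\""

    try {
      tokenizer.tokenize("\"\"");            // empty phrase: invalid() -> INVALID_TOKEN_STATE
    } catch (SearchTokenizerException e) { /* expected */ }

    try {
      tokenizer.tokenize("\"phrase\"word");  // word glued onto a closed phrase: forbidden(c)
    } catch (SearchTokenizerException e) { /* expected */ }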

http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/ca7059c7/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerException.java
----------------------------------------------------------------------
diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerException.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerException.java
index fb20efe..abdf84c 100644
--- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerException.java
+++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerException.java
@@ -24,9 +24,16 @@ public class SearchTokenizerException extends UriParserSyntaxException {
 
   private static final long serialVersionUID = -8295456415309640166L;
 
-  public static enum MessageKeys implements MessageKey {
+  public enum MessageKeys implements MessageKey {
     /** parameter: character */
-    FORBIDDEN_CHARACTER, 
+    FORBIDDEN_CHARACTER,
+    /** parameter: TOKEN */
+    NOT_EXPECTED_TOKEN,
+    /** parameter: - */
+    NOT_FINISHED_QUERY,
+    /** parameter: - */
+    INVALID_TOKEN_STATE,
+    /** parameter: - */
     ALREADY_FINISHED;
 
     @Override

http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/ca7059c7/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
----------------------------------------------------------------------
diff --git a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
index 8c2a05e..2fdfe1e 100644
--- a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
+++ b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
@@ -91,7 +91,7 @@ public class SearchTokenizerTest {
     SearchTokenizer tokenizer = new SearchTokenizer();
     List<SearchQueryToken> result;
 
-    SearchValidator.init("abc AND \"x-y_z\" AND olingo").validate();
+    TokenizerValidator.init("abc AND \"x-y_z\" AND olingo").validate();
 
     //
     result = tokenizer.tokenize("\"abc\"");
@@ -113,7 +113,7 @@ public class SearchTokenizerTest {
     Assert.assertEquals(PHRASE, result.get(0).getToken());
     Assert.assertEquals("\"99_88.\"", result.get(0).getLiteral());
 
-    SearchValidator.init("abc or \"xyz\"").addExpected(WORD, WORD, 
PHRASE).validate();
+    TokenizerValidator.init("abc or \"xyz\"").validate(WORD, WORD, PHRASE);
   }
 
   /**
@@ -124,22 +124,22 @@ public class SearchTokenizerTest {
   @Ignore("Test must be moved to SearchParserTest and 
SearchParserAndTokenizerTest")
   public void parsePhraseAbnfTestcases() throws Exception {
     //    <TestCase Name="5.1.7 Search - simple phrase" Rule="queryOptions">
-    SearchValidator.init("\"blue%20green\"").validate();
+    TokenizerValidator.init("\"blue%20green\"").validate();
     //    <TestCase Name="5.1.7 Search - simple phrase" Rule="queryOptions">
-    SearchValidator.init("\"blue%20green%22").validate();
+    TokenizerValidator.init("\"blue%20green%22").validate();
     //    <TestCase Name="5.1.7 Search - phrase with escaped double-quote" 
Rule="queryOptions">
     //    <Input>$search="blue\"green"</Input>
-    SearchValidator.init("\"blue\\\"green\"").validate();
+    TokenizerValidator.init("\"blue\\\"green\"").validate();
 
     //    <TestCase Name="5.1.7 Search - phrase with escaped backslash" 
Rule="queryOptions">
     //    <Input>$search="blue\\green"</Input>
-    SearchValidator.init("\"blue\\\\green\"").validate();
+    TokenizerValidator.init("\"blue\\\\green\"").validate();
 
     //    <TestCase Name="5.1.7 Search - phrase with unescaped double-quote" 
Rule="queryOptions" FailAt="14">
-    SearchValidator.init("\"blue\"green\"").validate();
+    TokenizerValidator.init("\"blue\"green\"").validate();
 
     //    <TestCase Name="5.1.7 Search - phrase with unescaped double-quote" 
Rule="queryOptions" FailAt="16">
-    SearchValidator.init("\"blue%22green\"").validate();
+    TokenizerValidator.init("\"blue%22green\"").validate();
 
 //    <TestCase Name="5.1.7 Search - implicit AND" Rule="queryOptions">
 //    <Input>$search=blue green</Input>
@@ -160,10 +160,10 @@ public class SearchTokenizerTest {
     Assert.assertEquals(NOT, result.get(0).getToken());
     Assert.assertEquals(WORD, result.get(1).getToken());
 
-    SearchValidator.init("not abc").addExpected(WORD, WORD).validate();
-    SearchValidator.init("NOT    abc").addExpected(NOT, WORD).validate();
-    SearchValidator.init("NOT    \"abc\"").addExpected(NOT, PHRASE).validate();
-    SearchValidator.init("NOT (sdf)").validate(SearchTokenizerException.class);
+    TokenizerValidator.init("not abc").addExpected(WORD, WORD).validate();
+    TokenizerValidator.init("NOT    abc").addExpected(NOT, WORD).validate();
+    TokenizerValidator.init("NOT    \"abc\"").addExpected(NOT, 
PHRASE).validate();
+    TokenizerValidator.init("NOT 
(sdf)").validate(SearchTokenizerException.class);
   }
 
   @Test
@@ -187,16 +187,16 @@ public class SearchTokenizerTest {
     Assert.assertEquals(OR, result.get(3).getToken());
     Assert.assertEquals(WORD, result.get(4).getToken());
 
-    SearchValidator.init("abc or xyz").addExpected(WORD, WORD, 
WORD).validate();
+    TokenizerValidator.init("abc or xyz").addExpected(WORD, WORD, 
WORD).validate();
   }
 
   @Test
   public void parseImplicitAnd() throws SearchTokenizerException {
-    SearchValidator.init("a b").addExpected(WORD, WORD).validate();
-    SearchValidator.init("a b OR c").addExpected(WORD, WORD, OR, 
WORD).validate();
-    SearchValidator.init("a bc OR c").addExpected(WORD, WORD, OR, 
WORD).validate();
-    SearchValidator.init("a bc c").addExpected(WORD, WORD, WORD).validate();
-    SearchValidator.init("(a OR x) bc c").addExpected(OPEN, WORD, OR, WORD, 
CLOSE, WORD, WORD).validate();
+    TokenizerValidator.init("a b").addExpected(WORD, WORD).validate();
+    TokenizerValidator.init("a b OR c").addExpected(WORD, WORD, OR, 
WORD).validate();
+    TokenizerValidator.init("a bc OR c").addExpected(WORD, WORD, OR, 
WORD).validate();
+    TokenizerValidator.init("a bc c").addExpected(WORD, WORD, WORD).validate();
+    TokenizerValidator.init("(a OR x) bc c").addExpected(OPEN, WORD, OR, WORD, 
CLOSE, WORD, WORD).validate();
   }
 
   @Test
@@ -261,7 +261,7 @@ public class SearchTokenizerTest {
     Assert.assertEquals(OR, result.get(3).getToken());
     Assert.assertEquals(WORD, result.get(4).getToken());
 
-    SearchValidator.init("abc AND ANDsomething")
+    TokenizerValidator.init("abc AND ANDsomething")
         .addExpected(WORD, AND, WORD).validate();
   }
 
@@ -282,7 +282,7 @@ public class SearchTokenizerTest {
     Assert.assertEquals(OR, it.next().getToken());
     Assert.assertEquals(WORD, it.next().getToken());
 
-    SearchValidator.init("foo AND bar OR foo AND baz OR that AND bar OR that 
AND baz")
+    TokenizerValidator.init("foo AND bar OR foo AND baz OR that AND bar OR 
that AND baz")
         .addExpected(WORD, "foo").addExpected(AND)
         .addExpected(WORD, "bar").addExpected(OR)
         .addExpected(WORD, "foo").addExpected(AND)
@@ -294,7 +294,7 @@ public class SearchTokenizerTest {
         .validate();
 
 
-    SearchValidator.init("(foo OR that) AND (bar OR baz)")
+    TokenizerValidator.init("(foo OR that) AND (bar OR baz)")
         .addExpected(OPEN)
         .addExpected(WORD, "foo").addExpected(OR).addExpected(WORD, "that")
         .addExpected(CLOSE).addExpected(AND).addExpected(OPEN)
@@ -325,19 +325,19 @@ public class SearchTokenizerTest {
     Assert.assertEquals(AND, it.next().getToken());
     Assert.assertEquals(WORD, it.next().getToken());
 
-    SearchValidator.init("abc AND ANDsomething")
+    TokenizerValidator.init("abc AND ANDsomething")
         .addExpected(WORD, AND, WORD).validate();
 
-    SearchValidator.init("abc ANDsomething")
+    TokenizerValidator.init("abc ANDsomething")
         .addExpected(WORD, WORD).validate();
 
-    SearchValidator.init("abc ORsomething")
+    TokenizerValidator.init("abc ORsomething")
         .addExpected(WORD, WORD).validate();
 
-    SearchValidator.init("abc OR orsomething")
+    TokenizerValidator.init("abc OR orsomething")
         .addExpected(WORD, OR, WORD).validate();
 
-    SearchValidator.init("abc OR ORsomething")
+    TokenizerValidator.init("abc OR ORsomething")
         .addExpected(WORD, OR, WORD).validate();
   }
 
@@ -345,7 +345,7 @@ public class SearchTokenizerTest {
   @Test
   public void unicodeInWords() throws Exception {
     // Ll, Lm, Lo, Lt, Lu, Nl
-    SearchValidator.init("abc OR 
Ll\u01E3Lm\u02B5Lo\u1BE4Lt\u01F2Lu\u03D3Nl\u216F")
+    TokenizerValidator.init("abc OR 
Ll\u01E3Lm\u02B5Lo\u1BE4Lt\u01F2Lu\u03D3Nl\u216F")
         .addExpected(WORD, OR, WORD).validate();
   }
 
@@ -369,7 +369,7 @@ public class SearchTokenizerTest {
    */
   @Test
   public void characterInPhrase() throws Exception {
-    SearchValidator.init("\"123\" OR \"ALPHA-._~\"")
+    TokenizerValidator.init("\"123\" OR \"ALPHA-._~\"")
         .addExpected(PHRASE, OR, PHRASE).validate();
   }
 
@@ -395,7 +395,7 @@ public class SearchTokenizerTest {
     validate("abc     def     ghi");
 
     // mixed not
-    SearchValidator.init("    abc         def AND     ghi").validate(WORD, 
WORD, AND, WORD);
+    TokenizerValidator.init("    abc         def AND     ghi").validate(WORD, 
WORD, AND, WORD);
     validate("NOT abc  NOT    def  OR NOT ghi", NOT, WORD, NOT, WORD, OR, NOT, 
WORD);
     validate("    abc         def     NOT ghi", WORD, WORD, NOT, WORD);
 
@@ -409,26 +409,41 @@ public class SearchTokenizerTest {
   }
 
   @Test
-  public void parseInvalid() throws SearchTokenizerException {
-    SearchValidator.init("abc AND OR something").validate();
-    SearchValidator.init("abc AND \"something\" )").validate();
+  public void tokenizeInvalid() throws SearchTokenizerException {
     //
-    SearchValidator.init("(  abc AND) OR 
something").validate(SearchTokenizerException.class);
+    TokenizerValidator.init("(  abc AND) OR 
something").validate(SearchTokenizerException.class);
+
+    TokenizerValidator.init("\"phrase\"word").validate(SearchTokenizerException.class);
+    TokenizerValidator.init("\"p\"w").validate(SearchTokenizerException.class);
+    TokenizerValidator.init("\"\"").validate(SearchTokenizerException.class);
+  }
+
+  @Test
+  public void tokenizeInvalidQueryForParser() throws SearchTokenizerException {
+//    TokenizerValidator.init("NOT").validate(NOT);
+
+    TokenizerValidator.init("AND").validate(AND);
+    TokenizerValidator.init("OR").validate(OR);
+    TokenizerValidator.init("NOT AND").validate(NOT, AND);
+    TokenizerValidator.init("NOT OR").validate(NOT, OR);
+    TokenizerValidator.init("NOT NOT").validate(NOT, NOT);
+    TokenizerValidator.init("abc AND OR something").validate(WORD, AND, OR, 
WORD);
+    TokenizerValidator.init("abc AND \"something\" )").validate(WORD, AND, 
PHRASE, CLOSE);
   }
 
   public void validate(String query) throws SearchTokenizerException {
-    new SearchValidator(query).validate();
+    new TokenizerValidator(query).validate();
   }
 
   public void validate(String query, SearchQueryToken.Token ... tokens) throws SearchTokenizerException {
-    SearchValidator sv = new SearchValidator(query);
+    TokenizerValidator sv = new TokenizerValidator(query);
     for (SearchQueryToken.Token token : tokens) {
       sv.addExpected(token);
     }
     sv.validate();
   }
 
-  private static class SearchValidator {
+  private static class TokenizerValidator {
     private List<Tuple> validations = new ArrayList<Tuple>();
     private boolean log;
     private final String searchQuery;
@@ -450,24 +465,24 @@ public class SearchTokenizerTest {
       }
     }
 
-    private SearchValidator(String searchQuery) {
+    private TokenizerValidator(String searchQuery) {
       this.searchQuery = searchQuery;
     }
 
-    private static SearchValidator init(String searchQuery) {
-      return new SearchValidator(searchQuery);
+    private static TokenizerValidator init(String searchQuery) {
+      return new TokenizerValidator(searchQuery);
     }
     
     @SuppressWarnings("unused")
-    private SearchValidator enableLogging() {
+    private TokenizerValidator enableLogging() {
       log = true;
       return this;
     }
-    private SearchValidator addExpected(SearchQueryToken.Token token, String literal) {
+    private TokenizerValidator addExpected(SearchQueryToken.Token token, String literal) {
       validations.add(new Tuple(token, literal));
       return this;
     }
-    private SearchValidator addExpected(SearchQueryToken.Token ... token) {
+    private TokenizerValidator addExpected(SearchQueryToken.Token ... token) {
       for (SearchQueryToken.Token t : token) {
         validations.add(new Tuple(t));
       }
