olingo-odata4 git commit: [OLINGO-568] Added support for escape of escape and quote characters

mibo Mon, 30 Nov 2015 06:16:24 -0800

Repository: olingo-odata4
Updated Branches:
  refs/heads/OLINGO-568_RewrittenGrammar 6dd0a0f3e -> e5ac59079



[OLINGO-568] Added support for escape of escape and quote characters


Project: http://git-wip-us.apache.org/repos/asf/olingo-odata4/repo
Commit: http://git-wip-us.apache.org/repos/asf/olingo-odata4/commit/e5ac5907
Tree: http://git-wip-us.apache.org/repos/asf/olingo-odata4/tree/e5ac5907
Diff: http://git-wip-us.apache.org/repos/asf/olingo-odata4/diff/e5ac5907

Branch: refs/heads/OLINGO-568_RewrittenGrammar
Commit: e5ac590794148f5195c28cb055f26b7b4b3f5027
Parents: 6dd0a0f
Author: Michael Bolz <[email protected]>
Authored: Mon Nov 30 15:15:00 2015 +0100
Committer: Michael Bolz <[email protected]>
Committed: Mon Nov 30 15:15:00 2015 +0100

----------------------------------------------------------------------
 .../core/uri/parser/search/SearchTokenizer.java | 189 +++++++++++--------
 .../uri/parser/search/SearchTokenizerTest.java  |   8 +
 .../core/uri/antlr/TestFullResourcePath.java    |  16 +-
 .../core/uri/testutil/TestUriValidator.java     |   5 +
 4 files changed, 137 insertions(+), 81 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/e5ac5907/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
----------------------------------------------------------------------
diff --git 
a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
 
b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
index 5c42e6d..2146438 100644
--- 
a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
+++ 
b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
@@ -37,6 +37,13 @@ import java.util.List;
  * searchWord = 1*ALPHA ; Actually: any character from the Unicode categories 
L or Nl,
  * ; but not the words AND, OR, and NOT
  * </code>
+ *
+ * <b>ATTENTION:</b> For a <code>searchPhrase</code> the percent encoding is 
not supported by the
+ * <code>SearchTokenizer</code>.<br/>
+ * This decision was made because the 
<code>org.apache.olingo.server.core.uri.parser.Parser</code>
+ * already handles each query option in its <code>parseUri</code> method as a 
<code>percent decoded</code> string (see
+ * line <i>177ff</i>: <code>for (RawUri.QueryOption option : 
uri.queryOptionListDecoded)</code>).
+ *
  */
 public class SearchTokenizer {
 
@@ -45,6 +52,7 @@ public class SearchTokenizer {
     private boolean finished = false;
 
     protected static final char QUOTATION_MARK = '\"';
+    protected static final char PHRASE_ESCAPE_CHAR = '\\';
     protected static final char CHAR_N = 'N';
     protected static final char CHAR_O = 'O';
     protected static final char CHAR_T = 'T';
@@ -126,45 +134,59 @@ public class SearchTokenizer {
     }
 
     /**
-     * searchPhrase = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
-     *
-     * qchar-no-AMP-DQUOTE = qchar-unescaped / escape ( escape / 
quotation-mark )
-     *
-     * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / 
":" / "@" / "/" / "?" / "$" / "'" / "="
-     *
-     * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
-     *
-     * escape = "\" / "%5C" ; reverse solidus U+005C
-     *
-     * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / 
"9" / A-to-F ) HEXDIG
+     * <code>
+     * <b>searchPhrase</b> = quotation-mark 1*qchar-no-AMP-DQUOTE 
quotation-mark
+     * <br/><br/>
+     * <b>qchar-no-AMP-DQUOTE</b> = qchar-unescaped / escape ( escape / 
quotation-mark )
+     * <br/><br/>
+     * <b>qchar-unescaped</b> = unreserved / pct-encoded-unescaped / 
other-delims /
+     * ":" / "@" / "/" / "?" / "$" / "'" / "="
+     * <br/><br/>
+     * <b>unreserved</b> = ALPHA / DIGIT / "-" / "." / "_" / "~"
+     * <br/><br/>
+     * <b>escape</b> = "\" / "%5C" ; reverse solidus U+005C
+     * <br/><br/>
+     * <b>pct-encoded-unescaped</b> = "%" ( "0" / "1" / "3" / "4" / "6" / "7" 
/ "8" / "9" / A-to-F ) HEXDIG
      * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / 
A-to-F )
      * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
+     * <br/><br/>
+     * <b>other-delims</b> = "!" / "(" / ")" / "*" / "+" / "," / ";"
+     * <br/><br/>
+     * <b>quotation-mark</b> = DQUOTE / "%22"
+     * <br/><br/>
+     * <b>ALPHA</b> = %x41-5A / %x61-7A
+     * <br/>
+     * <b>DIGIT</b> = %x30-39
+     * <br/>
+     * <b>DQUOTE</b> = %x22
+     * </code>
      *
-     * other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";"
-     *
-     * quotation-mark = DQUOTE / "%22"
-     *
-     * ALPHA = %x41-5A / %x61-7A
-     * DIGIT = %x30-39
-     * DQUOTE = %x22
+     * Checks if given <code>character</code> is allowed for a search phrase.
+     * <b>ATTENTION:</b> Escaping and percent encoding are not validated 
here (and cannot be validated on
+     * a single character).<br/>
+     * Hence for the {@link #PHRASE_ESCAPE_CHAR} and the {@link 
#QUOTATION_MARK} characters this method will
+     * return <code>FALSE</code>.<br/>
+     * <b>Furthermore</b> percent encoded characters are also not validated 
(and can not be validated on
+     * a single character).<br/>
+     * Hence for the <code>%</code> character this method will return 
<code>FALSE</code>.<br/>
      *
      * @param character which is checked
      * @return true if character is allowed for a phrase
      */
     static boolean isAllowedPhrase(final char character) {
       // FIXME mibo: check missing
-      return isQCharUnescaped(character) || isEscaped(character);
+      return isQCharUnescaped(character);// || isEscaped(character);
     }
 
-    /**
-     * escape = "\" / "%5C" ; reverse solidus U+005C
-     * @param character which is checked
-     * @return true if character is allowed
-     */
-    private static boolean isEscaped(char character) {
-      // TODO: mibo(151117): check how to implement
-      return false;
-    }
+//    /**
+//     * escape = "\" / "%5C" ; reverse solidus U+005C
+//     * @param character which is checked
+//     * @return true if character is allowed
+//     */
+//    private static boolean isEscaped(char character) {
+//      // TODO: mibo(151130): is checked in SearchPhraseState
+//      return false;
+//    }
 
     /**
      * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / 
":" / "@" / "/" / "?" / "$" / "'" / "="
@@ -173,14 +195,14 @@ public class SearchTokenizer {
      */
     private static boolean isQCharUnescaped(char character) {
       return isUnreserved(character)
-              || isPctEncodedUnescaped(character)
-              || isOtherDelims(character)
-              || character == ':'
-              || character == '@'
-              || character == '/'
-              || character == '$'
-              || character == '\''
-              || character == '=';
+//          || isPctEncodedUnescaped(character)
+          || isOtherDelims(character)
+          || character == ':'
+          || character == '@'
+          || character == '/'
+          || character == '$'
+          || character == '\''
+          || character == '=';
     }
 
     /**
@@ -190,43 +212,43 @@ public class SearchTokenizer {
      */
     private static boolean isOtherDelims(char character) {
       return character == '!'
-              || character == '('
-              || character == ')'
-              || character == '*'
-              || character == '+'
-              || character == ','
-              || character == ';';
-    }
-
-    /**
-     * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / 
"9" / A-to-F ) HEXDIG
-     * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / 
A-to-F )
-     * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
-     *
-     * HEXDIG = DIGIT / A-to-F
-     *
-     * @param character which is checked
-     * @return true if character is allowed
-     */
-    private static boolean isPctEncodedUnescaped(char character) {
-      String hex = Integer.toHexString(character);
-      char aschar[] = hex.toCharArray();
-      if(aschar[0] == '%') {
-        if(aschar[1] == '2') {
-          return aschar[2] != '2' && isHexDigit(aschar[2]);
-        } else if(aschar[1] == '5') {
-          return aschar[2] != 'C' && isHexDigit(aschar[2]);
-        } else if(isHexDigit(aschar[1])) {
-          return isHexDigit(aschar[2]);
-        }
-      }
-      return false;
-    }
-
-    private static boolean isHexDigit(char character) {
-      return 'A' <= character && character <= 'F' // case A..F
-              || '0' <= character && character <= '9'; // case 0..9
-    }
+          || character == '('
+          || character == ')'
+          || character == '*'
+          || character == '+'
+          || character == ','
+          || character == ';';
+    }
+
+//    /**
+//     * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" 
/ "9" / A-to-F ) HEXDIG
+//     * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / 
A-to-F )
+//     * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
+//     *
+//     * HEXDIG = DIGIT / A-to-F
+//     *
+//     * @param character which is checked
+//     * @return true if character is allowed
+//     */
+//    private static boolean isPctEncodedUnescaped(char character) {
+//      String hex = Integer.toHexString(character);
+//      char aschar[] = hex.toCharArray();
+//      if(aschar[0] == '%') {
+//        if(aschar[1] == '2') {
+//          return aschar[2] != '2' && isHexDigit(aschar[2]);
+//        } else if(aschar[1] == '5') {
+//          return aschar[2] != 'C' && isHexDigit(aschar[2]);
+//        } else if(isHexDigit(aschar[1])) {
+//          return isHexDigit(aschar[2]);
+//        }
+//      }
+//      return false;
+//    }
+
+//    private static boolean isHexDigit(char character) {
+//      return 'A' <= character && character <= 'F' // case A..F
+//          || '0' <= character && character <= '9'; // case 0..9
+//    }
 
     /**
      * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
@@ -235,10 +257,10 @@ public class SearchTokenizer {
      */
     private static boolean isUnreserved(char character) {
       return isAlphaOrDigit(character)
-              || character == '-'
-              || character == '.'
-              || character == '_'
-              || character == '~';
+          || character == '-'
+          || character == '.'
+          || character == '_'
+          || character == '~';
     }
 
     /**
@@ -256,8 +278,6 @@ public class SearchTokenizer {
     // BWS = *( SP / HTAB / "%20" / "%09" ) ; "bad" whitespace
     // RWS = 1*( SP / HTAB / "%20" / "%09" ) ; "required" whitespace
     static boolean isWhitespace(final char character) {
-      // ( SP / HTAB / "%20" / "%09" )
-      // TODO mibo: add missing whitespaces
       return character == ' ' || character == '\t';
     }
 
@@ -400,6 +420,7 @@ public class SearchTokenizer {
 
   private class SearchPhraseState extends LiteralState {
     private boolean closed = false;
+    private boolean escaped = false;
     public SearchPhraseState(char c) throws SearchTokenizerException {
       super(Token.PHRASE, c);
       if (c != QUOTATION_MARK) {
@@ -416,6 +437,16 @@ public class SearchTokenizer {
         } else if (isWhitespace(c)) {
           return new RwsState();
         }
+      } else if(escaped) {
+        escaped = false;
+        if(c == QUOTATION_MARK || c == PHRASE_ESCAPE_CHAR) {
+          return allowed(c);
+        } else {
+          return forbidden(c);
+        }
+      } else if(c == PHRASE_ESCAPE_CHAR) {
+        escaped = true;
+        return this;
       } else if (isAllowedPhrase(c)) {
         return allowed(c);
       } else if (isWhitespace(c)) {

http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/e5ac5907/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
----------------------------------------------------------------------
diff --git 
a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
 
b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
index 2340f37..46c9290 100644
--- 
a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
+++ 
b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
@@ -250,6 +250,14 @@ public class SearchTokenizerTest {
   @Test
   public void characterInPhrase() throws Exception {
     assertQuery("\"123\" OR \"ALPHA-._~\"").resultsIn(PHRASE, OR, PHRASE);
+    //escaped characters
+    assertQuery("\"\\\"123\" OR \"\\\\abc\"").resultsIn(new 
Validator.Tuple(PHRASE, "\"\"123\""),
+        new Validator.Tuple(OR), new Validator.Tuple(PHRASE, "\"\\abc\""));
+    assertQuery("\"\\\"1\\\\23\"").resultsIn(new Validator.Tuple(PHRASE, 
"\"\"1\\23\""));
+    // exceptions
+    
assertQuery("\"\\\"1\\\\").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
+    
assertQuery("\"1\\\"").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE);
+    
assertQuery("\"1\\23\"").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER);
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/e5ac5907/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java
----------------------------------------------------------------------
diff --git 
a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java
 
b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java
index 9f66d66..3c02003 100644
--- 
a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java
+++ 
b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java
@@ -45,6 +45,7 @@ import 
org.apache.olingo.server.core.uri.parser.UriParserException;
 import org.apache.olingo.server.core.uri.parser.UriParserSemanticException;
 import 
org.apache.olingo.server.core.uri.parser.UriParserSemanticException.MessageKeys;
 import org.apache.olingo.server.core.uri.parser.UriParserSyntaxException;
+import org.apache.olingo.server.core.uri.parser.search.SearchParserException;
 import org.apache.olingo.server.core.uri.testutil.FilterValidator;
 import org.apache.olingo.server.core.uri.testutil.TestUriValidator;
 import org.apache.olingo.server.core.uri.validator.UriValidationException;
@@ -5428,9 +5429,7 @@ public class TestFullResourcePath {
   }
 
   @Test
-  @Ignore("$search currently not implemented")
   public void testSearch() throws Exception {
-
     testUri.run("ESTwoKeyNav", "$search=abc");
     testUri.run("ESTwoKeyNav", "$search=NOT abc");
 
@@ -5462,6 +5461,19 @@ public class TestFullResourcePath {
     testUri.run("ESTwoKeyNav", "$search=(abc AND  def)       ghi ");
     testUri.run("ESTwoKeyNav", "$search=abc AND (def    OR  ghi)");
     testUri.run("ESTwoKeyNav", "$search=abc AND (def        ghi)");
+
+    // escaped characters
+    testUri.run("ESTwoKeyNav", "$search=\"abc\"");
+    testUri.run("ESTwoKeyNav", "$search=\"a\\\"bc\"");
+    testUri.run("ESTwoKeyNav", "$search=%22abc%22");
+    testUri.run("ESTwoKeyNav", "$search=%22a%5C%22bc%22");
+    testUri.run("ESTwoKeyNav", "$search=%22a%5C%5Cbc%22");
+
+    // wrong escaped characters
+    testUri.runEx("ESTwoKeyNav", "$search=%22a%22bc%22")
+        
.isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
+    testUri.runEx("ESTwoKeyNav", "$search=%22a%5Cbc%22")
+        
.isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION);
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/e5ac5907/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java
----------------------------------------------------------------------
diff --git 
a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java
 
b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java
index 6a2e5b4..0d5fb4a 100644
--- 
a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java
+++ 
b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java
@@ -176,6 +176,11 @@ public class TestUriValidator implements TestValidator {
     }
   }
 
+  public TestUriValidator isExceptionMessage(final 
ODataLibraryException.MessageKey messageKey) {
+    assertEquals(messageKey, exception.getMessageKey());
+    return this;
+  }
+
   public TestUriValidator isExSyntax(final 
UriParserSyntaxException.MessageKeys messageKey) {
     assertEquals(UriParserSyntaxException.class, exception.getClass());
     assertEquals(messageKey, exception.getMessageKey());

olingo-odata4 git commit: [OLINGO-568] Added support for escape of escape and quote characters

Reply via email to