[olingo-odata4] branch master updated: [OLINGO-1571] Fixed special chars for search

mibo Sat, 13 Aug 2022 00:30:34 -0700

This is an automated email from the ASF dual-hosted git repository.

mibo pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/olingo-odata4.git



The following commit(s) were added to refs/heads/master by this push:
     new 683e471dc [OLINGO-1571] Fixed special chars for search
683e471dc is described below

commit 683e471dc2f06d3f2ca06a7a9c95e1480532fe98
Author: mibo <[email protected]>
AuthorDate: Mon Aug 8 20:06:20 2022 +0200

    [OLINGO-1571] Fixed special chars for search
---
 .../core/uri/parser/search/SearchTokenizer.java    | 87 +++-------------------
 .../uri/parser/search/SearchTokenizerTest.java     | 13 +++-
 2 files changed, 20 insertions(+), 80 deletions(-)

diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
index 6f7e01e70..853537d3b 100644
--- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
+++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java
@@ -124,83 +124,17 @@ public class SearchTokenizer {
           || isOtherDelimsForWord(character);
     }
 
-       /**
-     * <code>
-     * <b>searchPhrase</b> = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark
-     * <br/><br/>
-     * <b>qchar-no-AMP-DQUOTE</b> = qchar-unescaped / escape ( escape / quotation-mark )
-     * <br/><br/>
-     * <b>qchar-unescaped</b> = unreserved / pct-encoded-unescaped / other-delims /
-     * ":" / "@" / "/" / "?" / "$" / "'" / "="
-     * <br/><br/>
-     * <b>unreserved</b> = ALPHA / DIGIT / "-" / "." / "_" / "~"
-     * <br/><br/>
-     * <b>escape</b> = "\" / "%5C" ; reverse solidus U+005C
-     * <br/><br/>
-     * <b>pct-encoded-unescaped</b> = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG
-     * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F )
-     * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" )
-     * <br/><br/>
-     * <b>other-delims</b> = "!" / "(" / ")" / "*" / "+" / "," / ";"
-     * <br/><br/>
-     * <b>quotation-mark</b> = DQUOTE / "%22"
-     * <br/><br/>
-     * <b>ALPHA</b> = %x41-5A / %x61-7A
-     * <br/>
-     * <b>DIGIT</b> = %x30-39
-     * <br/>
-     * <b>DQUOTE</b> = %x22
-     * </code>
-     *
-     * Checks if given <code>character</code> is allowed for a search phrase.
-     * <b>ATTENTION:</b> Escaping and percent encoding is not be validated here (and can not be validated on
-     * a single character).<br/>
-     * Hence for the {@link #PHRASE_ESCAPE_CHAR} and the {@link #QUOTATION_MARK} characters this method will
-     * return <code>FALSE</code>.<br/>
-     * <b>Furthermore</b> percent encoded characters are also not validated (and can not be validated on
-     * a single character).<br/>
-     * Hence for the <code>%</code> character this method assumeS that it was percent encoded and is now decoded
-     * and will return <code>TRUE</code>.<br/>
+         /**
+     * The check for allowed characters in a <code>SearchPhrase</code> assumes that
+     * the whole phrase is already percent decoded.
+     * Hence, all characters are allowed besides the double quote (<code>"</code>).
      *
      * @param character which is checked
      * @return true if character is allowed for a phrase
      */
     static boolean isAllowedPhrase(final char character) {
-      // the '%' is allowed because it is assumed that it was percent encoded and is now decoded
-      return isQCharUnescaped(character) 
-                 || character == '%' 
-                 || Character.isUnicodeIdentifierStart(character);
-    }
-
-    /**
-     * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "="
-     * @param character which is checked
-     * @return true if character is allowed
-     */
-    private static boolean isQCharUnescaped(final char character) {
-      return isUnreserved(character)
-          || isOtherDelims(character)
-          || character == ':'
-          || character == '@'
-          || character == '/'
-          || character == '$'
-          || character == '\''
-          || character == '=';
-    }
-
-    /**
-     * other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";"
-     * @param character which is checked
-     * @return true if character is allowed
-     */
-    private static boolean isOtherDelims(final char character) {
-      return character == '!'
-          || character == '('
-          || character == ')'
-          || character == '*'
-          || character == '+'
-          || character == ','
-          || character == ';';
+      return Character.isUnicodeIdentifierStart(character)
+              || character != '"';
     }
 
     /**
@@ -212,7 +146,6 @@ public class SearchTokenizer {
       return character == '!'
           || character == '*'
           || character == '+'
-          || character == ','
           || character == ':'
           || character == '@'
           || character == '/'
@@ -234,7 +167,7 @@ public class SearchTokenizer {
           || character == '<'
           || character == '`';
     }
-    
+
     /**
      * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
      * @param character which is checked
@@ -355,11 +288,11 @@ public class SearchTokenizer {
   }
 
   /**
-   * 
-   * As per the updated abnf 
+   *
+   * As per the updated abnf
   * https://github.com/oasis-tcs/odata-abnf/blob/master/abnf/odata-abnf-construction-rules.txt#L332-L356.
    * searchWord   = 1*( ALPHA / DIGIT / COMMA / "." / "-" / pct-encoded )
-   * This includes Unicode characters of categories 
+   * This includes Unicode characters of categories
    * L or N using UTF-8 and percent-encoding.
    */
   private class SearchWordState extends LiteralState {
diff --git a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
index d8c6a7cc7..cea6d3012 100644
--- a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
+++ b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java
@@ -6,9 +6,9 @@
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License. You may obtain a copy of the License at
- * 
+ *
  * http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -107,6 +107,13 @@ public class SearchTokenizerTest {
     assertQuery("abc or \"xyz\"").resultsIn(WORD, WORD, PHRASE);
   }
 
+  @Test
+  public void parsePhrase_decoded() throws Exception {
+    assertQuery("\"a & b\"").resultsIn(PHRASE);
+    assertQuery("\" ! # $ % & ' ( ) * + , / : ; = ? @ [ ] \"").resultsIn(PHRASE);
+    assertQuery("\" - . < > ^ _ ` { | } ~ \"").resultsIn(PHRASE);
+  }
+
   @Test
   public void parseNot() throws Exception {
     assertQuery("NOT").resultsIn(NOT);
@@ -401,4 +408,4 @@ public class SearchTokenizerTest {
       }
     }
   }
-}
\ No newline at end of file
+}

[olingo-odata4] branch master updated: [OLINGO-1571] Fixed special chars for search

Reply via email to