Repository: olingo-odata4 Updated Branches: refs/heads/OLINGO-568_RewrittenGrammar 6dd0a0f3e -> e5ac59079
[OLINGO-568] Added support for escape of escape and quote characters Project: http://git-wip-us.apache.org/repos/asf/olingo-odata4/repo Commit: http://git-wip-us.apache.org/repos/asf/olingo-odata4/commit/e5ac5907 Tree: http://git-wip-us.apache.org/repos/asf/olingo-odata4/tree/e5ac5907 Diff: http://git-wip-us.apache.org/repos/asf/olingo-odata4/diff/e5ac5907 Branch: refs/heads/OLINGO-568_RewrittenGrammar Commit: e5ac590794148f5195c28cb055f26b7b4b3f5027 Parents: 6dd0a0f Author: Michael Bolz <[email protected]> Authored: Mon Nov 30 15:15:00 2015 +0100 Committer: Michael Bolz <[email protected]> Committed: Mon Nov 30 15:15:00 2015 +0100 ---------------------------------------------------------------------- .../core/uri/parser/search/SearchTokenizer.java | 189 +++++++++++-------- .../uri/parser/search/SearchTokenizerTest.java | 8 + .../core/uri/antlr/TestFullResourcePath.java | 16 +- .../core/uri/testutil/TestUriValidator.java | 5 + 4 files changed, 137 insertions(+), 81 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/e5ac5907/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java ---------------------------------------------------------------------- diff --git a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java index 5c42e6d..2146438 100644 --- a/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java +++ b/lib/server-core/src/main/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizer.java @@ -37,6 +37,13 @@ import java.util.List; * searchWord = 1*ALPHA ; Actually: any character from the Unicode categories L or Nl, * ; but not the words AND, OR, and NOT * </code> + * + * <b>ATTENTION:</b> For a <code>searchPhrase</code> the percent encoding is not supported by the + * <code>SearchTokenizer</code>.<br/> + * This was a decision based on that the <code>org.apache.olingo.server.core.uri.parser.Parser</code> + * already handles in his <code>parseUri</code> method each query as <code>percent decoded</code> strings (see + * line <i>177ff</i> (<code>for (RawUri.QueryOption option : uri.queryOptionListDecoded)</code>). + * */ public class SearchTokenizer { @@ -45,6 +52,7 @@ public class SearchTokenizer { private boolean finished = false; protected static final char QUOTATION_MARK = '\"'; + protected static final char PHRASE_ESCAPE_CHAR = '\\'; protected static final char CHAR_N = 'N'; protected static final char CHAR_O = 'O'; protected static final char CHAR_T = 'T'; @@ -126,45 +134,59 @@ public class SearchTokenizer { } /** - * searchPhrase = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark - * - * qchar-no-AMP-DQUOTE = qchar-unescaped / escape ( escape / quotation-mark ) - * - * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "=" - * - * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" - * - * escape = "\" / "%5C" ; reverse solidus U+005C - * - * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG + * <code> + * <b>searchPhrase</b> = quotation-mark 1*qchar-no-AMP-DQUOTE quotation-mark + * <br/><br/> + * <b>qchar-no-AMP-DQUOTE</b> = qchar-unescaped / escape ( escape / quotation-mark ) + * <br/><br/> + * <b>qchar-unescaped</b> = unreserved / pct-encoded-unescaped / other-delims / + * ":" / "@" / "/" / "?" / "$" / "'" / "=" + * <br/><br/> + * <b>unreserved</b> = ALPHA / DIGIT / "-" / "." / "_" / "~" + * <br/><br/> + * <b>escape</b> = "\" / "%5C" ; reverse solidus U+005C + * <br/><br/> + * <b>pct-encoded-unescaped</b> = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F ) * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" ) + * <br/><br/> + * <b>other-delims</b> = "!" / "(" / ")" / "*" / "+" / "," / ";" + * <br/><br/> + * <b>quotation-mark</b> = DQUOTE / "%22" + * <br/><br/> + * <b>ALPHA</b> = %x41-5A / %x61-7A + * <br/> + * <b>DIGIT</b> = %x30-39 + * <br/> + * <b>DQUOTE</b> = %x22 + * </code> * - * other-delims = "!" / "(" / ")" / "*" / "+" / "," / ";" - * - * quotation-mark = DQUOTE / "%22" - * - * ALPHA = %x41-5A / %x61-7A - * DIGIT = %x30-39 - * DQUOTE = %x22 + * Checks if given <code>character</code> is allowed for a search phrase. + * <b>ATTENTION:</b> Escaping and percent encoding is not be validated here (and can not be validated on + * a single character).<br/> + * Hence for the {@link #PHRASE_ESCAPE_CHAR} and the {@link #QUOTATION_MARK} characters this method will + * return <code>FALSE</code>.<br/> + * <b>Furthermore</b> percent encoded characters are also not validated (and can not be validated on + * a single character).<br/> + * Hence for the <code>%</code> character this method will return <code>FALSE</code>.<br/> * * @param character which is checked * @return true if character is allowed for a phrase */ static boolean isAllowedPhrase(final char character) { // FIXME mibo: check missing - return isQCharUnescaped(character) || isEscaped(character); + return isQCharUnescaped(character);// || isEscaped(character); } - /** - * escape = "\" / "%5C" ; reverse solidus U+005C - * @param character which is checked - * @return true if character is allowed - */ - private static boolean isEscaped(char character) { - // TODO: mibo(151117): check how to implement - return false; - } +// /** +// * escape = "\" / "%5C" ; reverse solidus U+005C +// * @param character which is checked +// * @return true if character is allowed +// */ +// private static boolean isEscaped(char character) { +// // TODO: mibo(151130): is checked in SearchPhraseState +// return false; +// } /** * qchar-unescaped = unreserved / pct-encoded-unescaped / other-delims / ":" / "@" / "/" / "?" / "$" / "'" / "=" @@ -173,14 +195,14 @@ public class SearchTokenizer { */ private static boolean isQCharUnescaped(char character) { return isUnreserved(character) - || isPctEncodedUnescaped(character) - || isOtherDelims(character) - || character == ':' - || character == '@' - || character == '/' - || character == '$' - || character == '\'' - || character == '='; +// || isPctEncodedUnescaped(character) + || isOtherDelims(character) + || character == ':' + || character == '@' + || character == '/' + || character == '$' + || character == '\'' + || character == '='; } /** @@ -190,43 +212,43 @@ public class SearchTokenizer { */ private static boolean isOtherDelims(char character) { return character == '!' - || character == '(' - || character == ')' - || character == '*' - || character == '+' - || character == ',' - || character == ';'; - } - - /** - * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG - * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F ) - * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" ) - * - * HEXDIG = DIGIT / A-to-F - * - * @param character which is checked - * @return true if character is allowed - */ - private static boolean isPctEncodedUnescaped(char character) { - String hex = Integer.toHexString(character); - char aschar[] = hex.toCharArray(); - if(aschar[0] == '%') { - if(aschar[1] == '2') { - return aschar[2] != '2' && isHexDigit(aschar[2]); - } else if(aschar[1] == '5') { - return aschar[2] != 'C' && isHexDigit(aschar[2]); - } else if(isHexDigit(aschar[1])) { - return isHexDigit(aschar[2]); - } - } - return false; - } - - private static boolean isHexDigit(char character) { - return 'A' <= character && character <= 'F' // case A..F - || '0' <= character && character <= '9'; // case 0..9 - } + || character == '(' + || character == ')' + || character == '*' + || character == '+' + || character == ',' + || character == ';'; + } + +// /** +// * pct-encoded-unescaped = "%" ( "0" / "1" / "3" / "4" / "6" / "7" / "8" / "9" / A-to-F ) HEXDIG +// * / "%" "2" ( "0" / "1" / "3" / "4" / "5" / "6" / "7" / "8" / "9" / A-to-F ) +// * / "%" "5" ( DIGIT / "A" / "B" / "D" / "E" / "F" ) +// * +// * HEXDIG = DIGIT / A-to-F +// * +// * @param character which is checked +// * @return true if character is allowed +// */ +// private static boolean isPctEncodedUnescaped(char character) { +// String hex = Integer.toHexString(character); +// char aschar[] = hex.toCharArray(); +// if(aschar[0] == '%') { +// if(aschar[1] == '2') { +// return aschar[2] != '2' && isHexDigit(aschar[2]); +// } else if(aschar[1] == '5') { +// return aschar[2] != 'C' && isHexDigit(aschar[2]); +// } else if(isHexDigit(aschar[1])) { +// return isHexDigit(aschar[2]); +// } +// } +// return false; +// } + +// private static boolean isHexDigit(char character) { +// return 'A' <= character && character <= 'F' // case A..F +// || '0' <= character && character <= '9'; // case 0..9 +// } /** * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" @@ -235,10 +257,10 @@ public class SearchTokenizer { */ private static boolean isUnreserved(char character) { return isAlphaOrDigit(character) - || character == '-' - || character == '.' - || character == '_' - || character == '~'; + || character == '-' + || character == '.' + || character == '_' + || character == '~'; } /** @@ -256,8 +278,6 @@ public class SearchTokenizer { // BWS = *( SP / HTAB / "%20" / "%09" ) ; "bad" whitespace // RWS = 1*( SP / HTAB / "%20" / "%09" ) ; "required" whitespace static boolean isWhitespace(final char character) { - // ( SP / HTAB / "%20" / "%09" ) - // TODO mibo: add missing whitespaces return character == ' ' || character == '\t'; } @@ -400,6 +420,7 @@ public class SearchTokenizer { private class SearchPhraseState extends LiteralState { private boolean closed = false; + private boolean escaped = false; public SearchPhraseState(char c) throws SearchTokenizerException { super(Token.PHRASE, c); if (c != QUOTATION_MARK) { @@ -416,6 +437,16 @@ public class SearchTokenizer { } else if (isWhitespace(c)) { return new RwsState(); } + } else if(escaped) { + escaped = false; + if(c == QUOTATION_MARK || c == PHRASE_ESCAPE_CHAR) { + return allowed(c); + } else { + return forbidden(c); + } + } else if(c == PHRASE_ESCAPE_CHAR) { + escaped = true; + return this; } else if (isAllowedPhrase(c)) { return allowed(c); } else if (isWhitespace(c)) { http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/e5ac5907/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java ---------------------------------------------------------------------- diff --git a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java index 2340f37..46c9290 100644 --- a/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java +++ b/lib/server-core/src/test/java/org/apache/olingo/server/core/uri/parser/search/SearchTokenizerTest.java @@ -250,6 +250,14 @@ public class SearchTokenizerTest { @Test public void characterInPhrase() throws Exception { assertQuery("\"123\" OR \"ALPHA-._~\"").resultsIn(PHRASE, OR, PHRASE); + //escaped characters + assertQuery("\"\\\"123\" OR \"\\\\abc\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"123\""), + new Validator.Tuple(OR), new Validator.Tuple(PHRASE, "\"\\abc\"")); + assertQuery("\"\\\"1\\\\23\"").resultsIn(new Validator.Tuple(PHRASE, "\"\"1\\23\"")); + // exceptions + assertQuery("\"\\\"1\\\\").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE); + assertQuery("\"1\\\"").resultsIn(SearchTokenizerException.MessageKeys.INVALID_TOKEN_STATE); + assertQuery("\"1\\23\"").resultsIn(SearchTokenizerException.MessageKeys.FORBIDDEN_CHARACTER); } @Test http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/e5ac5907/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java ---------------------------------------------------------------------- diff --git a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java index 9f66d66..3c02003 100644 --- a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java +++ b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/antlr/TestFullResourcePath.java @@ -45,6 +45,7 @@ import org.apache.olingo.server.core.uri.parser.UriParserException; import org.apache.olingo.server.core.uri.parser.UriParserSemanticException; import org.apache.olingo.server.core.uri.parser.UriParserSemanticException.MessageKeys; import org.apache.olingo.server.core.uri.parser.UriParserSyntaxException; +import org.apache.olingo.server.core.uri.parser.search.SearchParserException; import org.apache.olingo.server.core.uri.testutil.FilterValidator; import org.apache.olingo.server.core.uri.testutil.TestUriValidator; import org.apache.olingo.server.core.uri.validator.UriValidationException; @@ -5428,9 +5429,7 @@ public class TestFullResourcePath { } @Test - @Ignore("$search currently not implemented") public void testSearch() throws Exception { - testUri.run("ESTwoKeyNav", "$search=abc"); testUri.run("ESTwoKeyNav", "$search=NOT abc"); @@ -5462,6 +5461,19 @@ public class TestFullResourcePath { testUri.run("ESTwoKeyNav", "$search=(abc AND def) ghi "); testUri.run("ESTwoKeyNav", "$search=abc AND (def OR ghi)"); testUri.run("ESTwoKeyNav", "$search=abc AND (def ghi)"); + + // escaped characters + testUri.run("ESTwoKeyNav", "$search=\"abc\""); + testUri.run("ESTwoKeyNav", "$search=\"a\\\"bc\""); + testUri.run("ESTwoKeyNav", "$search=%22abc%22"); + testUri.run("ESTwoKeyNav", "$search=%22a%5C%22bc%22"); + testUri.run("ESTwoKeyNav", "$search=%22a%5C%5Cbc%22"); + + // wrong escaped characters + testUri.runEx("ESTwoKeyNav", "$search=%22a%22bc%22") + .isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION); + testUri.runEx("ESTwoKeyNav", "$search=%22a%5Cbc%22") + .isExceptionMessage(SearchParserException.MessageKeys.TOKENIZER_EXCEPTION); } @Test http://git-wip-us.apache.org/repos/asf/olingo-odata4/blob/e5ac5907/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java ---------------------------------------------------------------------- diff --git a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java index 6a2e5b4..0d5fb4a 100644 --- a/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java +++ b/lib/server-test/src/test/java/org/apache/olingo/server/core/uri/testutil/TestUriValidator.java @@ -176,6 +176,11 @@ public class TestUriValidator implements TestValidator { } } + public TestUriValidator isExceptionMessage(final ODataLibraryException.MessageKey messageKey) { + assertEquals(messageKey, exception.getMessageKey()); + return this; + } + public TestUriValidator isExSyntax(final UriParserSyntaxException.MessageKeys messageKey) { assertEquals(UriParserSyntaxException.class, exception.getClass()); assertEquals(messageKey, exception.getMessageKey());
