This is an automated email from the ASF dual-hosted git repository. spmallette pushed a commit to branch TINKERPOP-2842 in repository https://gitbox.apache.org/repos/asf/tinkerpop.git
commit 1ad787513934a696c679b0d08a83c9ee93301ac1 Author: Stephen Mallette <[email protected]> AuthorDate: Thu Dec 22 07:37:27 2022 -0500 TINKERPOP-2842 Parsed requestId in GremlinScriptChecker --- CHANGELOG.asciidoc | 1 + .../groovy/jsr223/GremlinScriptChecker.java | 226 ++++++++++++--------- .../groovy/jsr223/GremlinScriptCheckerTest.java | 113 ++++++++++- 3 files changed, 242 insertions(+), 98 deletions(-) diff --git a/CHANGELOG.asciidoc b/CHANGELOG.asciidoc index aa6681bb9f..4bc2b176ae 100644 --- a/CHANGELOG.asciidoc +++ b/CHANGELOG.asciidoc @@ -53,6 +53,7 @@ image::https://raw.githubusercontent.com/apache/tinkerpop/master/docs/static/ima * Improved logging for `gremlin-driver`. * Modified `Connection` and `Host` job scheduling in `gremlin-driver` by dividing their work to two different thread pools and sparing work from the primary pool responsible for submitting requests and reading results. * Prevented usage of the fork-join pool for `gremlin-driver` job scheduling. +* Modified `GremlinScriptChecker` to extract the `Tokens.REQUEST_ID` from Gremlin scripts. * Changed `Host` initialization within a `Client` to be parallel again in `gremlin-driver`. * Changed mechanism for determining `Host` health which should make the driver more resilient to intermittent network failures. * Removed the delay for reconnecting to a potentially unhealthy `Host` only marking it as unavailable after that initial retry fails. diff --git a/gremlin-groovy/src/main/java/org/apache/tinkerpop/gremlin/groovy/jsr223/GremlinScriptChecker.java b/gremlin-groovy/src/main/java/org/apache/tinkerpop/gremlin/groovy/jsr223/GremlinScriptChecker.java index 49b1b2a946..249c659d8c 100644 --- a/gremlin-groovy/src/main/java/org/apache/tinkerpop/gremlin/groovy/jsr223/GremlinScriptChecker.java +++ b/gremlin-groovy/src/main/java/org/apache/tinkerpop/gremlin/groovy/jsr223/GremlinScriptChecker.java @@ -19,24 +19,35 @@ package org.apache.tinkerpop.gremlin.groovy.jsr223; import java.util.Arrays; -import java.util.List; +import java.util.HashSet; import java.util.Optional; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; /** - * Processes Gremlin strings using regex so as to try to detect certain properties from the script without actual + * Processes Gremlin strings using regex to try to detect certain properties from the script without actual * having to execute a {@code eval()} on it. */ public class GremlinScriptChecker { - public static final Result EMPTY_RESULT = new Result(0); - private static final List<String> tokens = Arrays.asList("evaluationTimeout", "scriptEvaluationTimeout", - "ARGS_EVAL_TIMEOUT", "ARGS_SCRIPT_EVAL_TIMEOUT"); + /** + * An empty result whose properties return as empty. + */ + public static final Result EMPTY_RESULT = new Result(null, null); + /** + * At least one of these tokens should be present somewhere in the Gremlin string for {@link #parse(String)} to + * take any action at all. + */ + private static final Set<String> tokens = new HashSet<>(Arrays.asList("evaluationTimeout", "scriptEvaluationTimeout", + "ARGS_EVAL_TIMEOUT", "ARGS_SCRIPT_EVAL_TIMEOUT", + "requestId", "REQUEST_ID")); /** * Matches single line comments, multi-line comments and space characters. * <pre> + * From https://regex101.com/ + * * OR: match either of the followings * Sequence: match all of the followings in order * / / @@ -69,96 +80,89 @@ public class GremlinScriptChecker { * <li>{@code ARGS_EVAL_TIMEOUT} which is a enum type of value which can be referenced with or without a {@code Tokens} qualifier</li> * <li>{@code ARGS_SCRIPT_EVAL_TIMEOUT} which is a enum type of value which can be referenced with or without a {@code Tokens} qualifier</li> * </ul> - * <pre> - * OR: match either of the followings - * Sequence: match all of the followings in order - * AnyCharIn[ " '] - * e v a l u a t i o n T i m e o u t - * AnyCharIn[ " '] - * Sequence: match all of the followings in order - * AnyCharIn[ " '] - * s c r i p t E v a l u a t i o n T i m e o u t - * AnyCharIn[ " '] - * Sequence: match all of the followings in order - * Repeat - * CapturingGroup - * (NonCapturingGroup) - * Sequence: match all of the followings in order - * T o k e n s - * . - * optional - * A R G S _ E V A L _ T I M E O U T - * Sequence: match all of the followings in order - * Repeat - * CapturingGroup - * (NonCapturingGroup) - * Sequence: match all of the followings in order - * T o k e n s - * . - * optional - * A R G S _ S C R I P T _ E V A L _ T I M E O U T - * </pre> + * See {@link #patternWithOptions} for explain as this regex is embedded in there. */ private static final String timeoutTokens = "[\"']evaluationTimeout[\"']|[\"']scriptEvaluationTimeout[\"']|(?:Tokens\\.)?ARGS_EVAL_TIMEOUT|(?:Tokens\\.)?ARGS_SCRIPT_EVAL_TIMEOUT"; + /** + * Regex fragment for the timeout tokens to look for. There are basically four: + * <ul> + * <li>{@code requestId} which is a string value and thus single or double quoted</li> + * <li>{@code REQUEST_ID} which is a enum type of value which can be referenced with or without a {@code Tokens} qualifier</li> + * </ul> + * See {@link #patternWithOptions} for a full explain as this regex is embedded in there. + */ + private static final String requestIdTokens = "[\"']requestId[\"']|(?:Tokens\\.)?REQUEST_ID"; + /** * Matches {@code .with({timeout-token},{timeout})} with a matching group on the {@code timeout}. * * <pre> - * Sequence: match all of the followings in order - * . - * w i t h - * ( - * CapturingGroup - * (NonCapturingGroup) - * OR: match either of the followings - * Sequence: match all of the followings in order - * AnyCharIn[ " '] - * e v a l u a t i o n T i m e o u t - * AnyCharIn[ " '] - * Sequence: match all of the followings in order - * AnyCharIn[ " '] - * s c r i p t E v a l u a t i o n T i m e o u t - * AnyCharIn[ " '] - * Sequence: match all of the followings in order - * Repeat - * CapturingGroup - * (NonCapturingGroup) - * Sequence: match all of the followings in order - * T o k e n s - * . - * optional - * A R G S _ E V A L _ T I M E O U T - * Sequence: match all of the followings in order - * Repeat - * CapturingGroup - * (NonCapturingGroup) - * Sequence: match all of the followings in order - * T o k e n s - * . - * optional - * A R G S _ S C R I P T _ E V A L _ T I M E O U T - * , - * CapturingGroup - * GroupNumber:1 - * Repeat - * Digit - * zero or more times - * Repeat - * CapturingGroup - * GroupNumber:2 - * OR: match either of the followings - * Sequence: match all of the followings in order - * Repeat - * : - * optional - * L - * l - * optional - * ) + * From https://regex101.com/ + * + * \.with\((?:(?:["']evaluationTimeout["']|["']scriptEvaluationTimeout["']|(?:Tokens\.)?ARGS_EVAL_TIMEOUT|(?:Tokens\.)?ARGS_SCRIPT_EVAL_TIMEOUT),(?<to>\d*)(:?L|l)?)|(?:(?:["']requestId["']|(?:Tokens\.)?REQUEST_ID),["'](?<rid>.*?))["']\) + * + * gm + * 1st Alternative \.with\((?:(?:["']evaluationTimeout["']|["']scriptEvaluationTimeout["']|(?:Tokens\.)?ARGS_EVAL_TIMEOUT|(?:Tokens\.)?ARGS_SCRIPT_EVAL_TIMEOUT),(?<to>\d*)(:?L|l)?) + * \. matches the character . with index 4610 (2E16 or 568) literally (case sensitive) + * with matches the characters with literally (case sensitive) + * \( matches the character ( with index 4010 (2816 or 508) literally (case sensitive) + * Non-capturing group (?:(?:["']evaluationTimeout["']|["']scriptEvaluationTimeout["']|(?:Tokens\.)?ARGS_EVAL_TIMEOUT|(?:Tokens\.)?ARGS_SCRIPT_EVAL_TIMEOUT),(?<to>\d*)(:?L|l)?) + * Non-capturing group (?:["']evaluationTimeout["']|["']scriptEvaluationTimeout["']|(?:Tokens\.)?ARGS_EVAL_TIMEOUT|(?:Tokens\.)?ARGS_SCRIPT_EVAL_TIMEOUT) + * 1st Alternative ["']evaluationTimeout["'] + * Match a single character present in the list below ["'] + * "' matches a single character in the list "' (case sensitive) + * evaluationTimeout matches the characters evaluationTimeout literally (case sensitive) + * Match a single character present in the list below ["'] + * "' matches a single character in the list "' (case sensitive) + * 2nd Alternative ["']scriptEvaluationTimeout["'] + * Match a single character present in the list below ["'] + * "' matches a single character in the list "' (case sensitive) + * scriptEvaluationTimeout matches the characters scriptEvaluationTimeout literally (case sensitive) + * Match a single character present in the list below ["'] + * "' matches a single character in the list "' (case sensitive) + * 3rd Alternative (?:Tokens\.)?ARGS_EVAL_TIMEOUT + * Non-capturing group (?:Tokens\.)? + * ? matches the previous token between zero and one times, as many times as possible, giving back as needed (greedy) + * Tokens matches the characters Tokens literally (case sensitive) + * \. matches the character . with index 4610 (2E16 or 568) literally (case sensitive) + * ARGS_EVAL_TIMEOUT matches the characters ARGS_EVAL_TIMEOUT literally (case sensitive) + * 4th Alternative (?:Tokens\.)?ARGS_SCRIPT_EVAL_TIMEOUT + * Non-capturing group (?:Tokens\.)? + * ? matches the previous token between zero and one times, as many times as possible, giving back as needed (greedy) + * Tokens matches the characters Tokens literally (case sensitive) + * \. matches the character . with index 4610 (2E16 or 568) literally (case sensitive) + * ARGS_SCRIPT_EVAL_TIMEOUT matches the characters ARGS_SCRIPT_EVAL_TIMEOUT literally (case sensitive) + * , matches the character , with index 4410 (2C16 or 548) literally (case sensitive) + * Named Capture Group to (?<to>\d*) + * \d matches a digit (equivalent to [0-9]) + * * matches the previous token between zero and unlimited times, as many times as possible, giving back as needed (greedy) + * 2nd Capturing Group (:?L|l)? + * ? matches the previous token between zero and one times, as many times as possible, giving back as needed (greedy) + * 1st Alternative :?L + * : matches the character : with index 5810 (3A16 or 728) literally (case sensitive) + * ? matches the previous token between zero and one times, as many times as possible, giving back as needed (greedy) + * L matches the character L with index 7610 (4C16 or 1148) literally (case sensitive) + * 2nd Alternative l + * l matches the character l with index 10810 (6C16 or 1548) literally (case sensitive) + * 2nd Alternative (?:(?:["']requestId["']|(?:Tokens\.)?REQUEST_ID),["'](?<rid>.*?))["']\) + * Non-capturing group (?:(?:["']requestId["']|(?:Tokens\.)?REQUEST_ID),["'](?<rid>.*?)) + * Non-capturing group (?:["']requestId["']|(?:Tokens\.)?REQUEST_ID) + * 1st Alternative ["']requestId["'] + * 2nd Alternative (?:Tokens\.)?REQUEST_ID + * , matches the character , with index 4410 (2C16 or 548) literally (case sensitive) + * Match a single character present in the list below ["'] + * "' matches a single character in the list "' (case sensitive) + * Named Capture Group rid (?<rid>.*?) + * . matches any character (except for line terminators) + * *? matches the previous token between zero and unlimited times, as few times as possible, expanding as needed (lazy) + * Match a single character present in the list below ["'] + * "' matches a single character in the list "' (case sensitive) + * \) matches the character ) with index 4110 (2916 or 518) literally (case sensitive) * </pre> */ - private static final Pattern patternTimeout = Pattern.compile("\\.with\\((?:" + timeoutTokens + "),(\\d*)(:?L|l)?\\)"); + private static final Pattern patternWithOptions = + Pattern.compile("\\.with\\(((?:" + timeoutTokens + "),(?<to>\\d*)(:?L|l)?)|((?:" + requestIdTokens + "),[\"'](?<rid>.*?))[\"']\\)"); /** * Parses a Gremlin script and extracts a {@code Result} containing properties that are relevant to the checker. @@ -174,22 +178,38 @@ public class GremlinScriptChecker { // isn't currently a requirement final String cleanGremlin = patternClean.matcher(gremlin).replaceAll(""); - final Matcher m = patternTimeout.matcher(cleanGremlin); + final Matcher m = patternWithOptions.matcher(cleanGremlin); if (!m.find()) return EMPTY_RESULT; - long l = Long.parseLong(m.group(1)); - while (m.find()) { - l += Long.parseLong(m.group(1)); - } + // arguments given to Result class as null mean they weren't assigned (or the parser didn't find them somehow - eek!) + Long timeout = null; + String requestId = null; + do { + // timeout is added up across all scripts + final String to = m.group("to"); + if (to != null) { + if (null == timeout) timeout = 0L; + timeout += Long.parseLong(to); + } + + // request id just uses the last one found + final String rid = m.group("rid"); + if (rid != null) requestId = rid; + } while (m.find()); - return new Result(l); + return new Result(timeout, requestId); } + /** + * A result returned from a {@link #parse(String)} of a Gremlin string. + */ public static class Result { - private final long timeout; + private final Long timeout; + private final String requestId; - private Result(final long timeout) { + private Result(final Long timeout, final String requestId) { this.timeout = timeout; + this.requestId = requestId; } /** @@ -197,7 +217,23 @@ public class GremlinScriptChecker { * commands using this step, the timeouts are summed together. */ public final Optional<Long> getTimeout() { - return timeout == 0 ? Optional.empty() : Optional.of(timeout); + return null == timeout ? Optional.empty() : Optional.of(timeout); + } + + /** + * Gets the value of the request identifier supplied using the {@code with()} source step. If there are + * multiple commands using this step, the last usage should represent the id returned here. + */ + public Optional<String> getRequestId() { + return null == requestId ? Optional.empty() : Optional.of(requestId); + } + + @Override + public String toString() { + return "GremlinScriptChecker.Result{" + + "timeout=" + timeout + + ", requestId='" + requestId + '\'' + + '}'; } } } diff --git a/gremlin-groovy/src/test/java/org/apache/tinkerpop/gremlin/groovy/jsr223/GremlinScriptCheckerTest.java b/gremlin-groovy/src/test/java/org/apache/tinkerpop/gremlin/groovy/jsr223/GremlinScriptCheckerTest.java index b9dd917511..c544743890 100644 --- a/gremlin-groovy/src/test/java/org/apache/tinkerpop/gremlin/groovy/jsr223/GremlinScriptCheckerTest.java +++ b/gremlin-groovy/src/test/java/org/apache/tinkerpop/gremlin/groovy/jsr223/GremlinScriptCheckerTest.java @@ -23,19 +23,26 @@ import org.junit.Test; import java.util.Optional; import static org.apache.tinkerpop.gremlin.groovy.jsr223.GremlinScriptChecker.EMPTY_RESULT; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.Is.is; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertSame; public class GremlinScriptCheckerTest { @Test - public void shouldNotFindTimeout() { - assertEquals(Optional.empty(), GremlinScriptChecker.parse("g.with(true).V().out('knows')").getTimeout()); + public void shouldNotFindAResult() { + final GremlinScriptChecker.Result r = GremlinScriptChecker.parse("g.with(true).V().out('knows')"); + assertEquals(Optional.empty(), r.getTimeout()); + assertEquals(Optional.empty(), r.getRequestId()); } @Test public void shouldReturnEmpty() { - assertSame(EMPTY_RESULT, GremlinScriptChecker.parse("")); + final GremlinScriptChecker.Result r = GremlinScriptChecker.parse(""); + assertSame(EMPTY_RESULT, r); + assertEquals(Optional.empty(), r.getTimeout()); + assertEquals(Optional.empty(), r.getRequestId()); } @Test @@ -45,6 +52,50 @@ public class GremlinScriptCheckerTest { " with(true).V().out('knows')").getTimeout()); } + @Test + public void shouldIdentifyTimeoutWithOddSpacing() { + assertEquals(1000, GremlinScriptChecker.parse("g.with('evaluationTimeout' , 1000L).with(true).V().out('knows')"). + getTimeout().get().longValue()); + assertEquals(1000, GremlinScriptChecker.parse("g.with('scriptEvaluationTimeout' , 1000L).with(true).V().out('knows')"). + getTimeout().get().longValue()); + assertEquals(1000, GremlinScriptChecker.parse("g.with('evaluationTimeout',1000L).with(true).V().out('knows')"). + getTimeout().get().longValue()); + } + + @Test + public void shouldIdentifyRequestIdWithOddSpacing() { + assertEquals("test", GremlinScriptChecker.parse("g.with('requestId' , 'test').with(true).V().out('knows')"). + getRequestId().get()); + assertEquals("test", GremlinScriptChecker.parse("g.with('requestId' , 'test').with(true).V().out('knows')"). + getRequestId().get()); + assertEquals("test", GremlinScriptChecker.parse("g.with('requestId','test').with(true).V().out('knows')"). + getRequestId().get()); + } + + @Test + public void shouldIdentifyRequestIdWithEmbeddedQuote() { + assertEquals("te\"st", GremlinScriptChecker.parse("g.with('requestId','te\"st').with(true).V().out('knows')"). + getRequestId().get()); + assertEquals("te\\\"st", GremlinScriptChecker.parse("g.with('requestId', \"te\\\"st\").with(true).V().out('knows')"). + getRequestId().get()); + } + + @Test + public void shouldIdentifyTimeoutWithLowerL() { + assertEquals(1000, GremlinScriptChecker.parse("g.with('evaluationTimeout', 1000l).with(true).V().out('knows')"). + getTimeout().get().longValue()); + assertEquals(1000, GremlinScriptChecker.parse("g.with('scriptEvaluationTimeout', 1000L).with(true).V().out('knows')"). + getTimeout().get().longValue()); + } + + @Test + public void shouldIdentifyTimeoutWithNoL() { + assertEquals(1000, GremlinScriptChecker.parse("g.with('evaluationTimeout', 1000).with(true).V().out('knows')"). + getTimeout().get().longValue()); + assertEquals(1000, GremlinScriptChecker.parse("g.with('scriptEvaluationTimeout', 1000).with(true).V().out('knows')"). + getTimeout().get().longValue()); + } + @Test public void shouldIdentifyTimeoutAsStringKeySingleQuoted() { assertEquals(1000, GremlinScriptChecker.parse("g.with('evaluationTimeout', 1000L).with(true).V().out('knows')"). @@ -53,6 +104,14 @@ public class GremlinScriptCheckerTest { getTimeout().get().longValue()); } + @Test + public void shouldIdentifyRequestIdAsStringKeySingleQuoted() { + assertEquals("db024fca-ed15-4375-95de-4c6106aef895", GremlinScriptChecker.parse("g.with('requestId', 'db024fca-ed15-4375-95de-4c6106aef895').with(true).V().out('knows')"). + getRequestId().get()); + assertEquals("db024fca-ed15-4375-95de-4c6106aef895", GremlinScriptChecker.parse("g.with(\"requestId\", 'db024fca-ed15-4375-95de-4c6106aef895').with(true).V().out('knows')"). + getRequestId().get()); + } + @Test public void shouldIdentifyTimeoutAsStringKeyDoubleQuoted() { assertEquals(1000, GremlinScriptChecker.parse("g.with(\"evaluationTimeout\", 1000L).with(true).V().out('knows')"). @@ -61,6 +120,14 @@ public class GremlinScriptCheckerTest { getTimeout().get().longValue()); } + @Test + public void shouldIdentifyRequestIdAsStringKeyDoubleQuoted() { + assertEquals("db024fca-ed15-4375-95de-4c6106aef895", GremlinScriptChecker.parse("g.with(\"requestId\", \"db024fca-ed15-4375-95de-4c6106aef895\").with(true).V().out('knows')"). + getRequestId().get()); + assertEquals("db024fca-ed15-4375-95de-4c6106aef895", GremlinScriptChecker.parse("g.with(\"requestId\", \"db024fca-ed15-4375-95de-4c6106aef895\").with(true).V().out('knows')"). + getRequestId().get()); + } + @Test public void shouldIdentifyTimeoutAsTokenKey() { assertEquals(1000, GremlinScriptChecker.parse("g.with(Tokens.ARGS_EVAL_TIMEOUT, 1000L).with(true).V().out('knows')"). @@ -69,6 +136,14 @@ public class GremlinScriptCheckerTest { getTimeout().get().longValue()); } + @Test + public void shouldIdentifyRequestIdAsTokenKey() { + assertEquals("db024fca-ed15-4375-95de-4c6106aef895", GremlinScriptChecker.parse("g.with(Tokens.REQUEST_ID, \"db024fca-ed15-4375-95de-4c6106aef895\").with(true).V().out('knows')"). + getRequestId().get()); + assertEquals("db024fca-ed15-4375-95de-4c6106aef895", GremlinScriptChecker.parse("g.with(Tokens.REQUEST_ID, \"db024fca-ed15-4375-95de-4c6106aef895\").with(true).V().out('knows')"). + getRequestId().get()); + } + @Test public void shouldIdentifyTimeoutAsTokenKeyWithoutClassName() { assertEquals(1000, GremlinScriptChecker.parse("g.with(ARGS_EVAL_TIMEOUT, 1000L).with(true).V().out('knows')"). @@ -77,6 +152,14 @@ public class GremlinScriptCheckerTest { getTimeout().get().longValue()); } + @Test + public void shouldIdentifyRequestIdAsTokenKeyWithoutClassName() { + assertEquals("db024fca-ed15-4375-95de-4c6106aef895", GremlinScriptChecker.parse("g.with(REQUEST_ID, \"db024fca-ed15-4375-95de-4c6106aef895\").with(true).V().out('knows')"). + getRequestId().get()); + assertEquals("db024fca-ed15-4375-95de-4c6106aef895", GremlinScriptChecker.parse("g.with(REQUEST_ID, \"db024fca-ed15-4375-95de-4c6106aef895\").with(true).V().out('knows')"). + getRequestId().get()); + } + @Test public void shouldIdentifyMultipleTimeouts() { assertEquals(6000, GremlinScriptChecker.parse("g.with('evaluationTimeout', 1000L).with(true).V().out('knows');" + @@ -93,6 +176,30 @@ public class GremlinScriptCheckerTest { getTimeout().get().longValue()); } + @Test + public void shouldIdentifyMultipleRequestIds() { + assertEquals("test9", GremlinScriptChecker.parse("g.with('requestId', 'test1').with(true).V().out('knows');" + + "g.with('requestId', 'test2').with(true).V().out('knows');\n" + + " //g.with('requestId', 'test3').with(true).V().out('knows');\n" + + " /* g.with('requestId', 'test4').with(true).V().out('knows');*/\n" + + " /* \n" + + "g.with('requestId', 'test5').with(true).V().out('knows'); \n" + + "*/ \n" + + " g.with('requestId', 'test6').with(true).V().out('knows');\n" + + " g.with(Tokens.REQUEST_ID, 'test7').with(true).V().out('knows');\n" + + " g.with(REQUEST_ID, 'test8').with(true).V().out('knows');\n" + + " g.with('requestId', 'test9').with(true).V().out('knows');"). + getRequestId().get()); + } + + @Test + public void shouldFindAllResults() { + final GremlinScriptChecker.Result r = GremlinScriptChecker.parse( + "g.with('evaluationTimeout', 1000).with(true).with(REQUEST_ID, \"db024fca-ed15-4375-95de-4c6106aef895\").V().out('knows')"); + assertEquals(1000, r.getTimeout().get().longValue()); + assertEquals("db024fca-ed15-4375-95de-4c6106aef895", r.getRequestId().get()); + } + @Test public void shouldParseLong() { assertEquals(1000, GremlinScriptChecker.parse("g.with('evaluationTimeout', 1000L).addV().property(id, 'blue').as('b').\n" +
