This is an automated email from the ASF dual-hosted git repository.
siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 561e471a86 Add support for regexpReplace scalar function (#9123)
561e471a86 is described below
commit 561e471a86278e0e486cd9e602f8499fc8f8517c
Author: Vivek Iyer Vaidyanathan <[email protected]>
AuthorDate: Tue Aug 2 16:29:16 2022 -0700
Add support for regexpReplace scalar function (#9123)
* Add support for regexpReplace scalar function
* Address minor review comments.
---
.../LiteralOnlyBrokerRequestTest.java | 17 +++
.../common/function/scalar/StringFunctions.java | 107 ++++++++++++++++++
.../tests/OfflineClusterIntegrationTest.java | 119 +++++++++++++++++++++
3 files changed, 243 insertions(+)
diff --git
a/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/LiteralOnlyBrokerRequestTest.java
b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/LiteralOnlyBrokerRequestTest.java
index bd807818ce..516fe5fb65 100644
---
a/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/LiteralOnlyBrokerRequestTest.java
+++
b/pinot-broker/src/test/java/org/apache/pinot/broker/requesthandler/LiteralOnlyBrokerRequestTest.java
@@ -122,6 +122,23 @@ public class LiteralOnlyBrokerRequestTest {
CalciteSqlParser.compileToPinotQuery("SELECT count(*) from foo " +
"where bar = fromBase64('aGVsbG8h')")));
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser.compileToPinotQuery(
"SELECT count(*) from foo " + "where bar =
fromUtf8(fromBase64('aGVsbG8h'))")));
+
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
+ .compileToPinotQuery("SELECT count(*) from myTable where
regexpReplace(col1, \"b(..)\", \"X$1Y\") = "
+ + "\"fooXarYXazY\"")));
+
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
+ .compileToPinotQuery("SELECT count(*) from myTable where
regexpReplace(col1, \"b(..)\", \"X$1Y\", 10) = "
+ + "\"fooXarYXazY\"")));
+
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
+ .compileToPinotQuery("SELECT count(*) from myTable where
regexpReplace(col1, \"b(..)\", \"X$1Y\", 10 , 1) = "
+ + "\"fooXarYXazY\"")));
+
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
+ .compileToPinotQuery("SELECT count(*) from myTable where
regexpReplace(col1, \"b(..)\", \"X$1Y\", 10 , 1, "
+ + "\"i\") = "
+ + "\"fooXarYXazY\"")));
+
Assert.assertFalse(BaseBrokerRequestHandler.isLiteralOnlyQuery(CalciteSqlParser
+ .compileToPinotQuery("SELECT count(*) from myTable where
regexpReplace(col1, \"b(..)\", \"X$1Y\", 10 , 1, "
+ + "\"m\") = "
+ + "\"fooXarYXazY\"")));
}
@Test
diff --git
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
index 08b9c5ac22..4277cec152 100644
---
a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
+++
b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
@@ -588,4 +588,111 @@ public class StringFunctions {
public static byte[] fromBase64(String input) {
return Base64.getDecoder().decode(input);
}
+
+ /**
+ * Replace a regular expression pattern. If matchStr is not found, inputStr
will be returned. By default, all
+ * occurences of match pattern in the input string will be replaced. Default
matching pattern is case sensitive.
+ *
+ * @param inputStr Input string to apply the regexpReplace
+ * @param matchStr Regexp or string to match against inputStr
+ * @param replaceStr Regexp or string to replace if matchStr is found
+ * @param matchStartPos Index of inputStr from where matching should start.
Default is 0.
+ * @param occurence Controls which occurence of the matched pattern must be
replaced. Counting starts at 0. Default
+ * is -1
+ * @param flag Single character flag that controls how the regex finds
matches in inputStr. If an incorrect flag is
+ * specified, the function applies default case sensitive match.
Only one flag can be specified. Supported
+ * flags:
+ * i -> Case insensitive
+ * @return replaced input string
+ */
+ @ScalarFunction
+ public static String regexpReplace(String inputStr, String matchStr, String
replaceStr, int matchStartPos,
+ int occurence, String flag) {
+ Integer patternFlag;
+
+ // TODO: Support more flags like MULTILINE, COMMENTS, etc.
+ switch (flag) {
+ case "i":
+ patternFlag = Pattern.CASE_INSENSITIVE;
+ break;
+ default:
+ patternFlag = null;
+ break;
+ }
+
+ Pattern p;
+ if (patternFlag != null) {
+ p = Pattern.compile(matchStr, patternFlag);
+ } else {
+ p = Pattern.compile(matchStr);
+ }
+
+ Matcher matcher = p.matcher(inputStr).region(matchStartPos,
inputStr.length());
+ StringBuffer sb;
+
+ if (occurence >= 0) {
+ sb = new StringBuffer(inputStr);
+ while (occurence >= 0 && matcher.find()) {
+ if (occurence == 0) {
+ sb.replace(matcher.start(), matcher.end(), replaceStr);
+ break;
+ }
+ occurence--;
+ }
+ } else {
+ sb = new StringBuffer();
+ while (matcher.find()) {
+ matcher.appendReplacement(sb, replaceStr);
+ }
+ matcher.appendTail(sb);
+ }
+
+ return sb.toString();
+ }
+
+ /**
+ * See #regexpReplace(String, String, String, int, int, String). Matches
against entire inputStr and replaces all
+ * occurences. Match is performed in case-sensitive mode.
+ *
+ * @param inputStr Input string to apply the regexpReplace
+ * @param matchStr Regexp or string to match against inputStr
+ * @param replaceStr Regexp or string to replace if matchStr is found
+ * @return replaced input string
+ */
+ @ScalarFunction
+ public static String regexpReplace(String inputStr, String matchStr, String
replaceStr) {
+ return regexpReplace(inputStr, matchStr, replaceStr, 0, -1, "");
+ }
+
+ /**
+ * See #regexpReplace(String, String, String, int, int, String). Matches
against entire inputStr and replaces all
+ * occurences. Match is performed in case-sensitive mode.
+ *
+ * @param inputStr Input string to apply the regexpReplace
+ * @param matchStr Regexp or string to match against inputStr
+ * @param replaceStr Regexp or string to replace if matchStr is found
+ * @param matchStartPos Index of inputStr from where matching should start.
Default is 0.
+ * @return replaced input string
+ */
+ @ScalarFunction
+ public static String regexpReplace(String inputStr, String matchStr, String
replaceStr, int matchStartPos) {
+ return regexpReplace(inputStr, matchStr, replaceStr, matchStartPos, -1,
"");
+ }
+
+ /**
+ * See #regexpReplace(String, String, String, int, int, String). Match is
performed in case-sensitive mode.
+ *
+ * @param inputStr Input string to apply the regexpReplace
+ * @param matchStr Regexp or string to match against inputStr
+ * @param replaceStr Regexp or string to replace if matchStr is found
+ * @param matchStartPos Index of inputStr from where matching should start.
Default is 0.
+ * @param occurence Controls which occurence of the matched pattern must be
replaced. Counting starts
+ * at 0. Default is -1
+ * @return replaced input string
+ */
+ @ScalarFunction
+ public static String regexpReplace(String inputStr, String matchStr, String
replaceStr, int matchStartPos,
+ int occurence) {
+ return regexpReplace(inputStr, matchStr, replaceStr, matchStartPos,
occurence, "");
+ }
}
diff --git
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
index c95e3d91e5..053af685ef 100644
---
a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
+++
b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
@@ -88,6 +88,7 @@ import static
org.apache.pinot.common.function.scalar.StringFunctions.*;
import static
org.apache.pinot.controller.helix.core.PinotHelixResourceManager.EXTERNAL_VIEW_CHECK_INTERVAL_MS;
import static
org.apache.pinot.controller.helix.core.PinotHelixResourceManager.EXTERNAL_VIEW_ONLINE_SEGMENTS_MAX_WAIT_MS;
import static org.testng.Assert.*;
+import static org.testng.Assert.assertEquals;
/**
@@ -590,6 +591,124 @@ public class OfflineClusterIntegrationTest extends
BaseClusterIntegrationTestSet
assertEquals(oneHourAgoTodayStr, expectedOneHourAgoTodayStr);
}
+ @Test
+ public void testRegexpReplace() throws Exception {
+ // Correctness tests of regexpReplace.
+
+ // Test replace all.
+ String sqlQuery = "SELECT regexpReplace('CA', 'C', 'TEST')";
+ JsonNode response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ String result =
response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "TESTA");
+
+ sqlQuery = "SELECT regexpReplace('foobarbaz', 'b', 'X')";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "fooXarXaz");
+
+ sqlQuery = "SELECT regexpReplace('foobarbaz', 'b', 'XY')";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "fooXYarXYaz");
+
+ sqlQuery = "SELECT regexpReplace('Argentina', '(.)', '$1 ')";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "A r g e n t i n a ");
+
+ sqlQuery = "SELECT regexpReplace('Pinot is blazing fast', '( ){2,}', '
')";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "Pinot is blazing fast");
+
+ sqlQuery = "SELECT regexpReplace('healthy, wealthy, and wise','\\w+thy',
'something')";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "something, something, and wise");
+
+ sqlQuery = "SELECT
regexpReplace('11234567898','(\\d)(\\d{3})(\\d{3})(\\d{4})', '$1-($2) $3-$4')";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "1-(123) 456-7898");
+
+ // Test replace starting at index.
+
+ sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and
wise','\\w+thy', 'something', 4)";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "healthy, something, something and wise");
+
+ sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and
wise','\\w+thy', 'something', 1)";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "hsomething, something, something and wise");
+
+
+ // Test occurence
+ sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and
wise','\\w+thy', 'something', 0, 2)";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "healthy, wealthy, something and wise");
+
+ sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and
wise','\\w+thy', 'something', 0, 0)";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "something, wealthy, stealthy and wise");
+
+ // Test flags
+ sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and
wise','\\w+tHy', 'something', 0, 0, 'i')";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "something, wealthy, stealthy and wise");
+
+ // Negative test. Pattern match not found.
+ sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and
wise','\\w+tHy', 'something')";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "healthy, wealthy, stealthy and wise");
+
+ // Negative test. Pattern match not found.
+ sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and
wise','\\w+tHy', 'something', 3, 21, 'i')";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "healthy, wealthy, stealthy and wise");
+
+ // Negative test - incorrect flag
+ sqlQuery = "SELECT regexpReplace('healthy, wealthy, stealthy and
wise','\\w+tHy', 'something', 3, 12, 'xyz')";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ result = response.get("resultTable").get("rows").get(0).get(0).asText();
+ assertEquals(result, "healthy, wealthy, stealthy and wise");
+
+ // Test in select clause with column values
+ sqlQuery = "SELECT regexpReplace(DestCityName, ' ', '', 0, -1, 'i') from
myTable where OriginState = 'CA'";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ JsonNode rows = response.get("resultTable").get("rows");
+ for (int i = 0; i < rows.size(); i++) {
+ JsonNode row = rows.get(i);
+ boolean containsSpace = row.get(0).asText().contains(" ");
+ assertEquals(containsSpace, false);
+ }
+
+ // Test in where clause
+ sqlQuery = "SELECT count(*) from myTable where regexpReplace(originState,
'[VC]A', 'TEST') = 'TEST'";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ int count1 = response.get("resultTable").get("rows").get(0).get(0).asInt();
+ sqlQuery = "SELECT count(*) from myTable where originState='CA' or
originState='VA'";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ int count2 = response.get("resultTable").get("rows").get(0).get(0).asInt();
+ assertEquals(count1, count2);
+
+ // Test nested transform
+ sqlQuery = "SELECT count(*) from myTable where
contains(regexpReplace(originState, '(C)(A)', '$1TEST$2'), "
+ + "'CTESTA')";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ count1 = response.get("resultTable").get("rows").get(0).get(0).asInt();
+ sqlQuery = "SELECT count(*) from myTable where originState='CA'";
+ response = postQuery(sqlQuery, _brokerBaseApiUrl);
+ count2 = response.get("resultTable").get("rows").get(0).get(0).asInt();
+ assertEquals(count1, count2);
+ }
+
@Test
public void testUrlFunc()
throws Exception {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]