This is an automated email from the ASF dual-hosted git repository. jiajunxie pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push: new 2ddc6053b5 [CALCITE-5826] Add FIND_IN_SET function (enabled in Hive and Spark libraries) 2ddc6053b5 is described below commit 2ddc6053b55147622f4a4642786d36403ce7f49a Author: Runkang He <hrun...@gmail.com> AuthorDate: Fri Jul 14 09:12:35 2023 +0800 [CALCITE-5826] Add FIND_IN_SET function (enabled in Hive and Spark libraries) --- .../calcite/adapter/enumerable/RexImpTable.java | 2 ++ .../org/apache/calcite/runtime/SqlFunctions.java | 25 ++++++++++++++++++++++ .../calcite/sql/fun/SqlLibraryOperators.java | 8 +++++++ .../org/apache/calcite/util/BuiltInMethod.java | 1 + site/_docs/reference.md | 1 + .../org/apache/calcite/test/SqlOperatorTest.java | 25 ++++++++++++++++++++++ 6 files changed, 62 insertions(+) diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java index eaf12af158..77cd73d24f 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java @@ -178,6 +178,7 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXISTS_NODE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_VALUE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_XML; import static org.apache.calcite.sql.fun.SqlLibraryOperators.FACTORIAL; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.FIND_IN_SET; import static org.apache.calcite.sql.fun.SqlLibraryOperators.FLOOR_BIG_QUERY; import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATETIME; @@ -598,6 +599,7 @@ public class RexImpTable { defineReflective(REGEXP_INSTR, BuiltInMethod.REGEXP_INSTR2.method, BuiltInMethod.REGEXP_INSTR3.method, BuiltInMethod.REGEXP_INSTR4.method, BuiltInMethod.REGEXP_INSTR5.method); + defineMethod(FIND_IN_SET, BuiltInMethod.FIND_IN_SET.method, NullPolicy.ANY); map.put(TRIM, new TrimImplementor()); diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java index a92e1a95a9..5f3cbf9563 100644 --- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java +++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java @@ -146,6 +146,8 @@ import static java.util.Objects.requireNonNull; @SuppressWarnings("UnnecessaryUnboxing") @Deterministic public class SqlFunctions { + private static final String COMMA_DELIMITER = ","; + @SuppressWarnings("unused") private static final DecimalFormat DOUBLE_FORMAT = NumberUtil.decimalFormat("0.0E0"); @@ -1142,6 +1144,29 @@ public class SqlFunctions { return LEVENSHTEIN_DISTANCE.apply(string1, string2); } + /** SQL FIND_IN_SET(matchStr, textStr) function. + * Returns the index (1-based) of the given matchStr + * in the comma-delimited list textStr. Returns 0, + * if the matchStr is not found or if the matchStr + * contains a comma. */ + public static @Nullable Integer findInSet( + @Nullable String matchStr, + @Nullable String textStr) { + if (matchStr == null || textStr == null) { + return null; + } + if (matchStr.contains(COMMA_DELIMITER)) { + return 0; + } + String[] splits = textStr.split(COMMA_DELIMITER); + for (int i = 0; i < splits.length; i++) { + if (matchStr.equals(splits[i])) { + return i + 1; + } + } + return 0; + } + /** SQL ASCII(string) function. */ public static int ascii(String s) { return s.isEmpty() diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java index e05c4fd1c4..a5440a211d 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java @@ -413,6 +413,14 @@ public abstract class SqlLibraryOperators { OperandTypes.STRING_STRING_OPTIONAL_STRING, SqlFunctionCategory.STRING); + /** The "FIND_IN_SET(matchStr, textStr)" function. */ + @LibraryOperator(libraries = {HIVE, SPARK}) + public static final SqlFunction FIND_IN_SET = + SqlBasicFunction.create("FIND_IN_SET", + ReturnTypes.INTEGER_NULLABLE, + OperandTypes.STRING_STRING, + SqlFunctionCategory.STRING); + /** The "GREATEST(value, value)" function. */ @LibraryOperator(libraries = {BIG_QUERY, ORACLE}) public static final SqlFunction GREATEST = diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java index a5fc4b03e2..89f3318a28 100644 --- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java +++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java @@ -379,6 +379,7 @@ public enum BuiltInMethod { DIFFERENCE(SqlFunctions.class, "difference", String.class, String.class), REVERSE(SqlFunctions.class, "reverse", String.class), LEVENSHTEIN(SqlFunctions.class, "levenshtein", String.class, String.class), + FIND_IN_SET(SqlFunctions.class, "findInSet", String.class, String.class), LEFT(SqlFunctions.class, "left", String.class, int.class), RIGHT(SqlFunctions.class, "right", String.class, int.class), TO_BASE64(SqlFunctions.class, "toBase64", String.class), diff --git a/site/_docs/reference.md b/site/_docs/reference.md index e279e87339..6210c1d8ff 100644 --- a/site/_docs/reference.md +++ b/site/_docs/reference.md @@ -2732,6 +2732,7 @@ BigQuery's type system uses confusingly different names for types and functions: | o | EXISTSNODE(xml, xpath, [, namespaces ]) | Determines whether traversal of a XML document using a specified xpath results in any nodes. Returns 0 if no nodes remain after applying the XPath traversal on the document fragment of the element or elements matched by the XPath expression. Returns 1 if any nodes remain. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression. | m | EXTRACTVALUE(xml, xpathExpr)) | Returns the text of the first text node which is a child of the element or elements matched by the XPath expression. | h s | FACTORIAL(integer) | Returns the factorial of *integer*, the range of *integer* is [0, 20]. Otherwise, returns NULL +| h s | FIND_IN_SET(matchStr, textStr) | Returns the index (1-based) of the given *matchStr* in the comma-delimited *textStr*. Returns 0, if the given *matchStr* is not found or if the *matchStr* contains a comma. For example, FIND_IN_SET('bc', 'a,bc,def') returns 2 | b | FLOOR(value) | Similar to standard `FLOOR(value)` except if *value* is an integer type, the return type is a double | b | FORMAT_DATE(string, date) | Formats *date* according to the specified format *string* | b | FORMAT_DATETIME(string, timestamp) | Formats *timestamp* according to the specified format *string* diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java index 013843fd8f..799b5ddbd0 100644 --- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java +++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java @@ -4708,6 +4708,31 @@ public class SqlOperatorTest { f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer); } + @Test void testFindInSetFunc() { + final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.FIND_IN_SET); + f0.checkFails("^find_in_set('ab', 'abc,b,ab,c,def')^", + "No match found for function signature FIND_IN_SET\\(<CHARACTER>, <CHARACTER>\\)", + false); + final Consumer<SqlOperatorFixture> consumer = f -> { + f.checkString("find_in_set('ab', 'abc,b,ab,c,def')", + "3", "INTEGER NOT NULL"); + f.checkString("find_in_set('ab', ',,,ab,abc,b,ab,c,def')", + "4", "INTEGER NOT NULL"); + f.checkString("find_in_set('def', ',,,ab,abc,c,def')", + "7", "INTEGER NOT NULL"); + f.checkString("find_in_set(_UTF8'\u4F60\u597D', _UTF8'b,ab,c,def,\u4F60\u597D')", + "5", "INTEGER NOT NULL"); + f.checkString("find_in_set('acd', ',,,ab,abc,c,def')", + "0", "INTEGER NOT NULL"); + f.checkString("find_in_set('ab,', 'abc,b,ab,c,def')", + "0", "INTEGER NOT NULL"); + f.checkNull("find_in_set(cast(null as varchar), 'abc,b,ab,c,def')"); + f.checkNull("find_in_set('ab', cast(null as varchar))"); + f.checkNull("find_in_set(cast(null as varchar), cast(null as varchar))"); + }; + f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer); + } + @Test void testIfFunc() { final SqlOperatorFixture f = fixture(); checkIf(f.withLibrary(SqlLibrary.BIG_QUERY));