This is an automated email from the ASF dual-hosted git repository.

jiajunxie pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 2ddc6053b5 [CALCITE-5826] Add FIND_IN_SET function (enabled in Hive 
and Spark libraries)
2ddc6053b5 is described below

commit 2ddc6053b55147622f4a4642786d36403ce7f49a
Author: Runkang He <hrun...@gmail.com>
AuthorDate: Fri Jul 14 09:12:35 2023 +0800

    [CALCITE-5826] Add FIND_IN_SET function (enabled in Hive and Spark 
libraries)
---
 .../calcite/adapter/enumerable/RexImpTable.java    |  2 ++
 .../org/apache/calcite/runtime/SqlFunctions.java   | 25 ++++++++++++++++++++++
 .../calcite/sql/fun/SqlLibraryOperators.java       |  8 +++++++
 .../org/apache/calcite/util/BuiltInMethod.java     |  1 +
 site/_docs/reference.md                            |  1 +
 .../org/apache/calcite/test/SqlOperatorTest.java   | 25 ++++++++++++++++++++++
 6 files changed, 62 insertions(+)

diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index eaf12af158..77cd73d24f 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -178,6 +178,7 @@ import static 
org.apache.calcite.sql.fun.SqlLibraryOperators.EXISTS_NODE;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_VALUE;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.EXTRACT_XML;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.FACTORIAL;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.FIND_IN_SET;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.FLOOR_BIG_QUERY;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATE;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.FORMAT_DATETIME;
@@ -598,6 +599,7 @@ public class RexImpTable {
       defineReflective(REGEXP_INSTR, BuiltInMethod.REGEXP_INSTR2.method,
           BuiltInMethod.REGEXP_INSTR3.method, 
BuiltInMethod.REGEXP_INSTR4.method,
           BuiltInMethod.REGEXP_INSTR5.method);
+      defineMethod(FIND_IN_SET, BuiltInMethod.FIND_IN_SET.method, 
NullPolicy.ANY);
 
       map.put(TRIM, new TrimImplementor());
 
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java 
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index a92e1a95a9..5f3cbf9563 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -146,6 +146,8 @@ import static java.util.Objects.requireNonNull;
 @SuppressWarnings("UnnecessaryUnboxing")
 @Deterministic
 public class SqlFunctions {
+  private static final String COMMA_DELIMITER = ",";
+
   @SuppressWarnings("unused")
   private static final DecimalFormat DOUBLE_FORMAT =
       NumberUtil.decimalFormat("0.0E0");
@@ -1142,6 +1144,29 @@ public class SqlFunctions {
     return LEVENSHTEIN_DISTANCE.apply(string1, string2);
   }
 
+  /** SQL FIND_IN_SET(matchStr, textStr) function.
+   * Returns the index (1-based) of the given matchStr
+   * in the comma-delimited list textStr. Returns 0,
+   * if the matchStr is not found or if the matchStr
+   * contains a comma. */
+  public static @Nullable Integer findInSet(
+      @Nullable String matchStr,
+      @Nullable String textStr) {
+    if (matchStr == null || textStr == null) {
+      return null;
+    }
+    if (matchStr.contains(COMMA_DELIMITER)) {
+      return 0;
+    }
+    String[] splits = textStr.split(COMMA_DELIMITER);
+    for (int i = 0; i < splits.length; i++) {
+      if (matchStr.equals(splits[i])) {
+        return i + 1;
+      }
+    }
+    return 0;
+  }
+
   /** SQL ASCII(string) function. */
   public static int ascii(String s) {
     return s.isEmpty()
diff --git 
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java 
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index e05c4fd1c4..a5440a211d 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -413,6 +413,14 @@ public abstract class SqlLibraryOperators {
               OperandTypes.STRING_STRING_OPTIONAL_STRING,
               SqlFunctionCategory.STRING);
 
+  /** The "FIND_IN_SET(matchStr, textStr)" function. */
+  @LibraryOperator(libraries = {HIVE, SPARK})
+  public static final SqlFunction FIND_IN_SET =
+      SqlBasicFunction.create("FIND_IN_SET",
+          ReturnTypes.INTEGER_NULLABLE,
+          OperandTypes.STRING_STRING,
+          SqlFunctionCategory.STRING);
+
   /** The "GREATEST(value, value)" function. */
   @LibraryOperator(libraries = {BIG_QUERY, ORACLE})
   public static final SqlFunction GREATEST =
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java 
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index a5fc4b03e2..89f3318a28 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -379,6 +379,7 @@ public enum BuiltInMethod {
   DIFFERENCE(SqlFunctions.class, "difference", String.class, String.class),
   REVERSE(SqlFunctions.class, "reverse", String.class),
   LEVENSHTEIN(SqlFunctions.class, "levenshtein", String.class, String.class),
+  FIND_IN_SET(SqlFunctions.class, "findInSet", String.class, String.class),
   LEFT(SqlFunctions.class, "left", String.class, int.class),
   RIGHT(SqlFunctions.class, "right", String.class, int.class),
   TO_BASE64(SqlFunctions.class, "toBase64", String.class),
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index e279e87339..6210c1d8ff 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2732,6 +2732,7 @@ BigQuery's type system uses confusingly different names 
for types and functions:
 | o | EXISTSNODE(xml, xpath, [, namespaces ])        | Determines whether 
traversal of a XML document using a specified xpath results in any nodes. 
Returns 0 if no nodes remain after applying the XPath traversal on the document 
fragment of the element or elements matched by the XPath expression. Returns 1 
if any nodes remain. The optional namespace value that specifies a default 
mapping or namespace mapping for prefixes, which is used when evaluating the 
XPath expression.
 | m | EXTRACTVALUE(xml, xpathExpr))                  | Returns the text of the 
first text node which is a child of the element or elements matched by the 
XPath expression.
 | h s | FACTORIAL(integer)                           | Returns the factorial 
of *integer*, the range of *integer* is [0, 20]. Otherwise, returns NULL
+| h s | FIND_IN_SET(matchStr, textStr)               | Returns the index 
(1-based) of the given *matchStr* in the comma-delimited *textStr*. Returns 0, 
if the given *matchStr* is not found or if the *matchStr* contains a comma. For 
example, FIND_IN_SET('bc', 'a,bc,def') returns 2
 | b | FLOOR(value)                                   | Similar to standard 
`FLOOR(value)` except if *value* is an integer type, the return type is a double
 | b | FORMAT_DATE(string, date)                      | Formats *date* 
according to the specified format *string*
 | b | FORMAT_DATETIME(string, timestamp)             | Formats *timestamp* 
according to the specified format *string*
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java 
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index 013843fd8f..799b5ddbd0 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -4708,6 +4708,31 @@ public class SqlOperatorTest {
     f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
   }
 
+  @Test void testFindInSetFunc() {
+    final SqlOperatorFixture f0 = 
fixture().setFor(SqlLibraryOperators.FIND_IN_SET);
+    f0.checkFails("^find_in_set('ab', 'abc,b,ab,c,def')^",
+        "No match found for function signature FIND_IN_SET\\(<CHARACTER>, 
<CHARACTER>\\)",
+        false);
+    final Consumer<SqlOperatorFixture> consumer = f -> {
+      f.checkString("find_in_set('ab', 'abc,b,ab,c,def')",
+          "3", "INTEGER NOT NULL");
+      f.checkString("find_in_set('ab', ',,,ab,abc,b,ab,c,def')",
+          "4", "INTEGER NOT NULL");
+      f.checkString("find_in_set('def', ',,,ab,abc,c,def')",
+          "7", "INTEGER NOT NULL");
+      f.checkString("find_in_set(_UTF8'\u4F60\u597D', 
_UTF8'b,ab,c,def,\u4F60\u597D')",
+          "5", "INTEGER NOT NULL");
+      f.checkString("find_in_set('acd', ',,,ab,abc,c,def')",
+          "0", "INTEGER NOT NULL");
+      f.checkString("find_in_set('ab,', 'abc,b,ab,c,def')",
+          "0", "INTEGER NOT NULL");
+      f.checkNull("find_in_set(cast(null as varchar), 'abc,b,ab,c,def')");
+      f.checkNull("find_in_set('ab', cast(null as varchar))");
+      f.checkNull("find_in_set(cast(null as varchar), cast(null as varchar))");
+    };
+    f0.forEachLibrary(list(SqlLibrary.HIVE, SqlLibrary.SPARK), consumer);
+  }
+
   @Test void testIfFunc() {
     final SqlOperatorFixture f = fixture();
     checkIf(f.withLibrary(SqlLibrary.BIG_QUERY));

Reply via email to