This is an automated email from the ASF dual-hosted git repository.

tanner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 76ba489c13 [CALCITE-5831] Add SOUNDEX function (enabled in Spark 
library)
76ba489c13 is described below

commit 76ba489c137f7a65f8e0497d3f9041debe3118b3
Author: Ran Tao <[email protected]>
AuthorDate: Mon Jul 10 17:51:12 2023 +0800

    [CALCITE-5831] Add SOUNDEX function (enabled in Spark library)
---
 .../calcite/adapter/enumerable/RexImpTable.java    |  2 ++
 .../org/apache/calcite/runtime/SqlFunctions.java   |  9 ++++++++
 .../org/apache/calcite/sql/SqlBasicFunction.java   | 10 +++++++++
 .../main/java/org/apache/calcite/sql/SqlKind.java  |  3 +++
 .../calcite/sql/fun/SqlLibraryOperators.java       |  6 ++++++
 .../org/apache/calcite/util/BuiltInMethod.java     |  1 +
 .../calcite/rel/rel2sql/RelToSqlConverterTest.java | 15 ++++++++++++++
 site/_docs/reference.md                            |  1 +
 .../org/apache/calcite/test/SqlOperatorTest.java   | 24 ++++++++++++++++++++++
 9 files changed, 71 insertions(+)

diff --git 
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java 
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 94daa1016d..7220052d10 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -231,6 +231,7 @@ import static 
org.apache.calcite.sql.fun.SqlLibraryOperators.SHA512;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.SINH;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.SORT_ARRAY;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.SOUNDEX;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.SOUNDEX_SPARK;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.SPACE;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.SPLIT;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.STARTS_WITH;
@@ -543,6 +544,7 @@ public class RexImpTable {
       defineMethod(SPACE, BuiltInMethod.SPACE.method, NullPolicy.STRICT);
       defineMethod(STRCMP, BuiltInMethod.STRCMP.method, NullPolicy.STRICT);
       defineMethod(SOUNDEX, BuiltInMethod.SOUNDEX.method, NullPolicy.STRICT);
+      defineMethod(SOUNDEX_SPARK, BuiltInMethod.SOUNDEX_SPARK.method, 
NullPolicy.STRICT);
       defineMethod(DIFFERENCE, BuiltInMethod.DIFFERENCE.method, 
NullPolicy.STRICT);
       defineMethod(REVERSE, BuiltInMethod.REVERSE.method, NullPolicy.STRICT);
       defineMethod(LEVENSHTEIN, BuiltInMethod.LEVENSHTEIN.method, 
NullPolicy.STRICT);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java 
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 4c3233bbfd..98fa1330bd 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -729,6 +729,15 @@ public class SqlFunctions {
     return SOUNDEX.soundex(s);
   }
 
+  /** SQL SOUNDEX(string) function, but returns the original s when not mapped. */
+  public static String soundexSpark(String s) {
+    try {
+      return SOUNDEX.soundex(s);
+    } catch (IllegalArgumentException ignore) {
+      return s;
+    }
+  }
+
   /** SQL DIFFERENCE(string, string) function. */
   public static int difference(String s0, String s1) {
     String result0 = soundex(s0);
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java 
b/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java
index 0230bdf3ca..7e3da75791 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java
@@ -182,6 +182,16 @@ public class SqlBasicFunction extends SqlFunction {
         getFunctionType(), monotonicityInference);
   }
 
+  /** Returns a copy of this function with a given strategy for inferring
+   * returned type. */
+  public SqlBasicFunction withReturnTypeInference(
+      SqlReturnTypeInference returnTypeInference) {
+    return new SqlBasicFunction(getName(), kind, syntax, deterministic,
+        returnTypeInference, getOperandTypeInference(), operandHandler,
+        getOperandTypeChecker(), callValidator,
+        getFunctionType(), monotonicityInference);
+  }
+
   /** Returns a copy of this function with a given strategy for inferring
    * the types of its operands. */
   public SqlBasicFunction withOperandTypeInference(
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlKind.java 
b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
index c98a0ccf49..0eb3a165f8 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlKind.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
@@ -776,6 +776,9 @@ public enum SqlKind {
   /** {@code REVERSE} function (SQL Server, MySQL). */
   REVERSE,
 
+  /** {@code SOUNDEX} function (Spark semantics). */
+  SOUNDEX_SPARK,
+
   /** {@code SUBSTR} function (BigQuery semantics). */
   SUBSTR_BIG_QUERY,
 
diff --git 
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java 
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index 4a720c8982..7a5d116631 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -830,6 +830,12 @@ public abstract class SqlLibraryOperators {
           OperandTypes.CHARACTER,
           SqlFunctionCategory.STRING);
 
+  /** The Spark variant of the SOUNDEX operator. */
+  @LibraryOperator(libraries = {SPARK})
+  public static final SqlFunction SOUNDEX_SPARK =
+      ((SqlBasicFunction) SOUNDEX).withKind(SqlKind.SOUNDEX_SPARK)
+          .withReturnTypeInference(ReturnTypes.VARCHAR_NULLABLE);
+
   @LibraryOperator(libraries = {POSTGRESQL})
   public static final SqlFunction DIFFERENCE =
       SqlBasicFunction.create("DIFFERENCE",
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java 
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index c7361bd472..e7bfbc0a1e 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -357,6 +357,7 @@ public enum BuiltInMethod {
   REPEAT(SqlFunctions.class, "repeat", String.class, int.class),
   SPACE(SqlFunctions.class, "space", int.class),
   SOUNDEX(SqlFunctions.class, "soundex", String.class),
+  SOUNDEX_SPARK(SqlFunctions.class, "soundexSpark", String.class),
   STRCMP(SqlFunctions.class, "strcmp", String.class, String.class),
   DIFFERENCE(SqlFunctions.class, "difference", String.class, String.class),
   REVERSE(SqlFunctions.class, "reverse", String.class),
diff --git 
a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java 
b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
index 92a934c707..51044cb250 100644
--- 
a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
+++ 
b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
@@ -7144,6 +7144,21 @@ class RelToSqlConverterTest {
     sql(query).withMssql().ok(expectedMssql);
   }
 
+  /** Test case for
+   * <a 
href="https://issues.apache.org/jira/browse/CALCITE-5831">[CALCITE-5831]
+   * Add SOUNDEX function (enabled in Spark library)</a>.
+   *
+   * <p>Calcite's Spark dialect SOUNDEX function should be SOUNDEX instead of 
SOUNDEX_SPARK
+   * when unparsing it.
+   */
+  @Test void testSparkSoundexFunction() {
+    final String query = "select soundex('Miller') from \"product\"\n";
+    final String expectedSql = "SELECT SOUNDEX('Miller')\n"
+        + "FROM foodmart.product";
+
+    sql(query).withSpark().withLibrary(SqlLibrary.SPARK).ok(expectedSql);
+  }
+
   /** Fluid interface to run tests. */
   static class Sql {
     private final CalciteAssert.SchemaSpec schemaSpec;
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index 1211499a25..63c169a753 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2795,6 +2795,7 @@ BigQuery's type system uses confusingly different names 
for types and functions:
 | b p | SHA512(string)                               | Calculates a SHA-512 
hash value of *string* and returns it as a hex string
 | * | SINH(numeric)                                  | Returns the hyperbolic 
sine of *numeric*
 | b m o p | SOUNDEX(string)                          | Returns the phonetic 
representation of *string*; throws if *string* is encoded with multi-byte 
encoding such as UTF-8
+| s | SOUNDEX(string)                                | Returns the phonetic 
representation of *string*; returns the original *string* if *string* is encoded 
with multi-byte encoding such as UTF-8
 | m | SPACE(integer)                                 | Returns a string of 
*integer* spaces; returns an empty string if *integer* is less than 1
 | b | SPLIT(string [, delimiter ])                   | Returns the string 
array of *string* split at *delimiter* (if omitted, default is comma)
 | b | STARTS_WITH(string1, string2)                  | Returns whether 
*string2* is a prefix of *string1*
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java 
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index ac54e24c70..3fc1ec425a 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -4376,6 +4376,30 @@ public class SqlOperatorTest {
     f0.forEachLibrary(libraries, consumer);
   }
 
+  @Test void testSoundexSparkFunc() {
+    final SqlOperatorFixture f0 = 
fixture().setFor(SqlLibraryOperators.SOUNDEX_SPARK);
+    f0.checkFails("^soundex('tech on the net')^",
+        "No match found for function signature SOUNDEX\\(<CHARACTER>\\)",
+        false);
+    final Consumer<SqlOperatorFixture> consumer = f -> {
+      f.checkString("SOUNDEX('TECH ON THE NET')", "T253", "VARCHAR NOT NULL");
+      f.checkString("SOUNDEX('Miller')", "M460", "VARCHAR NOT NULL");
+      f.checkString("SOUNDEX('miler')", "M460", "VARCHAR NOT NULL");
+      f.checkString("SOUNDEX('myller')", "M460", "VARCHAR NOT NULL");
+      f.checkString("SOUNDEX('muller')", "M460", "VARCHAR NOT NULL");
+      f.checkString("SOUNDEX('m')", "M000", "VARCHAR NOT NULL");
+      f.checkString("SOUNDEX('mu')", "M000", "VARCHAR NOT NULL");
+      f.checkString("SOUNDEX('mile')", "M400", "VARCHAR NOT NULL");
+      // note: this differs from SOUNDEX for bigquery/mysql/oracle/pg
+      f.checkString("SOUNDEX(_UTF8'\u5B57\u5B57')",
+          "字字", "VARCHAR NOT NULL");
+      f.checkString("SOUNDEX(_UTF8'\u5B57\u5B57\u5B57\u5B57')",
+          "字字字字", "VARCHAR NOT NULL");
+      f.checkNull("SOUNDEX(cast(null as varchar(1)))");
+    };
+    f0.forEachLibrary(list(SqlLibrary.SPARK), consumer);
+  }
+
   @Test void testDifferenceFunc() {
     final SqlOperatorFixture f = fixture()
         .setFor(SqlLibraryOperators.DIFFERENCE)

Reply via email to