This is an automated email from the ASF dual-hosted git repository.
tanner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 76ba489c13 [CALCITE-5831] Add SOUNDEX function (enabled in Spark
library)
76ba489c13 is described below
commit 76ba489c137f7a65f8e0497d3f9041debe3118b3
Author: Ran Tao <[email protected]>
AuthorDate: Mon Jul 10 17:51:12 2023 +0800
[CALCITE-5831] Add SOUNDEX function (enabled in Spark library)
---
.../calcite/adapter/enumerable/RexImpTable.java | 2 ++
.../org/apache/calcite/runtime/SqlFunctions.java | 9 ++++++++
.../org/apache/calcite/sql/SqlBasicFunction.java | 10 +++++++++
.../main/java/org/apache/calcite/sql/SqlKind.java | 3 +++
.../calcite/sql/fun/SqlLibraryOperators.java | 6 ++++++
.../org/apache/calcite/util/BuiltInMethod.java | 1 +
.../calcite/rel/rel2sql/RelToSqlConverterTest.java | 15 ++++++++++++++
site/_docs/reference.md | 1 +
.../org/apache/calcite/test/SqlOperatorTest.java | 24 ++++++++++++++++++++++
9 files changed, 71 insertions(+)
diff --git
a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 94daa1016d..7220052d10 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -231,6 +231,7 @@ import static
org.apache.calcite.sql.fun.SqlLibraryOperators.SHA512;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.SINH;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.SORT_ARRAY;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.SOUNDEX;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.SOUNDEX_SPARK;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.SPACE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.SPLIT;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.STARTS_WITH;
@@ -543,6 +544,7 @@ public class RexImpTable {
defineMethod(SPACE, BuiltInMethod.SPACE.method, NullPolicy.STRICT);
defineMethod(STRCMP, BuiltInMethod.STRCMP.method, NullPolicy.STRICT);
defineMethod(SOUNDEX, BuiltInMethod.SOUNDEX.method, NullPolicy.STRICT);
+ defineMethod(SOUNDEX_SPARK, BuiltInMethod.SOUNDEX_SPARK.method,
NullPolicy.STRICT);
defineMethod(DIFFERENCE, BuiltInMethod.DIFFERENCE.method,
NullPolicy.STRICT);
defineMethod(REVERSE, BuiltInMethod.REVERSE.method, NullPolicy.STRICT);
defineMethod(LEVENSHTEIN, BuiltInMethod.LEVENSHTEIN.method,
NullPolicy.STRICT);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 4c3233bbfd..98fa1330bd 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -729,6 +729,15 @@ public class SqlFunctions {
return SOUNDEX.soundex(s);
}
+ /** SQL SOUNDEX(string) function, but returns the original s when it is not mapped. */
+ public static String soundexSpark(String s) {
+ try {
+ return SOUNDEX.soundex(s);
+ } catch (IllegalArgumentException ignore) {
+ return s;
+ }
+ }
+
/** SQL DIFFERENCE(string, string) function. */
public static int difference(String s0, String s1) {
String result0 = soundex(s0);
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java
b/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java
index 0230bdf3ca..7e3da75791 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlBasicFunction.java
@@ -182,6 +182,16 @@ public class SqlBasicFunction extends SqlFunction {
getFunctionType(), monotonicityInference);
}
+ /** Returns a copy of this function with a given strategy for inferring
+ * returned type. */
+ public SqlBasicFunction withReturnTypeInference(
+ SqlReturnTypeInference returnTypeInference) {
+ return new SqlBasicFunction(getName(), kind, syntax, deterministic,
+ returnTypeInference, getOperandTypeInference(), operandHandler,
+ getOperandTypeChecker(), callValidator,
+ getFunctionType(), monotonicityInference);
+ }
+
/** Returns a copy of this function with a given strategy for inferring
* the types of its operands. */
public SqlBasicFunction withOperandTypeInference(
diff --git a/core/src/main/java/org/apache/calcite/sql/SqlKind.java
b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
index c98a0ccf49..0eb3a165f8 100644
--- a/core/src/main/java/org/apache/calcite/sql/SqlKind.java
+++ b/core/src/main/java/org/apache/calcite/sql/SqlKind.java
@@ -776,6 +776,9 @@ public enum SqlKind {
/** {@code REVERSE} function (SQL Server, MySQL). */
REVERSE,
+ /** {@code SOUNDEX} function (Spark semantics). */
+ SOUNDEX_SPARK,
+
/** {@code SUBSTR} function (BigQuery semantics). */
SUBSTR_BIG_QUERY,
diff --git
a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index 4a720c8982..7a5d116631 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -830,6 +830,12 @@ public abstract class SqlLibraryOperators {
OperandTypes.CHARACTER,
SqlFunctionCategory.STRING);
+ /** The Spark variant of the SOUNDEX operator. */
+ @LibraryOperator(libraries = {SPARK})
+ public static final SqlFunction SOUNDEX_SPARK =
+ ((SqlBasicFunction) SOUNDEX).withKind(SqlKind.SOUNDEX_SPARK)
+ .withReturnTypeInference(ReturnTypes.VARCHAR_NULLABLE);
+
@LibraryOperator(libraries = {POSTGRESQL})
public static final SqlFunction DIFFERENCE =
SqlBasicFunction.create("DIFFERENCE",
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index c7361bd472..e7bfbc0a1e 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -357,6 +357,7 @@ public enum BuiltInMethod {
REPEAT(SqlFunctions.class, "repeat", String.class, int.class),
SPACE(SqlFunctions.class, "space", int.class),
SOUNDEX(SqlFunctions.class, "soundex", String.class),
+ SOUNDEX_SPARK(SqlFunctions.class, "soundexSpark", String.class),
STRCMP(SqlFunctions.class, "strcmp", String.class, String.class),
DIFFERENCE(SqlFunctions.class, "difference", String.class, String.class),
REVERSE(SqlFunctions.class, "reverse", String.class),
diff --git
a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
index 92a934c707..51044cb250 100644
---
a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
+++
b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java
@@ -7144,6 +7144,21 @@ class RelToSqlConverterTest {
sql(query).withMssql().ok(expectedMssql);
}
+ /** Test case for
+ * <a
href="https://issues.apache.org/jira/browse/CALCITE-5831">[CALCITE-5831]
+ * Add SOUNDEX function (enabled in Spark library)</a>.
+ *
+ * <p>Calcite's Spark dialect should unparse this function as SOUNDEX, not
+ * SOUNDEX_SPARK.
+ */
+ @Test void testSparkSoundexFunction() {
+ final String query = "select soundex('Miller') from \"product\"\n";
+ final String expectedSql = "SELECT SOUNDEX('Miller')\n"
+ + "FROM foodmart.product";
+
+ sql(query).withSpark().withLibrary(SqlLibrary.SPARK).ok(expectedSql);
+ }
+
/** Fluid interface to run tests. */
static class Sql {
private final CalciteAssert.SchemaSpec schemaSpec;
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index 1211499a25..63c169a753 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2795,6 +2795,7 @@ BigQuery's type system uses confusingly different names
for types and functions:
| b p | SHA512(string) | Calculates a SHA-512
hash value of *string* and returns it as a hex string
| * | SINH(numeric) | Returns the hyperbolic
sine of *numeric*
| b m o p | SOUNDEX(string) | Returns the phonetic
representation of *string*; throws if *string* is encoded with multi-byte
encoding such as UTF-8
+| s | SOUNDEX(string) | Returns the phonetic
representation of *string*; returns the original *string* if *string* is encoded
with multi-byte encoding such as UTF-8
| m | SPACE(integer) | Returns a string of
*integer* spaces; returns an empty string if *integer* is less than 1
| b | SPLIT(string [, delimiter ]) | Returns the string
array of *string* split at *delimiter* (if omitted, default is comma)
| b | STARTS_WITH(string1, string2) | Returns whether
*string2* is a prefix of *string1*
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index ac54e24c70..3fc1ec425a 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -4376,6 +4376,30 @@ public class SqlOperatorTest {
f0.forEachLibrary(libraries, consumer);
}
+ @Test void testSoundexSparkFunc() {
+ final SqlOperatorFixture f0 =
fixture().setFor(SqlLibraryOperators.SOUNDEX_SPARK);
+ f0.checkFails("^soundex('tech on the net')^",
+ "No match found for function signature SOUNDEX\\(<CHARACTER>\\)",
+ false);
+ final Consumer<SqlOperatorFixture> consumer = f -> {
+ f.checkString("SOUNDEX('TECH ON THE NET')", "T253", "VARCHAR NOT NULL");
+ f.checkString("SOUNDEX('Miller')", "M460", "VARCHAR NOT NULL");
+ f.checkString("SOUNDEX('miler')", "M460", "VARCHAR NOT NULL");
+ f.checkString("SOUNDEX('myller')", "M460", "VARCHAR NOT NULL");
+ f.checkString("SOUNDEX('muller')", "M460", "VARCHAR NOT NULL");
+ f.checkString("SOUNDEX('m')", "M000", "VARCHAR NOT NULL");
+ f.checkString("SOUNDEX('mu')", "M000", "VARCHAR NOT NULL");
+ f.checkString("SOUNDEX('mile')", "M400", "VARCHAR NOT NULL");
+ // note: this differs from soundex for bigquery/mysql/oracle/pg
+ f.checkString("SOUNDEX(_UTF8'\u5B57\u5B57')",
+ "字字", "VARCHAR NOT NULL");
+ f.checkString("SOUNDEX(_UTF8'\u5B57\u5B57\u5B57\u5B57')",
+ "字字字字", "VARCHAR NOT NULL");
+ f.checkNull("SOUNDEX(cast(null as varchar(1)))");
+ };
+ f0.forEachLibrary(list(SqlLibrary.SPARK), consumer);
+ }
+
@Test void testDifferenceFunc() {
final SqlOperatorFixture f = fixture()
.setFor(SqlLibraryOperators.DIFFERENCE)