This is an automated email from the ASF dual-hosted git repository.
richox pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/auron.git
The following commit(s) were added to refs/heads/master by this push:
new c7f09f58 [AURON #1571]Implement native function of levenshtein(80)
#1571 (#1573)
c7f09f58 is described below
commit c7f09f587187e53f18582c66e9303842dc5e74e8
Author: guixiaowen <[email protected]>
AuthorDate: Mon Nov 3 10:52:30 2025 +0800
[AURON #1571]Implement native function of levenshtein(80) #1571 (#1573)
* [AURON #1571]Implement native function of Levenshtein(80) #1571
* [AURON #1571]Implement native function of levenshtein(80) #1571
---------
Co-authored-by: guihuawen <[email protected]>
---
native-engine/auron-serde/proto/auron.proto | 1 +
native-engine/auron-serde/src/from_proto.rs | 2 ++
.../spark/sql/auron/AuronFunctionSuite.scala | 24 ++++++++++++++++++++++
.../apache/spark/sql/auron/NativeConverters.scala | 3 +++
4 files changed, 30 insertions(+)
diff --git a/native-engine/auron-serde/proto/auron.proto
b/native-engine/auron-serde/proto/auron.proto
index db1ef8ec..2f567ec8 100644
--- a/native-engine/auron-serde/proto/auron.proto
+++ b/native-engine/auron-serde/proto/auron.proto
@@ -267,6 +267,7 @@ enum ScalarFunction {
Factorial=65;
Hex=66;
Power=67;
+ Levenshtein=80;
SparkExtFunctions=10000;
}
diff --git a/native-engine/auron-serde/src/from_proto.rs
b/native-engine/auron-serde/src/from_proto.rs
index 2e4f6486..89fb1579 100644
--- a/native-engine/auron-serde/src/from_proto.rs
+++ b/native-engine/auron-serde/src/from_proto.rs
@@ -813,6 +813,8 @@ impl From<protobuf::ScalarFunction> for Arc<ScalarUDF> {
ScalarFunction::Rpad => f::unicode::rpad(),
ScalarFunction::SplitPart => f::string::split_part(),
ScalarFunction::StartsWith => f::string::starts_with(),
+ ScalarFunction::Levenshtein => f::string::levenshtein(),
+
ScalarFunction::Strpos => f::unicode::strpos(),
ScalarFunction::Substr => f::unicode::substr(),
// ScalarFunction::ToHex => f::string::to_hex(),
diff --git
a/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/auron/AuronFunctionSuite.scala
b/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/auron/AuronFunctionSuite.scala
index ffa10136..8ef13584 100644
---
a/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/auron/AuronFunctionSuite.scala
+++
b/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/auron/AuronFunctionSuite.scala
@@ -319,4 +319,28 @@ class AuronFunctionSuite
val row = df.collect().head
assert(row.isNullAt(0) && row.isNullAt(1) && row.isNullAt(2))
}
+
+  test("test function Levenshtein") {
+    // withTable drops the named tables after the body runs, so it must name the table created below.
+    withTable("test_levenshtein") {
+      sql(
+        "create table test_levenshtein using parquet as select '' as a, 'abc' as b, 'kitten' as c, 'frog' as d, '千世' as i, '世界千世' as j")
+      val functions =
+        """
+          |select
+          | levenshtein(null, a),
+          | levenshtein(a, null),
+          | levenshtein(a, a),
+          | levenshtein(b, b),
+          | levenshtein(c, 'sitting'),
+          | levenshtein(d, 'fog'),
+          | levenshtein(i, 'fog'),
+          | levenshtein(j, '大a界b')
+          |from test_levenshtein
+        """.stripMargin
+
+      val df = sql(functions)
+      checkAnswer(df, Seq(Row(null, null, 0, 0, 3, 1, 3, 4)))
+    }
+  }
}
diff --git
a/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala
b/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala
index 785ca3c8..d8418b17 100644
---
a/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala
+++
b/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala
@@ -880,6 +880,9 @@ object NativeConverters extends Logging {
case Month(child) => buildExtScalarFunction("Month", child :: Nil,
IntegerType)
case DayOfMonth(child) => buildExtScalarFunction("Day", child :: Nil,
IntegerType)
+ case e: Levenshtein =>
+ buildScalarFunction(pb.ScalarFunction.Levenshtein, e.children,
e.dataType)
+
// startswith is converted to scalar function in pruning-expr mode
case StartsWith(expr, Literal(prefix, StringType)) if isPruningExpr =>
buildExprNode(