This is an automated email from the ASF dual-hosted git repository.

richox pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/auron.git


The following commit(s) were added to refs/heads/master by this push:
     new c7f09f58 [AURON #1571]Implement native function of levenshtein(80) 
#1571 (#1573)
c7f09f58 is described below

commit c7f09f587187e53f18582c66e9303842dc5e74e8
Author: guixiaowen <[email protected]>
AuthorDate: Mon Nov 3 10:52:30 2025 +0800

    [AURON #1571]Implement native function of levenshtein(80) #1571 (#1573)
    
    * [AURON #1571]Implement native function of Levenshtein(80) #1571
    
    * [AURON #1571]Implement native function of levenshtein(80) #1571
    
    ---------
    
    Co-authored-by: guihuawen <[email protected]>
---
 native-engine/auron-serde/proto/auron.proto        |  1 +
 native-engine/auron-serde/src/from_proto.rs        |  2 ++
 .../spark/sql/auron/AuronFunctionSuite.scala       | 24 ++++++++++++++++++++++
 .../apache/spark/sql/auron/NativeConverters.scala  |  3 +++
 4 files changed, 30 insertions(+)

diff --git a/native-engine/auron-serde/proto/auron.proto 
b/native-engine/auron-serde/proto/auron.proto
index db1ef8ec..2f567ec8 100644
--- a/native-engine/auron-serde/proto/auron.proto
+++ b/native-engine/auron-serde/proto/auron.proto
@@ -267,6 +267,7 @@ enum ScalarFunction {
   Factorial=65;
   Hex=66;
   Power=67;
+  Levenshtein=80;
   SparkExtFunctions=10000;
 }
 
diff --git a/native-engine/auron-serde/src/from_proto.rs 
b/native-engine/auron-serde/src/from_proto.rs
index 2e4f6486..89fb1579 100644
--- a/native-engine/auron-serde/src/from_proto.rs
+++ b/native-engine/auron-serde/src/from_proto.rs
@@ -813,6 +813,8 @@ impl From<protobuf::ScalarFunction> for Arc<ScalarUDF> {
             ScalarFunction::Rpad => f::unicode::rpad(),
             ScalarFunction::SplitPart => f::string::split_part(),
             ScalarFunction::StartsWith => f::string::starts_with(),
+            ScalarFunction::Levenshtein => f::string::levenshtein(),
+
             ScalarFunction::Strpos => f::unicode::strpos(),
             ScalarFunction::Substr => f::unicode::substr(),
             // ScalarFunction::ToHex => f::string::to_hex(),
diff --git 
a/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/auron/AuronFunctionSuite.scala
 
b/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/auron/AuronFunctionSuite.scala
index ffa10136..8ef13584 100644
--- 
a/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/auron/AuronFunctionSuite.scala
+++ 
b/spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/auron/AuronFunctionSuite.scala
@@ -319,4 +319,28 @@ class AuronFunctionSuite
     val row = df.collect().head
     assert(row.isNullAt(0) && row.isNullAt(1) && row.isNullAt(2))
   }
+
+  test("test function Levenshtein") {
+    // withTable must name the table created below so it is dropped when the test ends.
+    withTable("test_levenshtein") {
+      sql(
+        "create table test_levenshtein using parquet as select '' as a, 'abc' as b, 'kitten' as c, 'frog' as d, '千世' as i, '世界千世' as j")
+      val functions =
+        """
+          |select
+          |   levenshtein(null, a),
+          |   levenshtein(a, null),
+          |   levenshtein(a, a),
+          |   levenshtein(b, b),
+          |   levenshtein(c, 'sitting'),
+          |   levenshtein(d, 'fog'),
+          |   levenshtein(i, 'fog'),
+          |   levenshtein(j, '大a界b')
+          |from test_levenshtein
+        """.stripMargin
+
+      val df = sql(functions)
+      checkAnswer(df, Seq(Row(null, null, 0, 0, 3, 1, 3, 4)))
+    }
+  }
 }
diff --git 
a/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala
 
b/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala
index 785ca3c8..d8418b17 100644
--- 
a/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala
+++ 
b/spark-extension/src/main/scala/org/apache/spark/sql/auron/NativeConverters.scala
@@ -880,6 +880,9 @@ object NativeConverters extends Logging {
       case Month(child) => buildExtScalarFunction("Month", child :: Nil, 
IntegerType)
       case DayOfMonth(child) => buildExtScalarFunction("Day", child :: Nil, 
IntegerType)
 
+      case e: Levenshtein =>
+        buildScalarFunction(pb.ScalarFunction.Levenshtein, e.children, 
e.dataType)
+
       // startswith is converted to scalar function in pruning-expr mode
       case StartsWith(expr, Literal(prefix, StringType)) if isPruningExpr =>
         buildExprNode(

Reply via email to