This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 83b332adc1 [GLUTEN-7545][CH] Fix regexp_replace group catching syntax diff (#7603)
83b332adc1 is described below

commit 83b332adc17d541bc3d3ce295eba325f7ba43fec
Author: zhanglistar <[email protected]>
AuthorDate: Tue Oct 22 10:11:10 2024 +0800

    [GLUTEN-7545][CH] Fix regexp_replace group catching syntax diff (#7603)
---
 .../expression/CHExpressionTransformer.scala       | 26 +++++++++++++++++++++-
 .../GlutenClickhouseFunctionSuite.scala            | 16 +++++++++++++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/expression/CHExpressionTransformer.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/expression/CHExpressionTransformer.scala
index fe8d23f9a9..ab9fdd29f6 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/expression/CHExpressionTransformer.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/expression/CHExpressionTransformer.scala
@@ -195,7 +195,31 @@ case class CHRegExpReplaceTransformer(
       !posNode.isInstanceOf[IntLiteralNode] ||
       posNode.asInstanceOf[IntLiteralNode].getValue != 1
     ) {
-      throw new UnsupportedOperationException(s"$original not supported yet.")
+      throw new UnsupportedOperationException(s"$original dose not supported position yet.")
+    }
+    // Replace $num in rep with \num used in CH
+    val repNode = childrenWithPos(2).doTransform(args)
+    repNode match {
+      case node: StringLiteralNode =>
+        val strValue = node.getValue
+        val replacedValue = strValue.replaceAll("\\$(\\d+)", "\\\\$1")
+        if (replacedValue != strValue) {
+          val functionName = ConverterUtils.makeFuncName(
+            substraitExprName,
+            Seq(original.subject.dataType, original.regexp.dataType, original.rep.dataType),
+            FunctionConfig.OPT)
+          val replacedRepNode = ExpressionBuilder.makeLiteral(replacedValue, StringType, false)
+          val exprNodes = Lists.newArrayList(
+            childrenWithPos(0).doTransform(args),
+            childrenWithPos(1).doTransform(args),
+            replacedRepNode)
+          val functionMap = args.asInstanceOf[java.util.HashMap[String, java.lang.Long]]
+          return ExpressionBuilder.makeScalarFunction(
+            ExpressionBuilder.newScalarFunction(functionMap, functionName),
+            exprNodes,
+            ConverterUtils.getTypeNode(original.dataType, original.nullable))
+        }
+      case _ =>
     }
 
     super.doTransform(args)
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
index 5cf2bb1a78..869ca603b1 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
@@ -369,4 +369,20 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite {
     }
   }
 
+  test("GLUTEN-7545: https://github.com/apache/incubator-gluten/issues/7545") {
+    withTable("regexp_test") {
+      sql("create table if not exists regexp_test (id string) using parquet")
+      sql("insert into regexp_test values('1999-6-1')")
+      compareResultsAgainstVanillaSpark(
+        """
+          |select regexp_replace(id,
+          |'([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})',
+          |'$1-$2-$3') from regexp_test
+        """.stripMargin,
+        true,
+        { _ => }
+      )
+    }
+  }
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to