This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 83b332adc1 [GLUTEN-7545][CH] Fix regexp_replace group catching syntax diff (#7603)
83b332adc1 is described below
commit 83b332adc17d541bc3d3ce295eba325f7ba43fec
Author: zhanglistar <[email protected]>
AuthorDate: Tue Oct 22 10:11:10 2024 +0800
[GLUTEN-7545][CH] Fix regexp_replace group catching syntax diff (#7603)
---
.../expression/CHExpressionTransformer.scala | 26 +++++++++++++++++++++-
.../GlutenClickhouseFunctionSuite.scala | 16 +++++++++++++
2 files changed, 41 insertions(+), 1 deletion(-)
diff --git
a/backends-clickhouse/src/main/scala/org/apache/gluten/expression/CHExpressionTransformer.scala
b/backends-clickhouse/src/main/scala/org/apache/gluten/expression/CHExpressionTransformer.scala
index fe8d23f9a9..ab9fdd29f6 100644
---
a/backends-clickhouse/src/main/scala/org/apache/gluten/expression/CHExpressionTransformer.scala
+++
b/backends-clickhouse/src/main/scala/org/apache/gluten/expression/CHExpressionTransformer.scala
@@ -195,7 +195,31 @@ case class CHRegExpReplaceTransformer(
!posNode.isInstanceOf[IntLiteralNode] ||
posNode.asInstanceOf[IntLiteralNode].getValue != 1
) {
- throw new UnsupportedOperationException(s"$original not supported yet.")
+ throw new UnsupportedOperationException(s"$original dose not supported
position yet.")
+ }
+ // Replace $num in rep with \num used in CH
+ val repNode = childrenWithPos(2).doTransform(args)
+ repNode match {
+ case node: StringLiteralNode =>
+ val strValue = node.getValue
+ val replacedValue = strValue.replaceAll("\\$(\\d+)", "\\\\$1")
+ if (replacedValue != strValue) {
+ val functionName = ConverterUtils.makeFuncName(
+ substraitExprName,
+ Seq(original.subject.dataType, original.regexp.dataType,
original.rep.dataType),
+ FunctionConfig.OPT)
+ val replacedRepNode = ExpressionBuilder.makeLiteral(replacedValue,
StringType, false)
+ val exprNodes = Lists.newArrayList(
+ childrenWithPos(0).doTransform(args),
+ childrenWithPos(1).doTransform(args),
+ replacedRepNode)
+ val functionMap = args.asInstanceOf[java.util.HashMap[String,
java.lang.Long]]
+ return ExpressionBuilder.makeScalarFunction(
+ ExpressionBuilder.newScalarFunction(functionMap, functionName),
+ exprNodes,
+ ConverterUtils.getTypeNode(original.dataType, original.nullable))
+ }
+ case _ =>
}
super.doTransform(args)
diff --git
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
index 5cf2bb1a78..869ca603b1 100644
---
a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
+++
b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala
@@ -369,4 +369,20 @@ class GlutenClickhouseFunctionSuite extends
GlutenClickHouseTPCHAbstractSuite {
}
}
+ test("GLUTEN-7545: https://github.com/apache/incubator-gluten/issues/7545") {
+ withTable("regexp_test") {
+ sql("create table if not exists regexp_test (id string) using parquet")
+ sql("insert into regexp_test values('1999-6-1')")
+ compareResultsAgainstVanillaSpark(
+ """
+ |select regexp_replace(id,
+ |'([0-9]{4})-([0-9]{1,2})-([0-9]{1,2})',
+ |'$1-$2-$3') from regexp_test
+ """.stripMargin,
+ true,
+ { _ => }
+ )
+ }
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]