This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 63e83bd30 [CORE] Add support for Spark url_decode function (#5070)
63e83bd30 is described below
commit 63e83bd30f112ae4f305b4decc087153726f94ed
Author: 高阳阳 <[email protected]>
AuthorDate: Fri Mar 22 20:53:45 2024 +0800
[CORE] Add support for Spark url_decode function (#5070)
---
.../scala/io/glutenproject/utils/CHExpressionUtil.scala | 1 +
.../execution/VeloxFunctionsValidateSuite.scala | 15 +++++++++++++++
.../io/glutenproject/expression/ExpressionConverter.scala | 14 ++++++++++++++
.../io/glutenproject/expression/ExpressionNames.scala | 1 +
4 files changed, 31 insertions(+)
diff --git a/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala b/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
index 24555c05c..028e4e9e9 100644
--- a/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
+++ b/backends-clickhouse/src/main/scala/io/glutenproject/utils/CHExpressionUtil.scala
@@ -177,6 +177,7 @@ object CHExpressionUtil {
DATE_FROM_UNIX_DATE -> DefaultValidator(),
MONOTONICALLY_INCREASING_ID -> DefaultValidator(),
SPARK_PARTITION_ID -> DefaultValidator(),
+ URL_DECODE -> DefaultValidator(),
SKEWNESS -> DefaultValidator(),
BIT_LENGTH -> DefaultValidator()
)
diff --git a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala
index 1e9871a6c..ef9c80c4e 100644
--- a/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/io/glutenproject/execution/VeloxFunctionsValidateSuite.scala
@@ -458,6 +458,21 @@ class VeloxFunctionsValidateSuite extends VeloxWholeStageTransformerSuite {
}
}
+ testWithSpecifiedSparkVersion("Test url_decode function", Some("3.4.2")) {
+ withTempPath {
+ path =>
+ Seq("https%3A%2F%2Fspark.apache.org")
+ .toDF("a")
+ .write
+ .parquet(path.getCanonicalPath)
+ spark.sparkContext.setLogLevel("info")
+ spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("url_tbl")
+ runQueryAndCompare("select url_decode(a) from url_tbl") {
+ checkOperatorMatch[ProjectExecTransformer]
+ }
+ }
+ }
+
test("Test hex function") {
runQueryAndCompare("SELECT hex(l_partkey), hex(l_shipmode) FROM lineitem limit 1") {
checkOperatorMatch[ProjectExecTransformer]
diff --git a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala
index f8f3ead05..4de0ab142 100644
--- a/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala
+++ b/gluten-core/src/main/scala/io/glutenproject/expression/ExpressionConverter.scala
@@ -28,6 +28,7 @@ import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.{InternalRow, SQLConfHelper}
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
+import org.apache.spark.sql.catalyst.expressions.objects.StaticInvoke
import org.apache.spark.sql.catalyst.optimizer.NormalizeNaNAndZero
import org.apache.spark.sql.execution.{ScalarSubquery, _}
import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
@@ -115,6 +116,19 @@ object ExpressionConverter extends SQLConfHelper with Logging {
return replaceScalaUDFWithExpressionTransformer(s, attributeSeq, expressionsMap)
case _ if HiveUDFTransformer.isHiveUDF(expr) =>
return HiveUDFTransformer.replaceWithExpressionTransformer(expr, attributeSeq)
+ case i: StaticInvoke =>
+ val objectName = i.staticObject.getName.stripSuffix("$")
+ if (objectName.endsWith("UrlCodec")) {
+ val child = i.arguments(0)
+ i.functionName match {
+ case "decode" =>
+ return GenericExpressionTransformer(
+ ExpressionNames.URL_DECODE,
+ child.map(
+ replaceWithExpressionTransformerInternal(_, attributeSeq, expressionsMap)),
+ i)
+ }
+ }
case _ =>
}
diff --git a/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala b/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
index cb9e1ab71..2f3391629 100644
--- a/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
+++ b/shims/common/src/main/scala/io/glutenproject/expression/ExpressionNames.scala
@@ -117,6 +117,7 @@ object ExpressionNames {
// URL functions
final val PARSE_URL = "parse_url"
+ final val URL_DECODE = "url_decode"
// SparkSQL Math functions
final val ABS = "abs"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]