This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new a5cbd1d192 [GLUTEN-7475][VL] Add a config to control whether to add
trim node when CAST from varchar (#7476)
a5cbd1d192 is described below
commit a5cbd1d192bdc29b5759a53875f5e17f1c9e3bde
Author: Henry2SS <[email protected]>
AuthorDate: Thu Oct 24 15:18:57 2024 +0800
[GLUTEN-7475][VL] Add a config to control whether to add trim node when
CAST from varchar (#7476)
---
.../gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala | 4 ++--
docs/Configuration.md | 1 +
.../src/main/scala/org/apache/gluten/GlutenConfig.scala | 12 ++++++++++++
3 files changed, 15 insertions(+), 2 deletions(-)
diff --git
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
index d30caa1779..81564a4401 100644
---
a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
+++
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala
@@ -729,7 +729,7 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
c
case FloatType | DoubleType | _: DecimalType =>
c.child.dataType match {
- case StringType =>
+ case StringType if GlutenConfig.getConf.castFromVarcharAddTrimNode =>
val trimNode = StringTrim(c.child, Some(Literal(trimSpaceStr)))
c.withNewChildren(Seq(trimNode)).asInstanceOf[Cast]
case _ =>
@@ -737,7 +737,7 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi {
}
case _ =>
c.child.dataType match {
- case StringType =>
+ case StringType if GlutenConfig.getConf.castFromVarcharAddTrimNode =>
val trimNode = StringTrim(
c.child,
Some(
diff --git a/docs/Configuration.md b/docs/Configuration.md
index e044caf852..76549dd4fe 100644
--- a/docs/Configuration.md
+++ b/docs/Configuration.md
@@ -91,6 +91,7 @@ The following configurations are related to Velox settings.
| spark.gluten.sql.columnar.backend.velox.maxCoalescedBytes | Set
the max coalesced bytes for velox file scan.
|
|
| spark.gluten.sql.columnar.backend.velox.cachePrefetchMinPct | Set
prefetch cache min pct for velox file scan.
|
|
| spark.gluten.velox.awsSdkLogLevel | Log
granularity of AWS C++ SDK in velox.
| FATAL
|
+| spark.gluten.velox.castFromVarcharAddTrimNode | If
enabled, will add a trim node to CAST-from-varchar. Default is false.
| false
|
| spark.gluten.velox.fs.s3a.retry.mode | Retry
mode for AWS s3 connection error, can be "legacy", "standard" and "adaptive".
| legacy
|
| spark.gluten.velox.fs.s3a.connect.timeout |
Timeout for AWS s3 connection.
| 1s
|
| spark.gluten.sql.columnar.backend.velox.orc.scan.enabled |
Enable velox orc scan. If disabled, vanilla spark orc scan will be used.
| true
|
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index 089f695958..de111d33ed 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -311,6 +311,8 @@ class GlutenConfig(conf: SQLConf) extends Logging {
def veloxBloomFilterMaxNumBits: Long =
conf.getConf(COLUMNAR_VELOX_BLOOM_FILTER_MAX_NUM_BITS)
+ def castFromVarcharAddTrimNode: Boolean =
conf.getConf(CAST_FROM_VARCHAR_ADD_TRIM_NODE)
+
case class ResizeRange(min: Int, max: Int) {
assert(max >= min)
assert(min > 0, "Min batch size should be larger than 0")
@@ -2125,4 +2127,14 @@ object GlutenConfig {
"Otherwise, throw an exception.")
.booleanConf
.createWithDefault(true)
+
+ val CAST_FROM_VARCHAR_ADD_TRIM_NODE =
+ buildConf("spark.gluten.velox.castFromVarcharAddTrimNode")
+ .internal()
+ .doc(
+ "If true, will add a trim node " +
+ "which has the same semantics as vanilla Spark to CAST-from-varchar. "
+
+ "Otherwise, do nothing.")
+ .booleanConf
+ .createWithDefault(false)
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]