This is an automated email from the ASF dual-hosted git repository.
jlfsdtc pushed a commit to branch kylin5
in repository https://gitbox.apache.org/repos/asf/kylin.git
The following commit(s) were added to refs/heads/kylin5 by this push:
new fa490cb879 KYLIN-6021 Add use_excel_serialization=true for CSV
fa490cb879 is described below
commit fa490cb879a5c032a9e65bc91ac4f1f8f217c81d
Author: Shuai li <[email protected]>
AuthorDate: Mon Nov 11 10:06:38 2024 +0800
KYLIN-6021 Add use_excel_serialization=true for CSV
---
src/core-common/src/main/resources/kylin-defaults0.properties | 2 ++
.../test/java/org/apache/spark/sql/common/GlutenTestConfig.scala | 9 +++++----
2 files changed, 7 insertions(+), 4 deletions(-)
diff --git a/src/core-common/src/main/resources/kylin-defaults0.properties b/src/core-common/src/main/resources/kylin-defaults0.properties
index edfb9729be..c5f617b6fa 100644
--- a/src/core-common/src/main/resources/kylin-defaults0.properties
+++ b/src/core-common/src/main/resources/kylin-defaults0.properties
@@ -498,6 +498,7 @@ kylin.storage.columnar.spark-conf.spark.gluten.sql.columnar.backend.ch.runtime_c
kylin.storage.columnar.spark-conf.spark.gluten.sql.columnar.backend.ch.runtime_config.tmp_path=/tmp/kyligence_glt/tmp_ch
kylin.storage.columnar.spark-conf.spark.gluten.sql.columnar.extended.columnar.pre.rules=org.apache.spark.sql.execution.gluten.ConvertKylinFileSourceToGlutenRule
kylin.storage.columnar.spark-conf.spark.gluten.sql.columnar.extended.expressions.transformer=org.apache.spark.sql.catalyst.expressions.gluten.CustomerExpressionTransformer
+kylin.storage.columnar.spark-conf.spark.gluten.sql.columnar.backend.ch.runtime_settings.use_excel_serialization=true
## pageIndex
kylin.storage.columnar.spark-conf.spark.gluten.sql.columnar.backend.ch.runtime_config.use_local_format=true
@@ -532,6 +533,7 @@ kylin.engine.spark-conf.spark.gluten.sql.columnar.backend.ch.runtime_config.path
kylin.engine.spark-conf.spark.gluten.sql.columnar.backend.ch.runtime_config.tmp_path=/tmp/kyligence_glt/tmp_ch
kylin.engine.spark-conf.spark.gluten.sql.columnar.extended.columnar.pre.rules=org.apache.spark.sql.execution.gluten.ConvertKylinFileSourceToGlutenRule
kylin.engine.spark-conf.spark.gluten.sql.columnar.extended.expressions.transformer=org.apache.spark.sql.catalyst.expressions.gluten.CustomerExpressionTransformer
+kylin.engine.spark-conf.spark.gluten.sql.columnar.backend.ch.runtime_settings.use_excel_serialization=true
## on yarn
kylin.engine.spark-conf.spark.gluten.sql.columnar.backend.ch.runtime_config.hdfs.libhdfs3_conf=/etc/hadoop/conf/hdfs-site.xml
kylin.engine.spark-conf.spark.gluten.sql.columnar.executor.libpath=libch.so
diff --git a/src/spark-project/spark-common/src/test/java/org/apache/spark/sql/common/GlutenTestConfig.scala b/src/spark-project/spark-common/src/test/java/org/apache/spark/sql/common/GlutenTestConfig.scala
index cf9183e390..47bc43014b 100644
--- a/src/spark-project/spark-common/src/test/java/org/apache/spark/sql/common/GlutenTestConfig.scala
+++ b/src/spark-project/spark-common/src/test/java/org/apache/spark/sql/common/GlutenTestConfig.scala
@@ -67,22 +67,23 @@ object GlutenTestConfig extends Logging {
conf.set("spark.databricks.delta.stalenessLimit", "3600000")
conf.set("spark.gluten.sql.columnar.backend.ch.worker.id", "1")
conf.set("spark.gluten.sql.columnar.coalesce.batches", "false")
- conf.set("spark.gluten.sql.columnar.backend.ch.runtime_conf.logger.level",
"error")
+
conf.set("spark.gluten.sql.columnar.backend.ch.runtime_config.logger.level",
"error")
conf.set("spark.io.compression.codec", "LZ4")
conf.set("spark.gluten.sql.columnar.shuffle.customizedCompression.codec",
"LZ4")
conf.set("spark.gluten.sql.columnar.backend.ch.customized.shuffle.codec.enable",
"true")
conf.set("spark.gluten.sql.columnar.backend.ch.customized.buffer.size",
"4096")
conf.set("spark.gluten.sql.columnar.backend.ch.files.per.partition.threshold",
"5")
-
conf.set("spark.gluten.sql.columnar.backend.ch.runtime_conf.enable_nullable",
"true")
+
conf.set("spark.gluten.sql.columnar.backend.ch.runtime_config.enable_nullable",
"true")
conf.set(
-
"spark.gluten.sql.columnar.backend.ch.runtime_conf.local_engine.settings.metrics_perf_events_enabled",
"false")
+
"spark.gluten.sql.columnar.backend.ch.runtime_config.local_engine.settings.metrics_perf_events_enabled",
"false")
conf.set("spark.gluten.sql.columnar.backend.ch.runtime_config.logger.level",
"ERROR")
conf.set(
-
"spark.gluten.sql.columnar.backend.ch.runtime_conf.local_engine.settings.max_bytes_before_external_group_by",
+
"spark.gluten.sql.columnar.backend.ch.runtime_config.local_engine.settings.max_bytes_before_external_group_by",
"5000000000")
conf.set("spark.gluten.sql.columnar.maxBatchSize", "32768")
conf.set("spark.gluten.sql.columnar.backend.ch.shuffle.hash.algorithm",
"sparkMurmurHash3_32")
conf.set("spark.gluten.sql.columnar.backend.ch.runtime_config.use_local_format",
"true")
+
conf.set("spark.gluten.sql.columnar.backend.ch.runtime_settings.use_excel_serialization",
"true")
}
}
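
For readers who want to try the new CSV-related setting outside of Kylin's bundled defaults, a minimal Scala sketch follows. It only reuses the property key this patch adds to kylin-defaults0.properties and GlutenTestConfig.scala; the object name, app name, master URL, and session setup are illustrative assumptions and not part of the commit.

import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession

// Minimal sketch (not from the patch): supplies the same Gluten ClickHouse-backend
// runtime setting to a standalone Spark session for local experimentation.
object UseExcelSerializationExample {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("gluten-ch-csv-example") // hypothetical app name
      .setMaster("local[*]")               // assumption: local run
      // Same key the patch adds; per the commit title it affects CSV handling
      // in the ClickHouse backend's runtime settings.
      .set("spark.gluten.sql.columnar.backend.ch.runtime_settings.use_excel_serialization", "true")

    val spark = SparkSession.builder().config(conf).getOrCreate()
    // ... read CSV sources here; runtime_settings.* keys are forwarded to the
    // backend via the Spark conf, following the naming used in this patch.
    spark.stop()
  }
}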