This is an automated email from the ASF dual-hosted git repository. yuanzhou pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 54465a9019 [CORE] Gen config also verify config as expected (#10259)
54465a9019 is described below
commit 54465a90193435f1247e4c2844783d90033793cd
Author: Kaifei Yi <[email protected]>
AuthorDate: Mon Jul 28 22:30:02 2025 +0800
[CORE] Gen config also verify config as expected (#10259)
#9572 added the respected Parquet configurations, which were not produced by
the configuration generation mechanism. Surprisingly, the check mechanism
still passed, because it handled the row (line) count check incorrectly.
This PR fixes that issue.
---
dev/gen_all_config_docs.sh | 0
docs/Configuration.md | 46 ----------------------
.../gluten/config/AllGlutenConfiguration.scala | 9 ++---
3 files changed, 4 insertions(+), 51 deletions(-)
diff --git a/dev/gen_all_config_docs.sh b/dev/gen_all_config_docs.sh
old mode 100644
new mode 100755
diff --git a/docs/Configuration.md b/docs/Configuration.md
index cdf809ed42..5313386cad 100644
--- a/docs/Configuration.md
+++ b/docs/Configuration.md
@@ -173,49 +173,3 @@ nav_order: 15
| spark.gluten.supported.scala.udfs
|| Supported scala udf names.
[...]
| spark.gluten.ui.enabled | true
| Whether to enable the gluten web UI, If true, attach the gluten UI
page to the Spark web UI.
[...]
-## Parquet write configurations
-| | parquet-mr default
| Spark default | Velox Default | Gluten Support |
-|---------------------------------------------------|
------------------------------------------ |---------------| -------------
|----------------|
-| -------------------Spark---------------- |
| | | |
-| spark.sql.parquet.outputTimestampType |
| int96 | | |
-| spark.sql.parquet.writeLegacyFormat |
| false | | |
-| -------------------Velox/Arrow---------------- |
| | | |
-| write_batch_size |
| | 1024 | Y (batch size) |
-| rowgroup_length |
| | 1M | |
-| compression_level |
| | 0 | |
-| page_index |
| | false | |
-| decimal_as_integer |
| | false | |
-| statistics_enabled |
| | false | |
-| -------------------parquet-mr---------------- |
| | | |
-| parquet.summary.metadata.level | all
| | | |
-| parquet.enable.summary-metadata | true
| | | |
-| parquet.block.size | 128m
| | | Y |
-| parquet.page.size | 1m
| | 1M | Y |
-| parquet.compression | uncompressed
| snappy | uncompressed | Y |
-| parquet.write.support.class |
org.apache.parquet.hadoop.api.WriteSupport | | |
|
-| parquet.enable.dictionary | true
| | true | Y |
-| parquet.dictionary.page.size | 1m
| | 1m | |
-| parquet.validation | false
| | | |
-| parquet.writer.version | PARQUET_1_0
| | PARQUET_2_6 | Y |
-| parquet.memory.pool.ratio | 0.95
| | | |
-| parquet.memory.min.chunk.size | 1m
| | | |
-| parquet.writer.max-padding | 8m
| | | |
-| parquet.page.size.row.check.min | 100
| | | |
-| parquet.page.size.row.check.max | 10000
| | | |
-| parquet.page.value.count.threshold | Integer.MAX_VALUE / 2
| | | |
-| parquet.page.size.check.estimate | true
| | | |
-| parquet.columnindex.truncate.length | 64
| | | |
-| parquet.statistics.truncate.length | 2147483647
| | | |
-| parquet.bloom.filter.enabled | false
| | | |
-| parquet.bloom.filter.adaptive.enabled | false
| | | |
-| parquet.bloom.filter.candidates.number | 5
| | | |
-| parquet.bloom.filter.expected.ndv |
| | | |
-| parquet.bloom.filter.fpp | 0.01
| | | |
-| parquet.bloom.filter.max.bytes | 1m
| | | |
-| parquet.decrypt.off-heap.buffer.enabled | false
| | | |
-| parquet.page.row.count.limit | 20000
| | | |
-| parquet.page.write-checksum.enabled | true
| | false | |
-| parquet.crypto.factory.class | None
| | | |
-| parquet.compression.codec.zstd.bufferPool.enabled | true
| | | |
-| parquet.compression.codec.zstd.level | 3
| | 0 | Y |
-| parquet.compression.codec.zstd.workers | 0
| | | |
diff --git
a/gluten-substrait/src/test/scala/org/apache/gluten/config/AllGlutenConfiguration.scala
b/gluten-substrait/src/test/scala/org/apache/gluten/config/AllGlutenConfiguration.scala
index fc85494d3e..d5afbe9c1d 100644
---
a/gluten-substrait/src/test/scala/org/apache/gluten/config/AllGlutenConfiguration.scala
+++
b/gluten-substrait/src/test/scala/org/apache/gluten/config/AllGlutenConfiguration.scala
@@ -191,7 +191,10 @@ object AllGlutenConfiguration {
s" Please regenerate it by running `${regenScript.stripMargin}`. "
} else ""
}
- var fileLineCount = 0
+ val fileLineCount = fileLinesIter.length
+ withClue(s"Line number is not expected. $regenerationHint") {
+ assertResult(expectedLinesIter.size)(fileLineCount)(prettifier, pos)
+ }
fileLinesIter.zipWithIndex
.zip(expectedLinesIter)
.foreach {
@@ -200,11 +203,7 @@ object AllGlutenConfiguration {
withClue(s"Line $lineNum is not expected. $regenerationHint") {
assertResult(expectedLine)(lineInFile)(prettifier, pos)
}
- fileLineCount = Math.max(lineNum, fileLineCount)
}
- withClue(s"Line number is not expected. $regenerationHint") {
- assertResult(expectedLinesIter.size)(fileLineCount)(prettifier, pos)
- }
} finally {
fileSource.close()
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
