This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new dc3d703210 [GLUTEN-1632][CH]Daily Update Clickhouse Version (20250604) (#9856)
dc3d703210 is described below
commit dc3d70321083fff8138f7cad030ba067f6ce30db
Author: Kyligence Git <[email protected]>
AuthorDate: Wed Jun 4 02:14:28 2025 -0500
[GLUTEN-1632][CH]Daily Update Clickhouse Version (20250604) (#9856)
* [GLUTEN-1632][CH]Daily Update Clickhouse Version (20250604)
* Fix Build due to https://github.com/ClickHouse/ClickHouse/pull/79649
---------
Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang chen <[email protected]>
---
cpp-ch/clickhouse.version | 4 ++--
cpp-ch/local-engine/Storages/Parquet/ParquetMeta.cpp | 4 +++-
cpp-ch/local-engine/Storages/Parquet/ParquetMeta.h | 2 ++
.../Storages/Parquet/VectorizedParquetRecordReader.cpp | 9 +++++----
.../local-engine/Storages/SubstraitSource/ParquetFormatFile.cpp | 1 +
cpp-ch/local-engine/tests/benchmark_parquet_read.cpp | 2 ++
cpp-ch/local-engine/tests/gtest_parquet_read.cpp | 1 +
cpp-ch/local-engine/tests/gtest_parquet_write.cpp | 2 +-
8 files changed, 17 insertions(+), 8 deletions(-)
diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index b40f883e34..13bc986ba6 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20250529
-CH_COMMIT=2f645f377ac
+CH_BRANCH=rebase_ch/20250604
+CH_COMMIT=549ef373744
diff --git a/cpp-ch/local-engine/Storages/Parquet/ParquetMeta.cpp b/cpp-ch/local-engine/Storages/Parquet/ParquetMeta.cpp
index cd3bc58e89..22c8e66c01 100644
--- a/cpp-ch/local-engine/Storages/Parquet/ParquetMeta.cpp
+++ b/cpp-ch/local-engine/Storages/Parquet/ParquetMeta.cpp
@@ -57,6 +57,7 @@ Block ParquetMetaBuilder::collectFileSchema(const ContextPtr & context, ReadBuff
FormatSettings format_settings = getFormatSettings(context);
ParquetMetaBuilder metaBuilder{
+ .format_settings = format_settings,
.case_insensitive =
format_settings.parquet.case_insensitive_column_matching,
.allow_missing_columns = false,
.collectPageIndex = false,
@@ -110,7 +111,8 @@ ParquetMetaBuilder & ParquetMetaBuilder::buildSchema(const parquet::FileMetaData
std::shared_ptr<arrow::Schema> schema;
THROW_ARROW_NOT_OK(parquet::arrow::FromParquetSchema(file_meta.schema(),
&schema));
- fileHeader = ArrowColumnToCHColumn::arrowSchemaToCHHeader(*schema, file_meta.key_value_metadata(), "Parquet", false, true);
+ fileHeader = ArrowColumnToCHColumn::arrowSchemaToCHHeader(
+ *schema, file_meta.key_value_metadata(), "Parquet", format_settings, false, true);
}
return *this;
}
diff --git a/cpp-ch/local-engine/Storages/Parquet/ParquetMeta.h b/cpp-ch/local-engine/Storages/Parquet/ParquetMeta.h
index 35abf5fe82..5668d5444a 100644
--- a/cpp-ch/local-engine/Storages/Parquet/ParquetMeta.h
+++ b/cpp-ch/local-engine/Storages/Parquet/ParquetMeta.h
@@ -17,6 +17,7 @@
#pragma once
#include <Core/Block.h>
+#include <Formats/FormatSettings.h>
#include <Storages/Parquet/ColumnIndexFilter.h>
#include <Storages/Parquet/RowRanges.h>
#include <base/types.h>
@@ -40,6 +41,7 @@ struct RowGroupInformation
struct ParquetMetaBuilder
{
+ DB::FormatSettings format_settings;
// control flag
bool case_insensitive = false;
bool allow_missing_columns = false;
diff --git a/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.cpp b/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.cpp
index ace4abb2fd..1662245ef3 100644
--- a/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.cpp
+++ b/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.cpp
@@ -181,10 +181,11 @@ VectorizedParquetRecordReader::VectorizedParquetRecordReader(const DB::Block & h
, arrow_column_to_ch_column_(
parquet_header_,
"Parquet",
- format_settings.parquet.allow_missing_columns,
- format_settings.null_as_default,
- format_settings.date_time_overflow_behavior,
- format_settings.parquet.case_insensitive_column_matching)
+ format_settings_,
+ format_settings_.parquet.allow_missing_columns,
+ format_settings_.null_as_default,
+ format_settings_.date_time_overflow_behavior,
+ format_settings_.parquet.case_insensitive_column_matching)
{
}
diff --git a/cpp-ch/local-engine/Storages/SubstraitSource/ParquetFormatFile.cpp b/cpp-ch/local-engine/Storages/SubstraitSource/ParquetFormatFile.cpp
index 7c81dd8494..a4ccfa93b7 100644
--- a/cpp-ch/local-engine/Storages/SubstraitSource/ParquetFormatFile.cpp
+++ b/cpp-ch/local-engine/Storages/SubstraitSource/ParquetFormatFile.cpp
@@ -154,6 +154,7 @@ FormatFile::InputFormatPtr ParquetFormatFile::createInputFormat(const Block & he
Block read_header =
DeltaVirtualMeta::removeMetaColumns(removeMetaColumns(header));
ParquetMetaBuilder metaBuilder{
+ .format_settings = format_settings,
.collectPageIndex = usePageIndexReader || readRowIndex,
.collectSkipRowGroup = !usePageIndexReader,
.case_insensitive =
format_settings.parquet.case_insensitive_column_matching,
diff --git a/cpp-ch/local-engine/tests/benchmark_parquet_read.cpp b/cpp-ch/local-engine/tests/benchmark_parquet_read.cpp
index a1b72dba51..2b56f44622 100644
--- a/cpp-ch/local-engine/tests/benchmark_parquet_read.cpp
+++ b/cpp-ch/local-engine/tests/benchmark_parquet_read.cpp
@@ -56,6 +56,7 @@ void BM_ColumnIndexRead_NoFilter(benchmark::State & state)
for (auto _ : state)
{
local_engine::ParquetMetaBuilder metaBuilder{
+ .format_settings = format_settings,
.collectPageIndex = true,
.collectSkipRowGroup = false,
.case_insensitive =
format_settings.parquet.case_insensitive_column_matching,
@@ -299,6 +300,7 @@ std::pair<size_t, int64_t> calculateRowsAndDeleteCount(benchmark::State & state,
FormatSettings format_settings;
ParquetMetaBuilder metaBuilder{
+ .format_settings = format_settings,
.case_insensitive =
format_settings.parquet.case_insensitive_column_matching,
.allow_missing_columns =
format_settings.parquet.allow_missing_columns};
ReadBufferFromFilePRead fileReader(file_path);
diff --git a/cpp-ch/local-engine/tests/gtest_parquet_read.cpp b/cpp-ch/local-engine/tests/gtest_parquet_read.cpp
index 88644d4b18..57b6b86ae0 100644
--- a/cpp-ch/local-engine/tests/gtest_parquet_read.cpp
+++ b/cpp-ch/local-engine/tests/gtest_parquet_read.cpp
@@ -361,6 +361,7 @@ TEST(ParquetRead, ArrowRead)
ArrowColumnToCHColumn converter(
header,
"Parquet",
+ format_settings,
format_settings.parquet.allow_missing_columns,
format_settings.null_as_default,
format_settings.date_time_overflow_behavior,
diff --git a/cpp-ch/local-engine/tests/gtest_parquet_write.cpp b/cpp-ch/local-engine/tests/gtest_parquet_write.cpp
index 18f86a9ba6..6e31f48157 100644
--- a/cpp-ch/local-engine/tests/gtest_parquet_write.cpp
+++ b/cpp-ch/local-engine/tests/gtest_parquet_write.cpp
@@ -209,7 +209,7 @@ TEST(ParquetWrite, ComplexTypes)
ch2arrow.chChunkToArrowTable(arrow_table, input_chunks, header.columns());
/// Convert Arrow Table to CH Block
- ArrowColumnToCHColumn arrow2ch(header, "Parquet", true, true, FormatSettings::DateTimeOverflowBehavior::Ignore, false);
+ ArrowColumnToCHColumn arrow2ch(header, "Parquet", format_settings, true, true, FormatSettings::DateTimeOverflowBehavior::Ignore, false);
Chunk output_chunk = arrow2ch.arrowTableToCHChunk(arrow_table,
arrow_table->num_rows(), nullptr, nullptr);
/// Compare input and output columns
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]