This is an automated email from the ASF dual-hosted git repository.
changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 8e5f63c0c [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240409) (#5329)
8e5f63c0c is described below
commit 8e5f63c0c5524d8bc5da25c53f2a179fcd96725c
Author: Kyligence Git <[email protected]>
AuthorDate: Tue Apr 9 05:01:28 2024 -0500
[GLUTEN-1632][CH]Daily Update Clickhouse Version (20240409) (#5329)
* [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240409)
* fix build due to https://github.com/ClickHouse/ClickHouse/pull/61984
---------
Co-authored-by: kyligence-git <[email protected]>
Co-authored-by: Chang Chen <[email protected]>
---
cpp-ch/clickhouse.version | 4 ++--
.../Storages/Parquet/VectorizedParquetRecordReader.cpp | 15 +++++----------
.../Storages/Parquet/VectorizedParquetRecordReader.h | 1 +
cpp-ch/local-engine/tests/gtest_parquet_read.cpp | 3 +--
cpp-ch/local-engine/tests/gtest_parquet_write.cpp | 3 +--
5 files changed, 10 insertions(+), 16 deletions(-)
diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index c319d669d..7d185eec8 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20240407
-CH_COMMIT=b89fd4d532b
\ No newline at end of file
+CH_BRANCH=rebase_ch/20240409
+CH_COMMIT=9f58d706e23
\ No newline at end of file
diff --git a/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.cpp b/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.cpp
index 2e697b959..4f4e88aa7 100644
--- a/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.cpp
+++ b/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.cpp
@@ -234,20 +234,15 @@ DB::Chunk VectorizedParquetRecordReader::nextBatch()
{
assert(initialized());
::arrow::ChunkedArrayVector columns(column_readers_.size());
- DB::ArrowColumnToCHColumn::NameToColumnPtr name_to_column_ptr;
+ DB::ArrowColumnToCHColumn::NameToArrowColumn name_to_column_ptr;
for (auto & vectorized_column_reader : column_readers_)
{
- const std::shared_ptr<arrow::ChunkedArray> arrow_column
- = vectorized_column_reader.readBatch(format_settings_.parquet.max_block_size);
- name_to_column_ptr[lowerColumnNameIfNeed(vectorized_column_reader.columnName(), format_settings_)] = arrow_column;
+ name_to_column_ptr[lowerColumnNameIfNeed(vectorized_column_reader.columnName(), format_settings_)]
+ = {vectorized_column_reader.readBatch(format_settings_.parquet.max_block_size), vectorized_column_reader.arrowField()};
}
- if (const size_t num_rows = name_to_column_ptr.begin()->second->length(); num_rows > 0)
- {
- DB::Chunk result;
- arrow_column_to_ch_column_.arrowColumnsToCHChunk(result, name_to_column_ptr, num_rows, nullptr);
- return result;
- }
+ if (const size_t num_rows = name_to_column_ptr.begin()->second.column->length(); num_rows > 0)
+ return arrow_column_to_ch_column_.arrowColumnsToCHChunk(name_to_column_ptr, num_rows, nullptr);
return {};
}
diff --git a/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.h b/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.h
index 88338887b..a9c796a75 100644
--- a/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.h
+++ b/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.h
@@ -188,6 +188,7 @@ class VectorizedColumnReader
public:
VectorizedColumnReader(const parquet::arrow::SchemaField & field,
ParquetFileReaderExt * reader, const std::vector<Int32> & row_groups);
const std::string & columnName() const { return arrow_field_->name(); }
+ std::shared_ptr<arrow::Field> arrowField() const { return arrow_field_; }
bool hasMoreRead() const { return read_state_ &&
read_state_->hasMoreRead(); }
std::shared_ptr<arrow::ChunkedArray> readBatch(int64_t batch_size);
};
diff --git a/cpp-ch/local-engine/tests/gtest_parquet_read.cpp b/cpp-ch/local-engine/tests/gtest_parquet_read.cpp
index 0414ece6d..94f28763e 100644
--- a/cpp-ch/local-engine/tests/gtest_parquet_read.cpp
+++ b/cpp-ch/local-engine/tests/gtest_parquet_read.cpp
@@ -344,8 +344,7 @@ TEST(ParquetRead, ArrowRead)
format_settings.date_time_overflow_behavior,
format_settings.parquet.case_insensitive_column_matching);
- Chunk chunk;
- converter.arrowTableToCHChunk(chunk, table, table->num_rows());
+ Chunk chunk = converter.arrowTableToCHChunk(table, table->num_rows());
Block res = header.cloneWithColumns(chunk.detachColumns());
EXPECT_EQ(res.rows(), 20);
diff --git a/cpp-ch/local-engine/tests/gtest_parquet_write.cpp b/cpp-ch/local-engine/tests/gtest_parquet_write.cpp
index 8b97d22d2..28cc4010d 100644
--- a/cpp-ch/local-engine/tests/gtest_parquet_write.cpp
+++ b/cpp-ch/local-engine/tests/gtest_parquet_write.cpp
@@ -210,8 +210,7 @@ TEST(ParquetWrite, ComplexTypes)
/// Convert Arrow Table to CH Block
ArrowColumnToCHColumn arrow2ch(header, "Parquet", true, true,
FormatSettings::DateTimeOverflowBehavior::Ignore);
- Chunk output_chunk;
- arrow2ch.arrowTableToCHChunk(output_chunk, arrow_table, arrow_table->num_rows());
+ Chunk output_chunk = arrow2ch.arrowTableToCHChunk(arrow_table, arrow_table->num_rows());
/// Compare input and output columns
const auto & input_columns = input_chunks.back().getColumns();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]