This is an automated email from the ASF dual-hosted git repository.

changchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 8e5f63c0c [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240409) (#5329)
8e5f63c0c is described below

commit 8e5f63c0c5524d8bc5da25c53f2a179fcd96725c
Author: Kyligence Git <[email protected]>
AuthorDate: Tue Apr 9 05:01:28 2024 -0500

    [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240409) (#5329)
    
    * [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240409)
    
    * fix build due to https://github.com/ClickHouse/ClickHouse/pull/61984
    
    ---------
    
    Co-authored-by: kyligence-git <[email protected]>
    Co-authored-by: Chang Chen <[email protected]>
---
 cpp-ch/clickhouse.version                                 |  4 ++--
 .../Storages/Parquet/VectorizedParquetRecordReader.cpp    | 15 +++++----------
 .../Storages/Parquet/VectorizedParquetRecordReader.h      |  1 +
 cpp-ch/local-engine/tests/gtest_parquet_read.cpp          |  3 +--
 cpp-ch/local-engine/tests/gtest_parquet_write.cpp         |  3 +--
 5 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version
index c319d669d..7d185eec8 100644
--- a/cpp-ch/clickhouse.version
+++ b/cpp-ch/clickhouse.version
@@ -1,3 +1,3 @@
 CH_ORG=Kyligence
-CH_BRANCH=rebase_ch/20240407
-CH_COMMIT=b89fd4d532b
\ No newline at end of file
+CH_BRANCH=rebase_ch/20240409
+CH_COMMIT=9f58d706e23
\ No newline at end of file
diff --git a/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.cpp b/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.cpp
index 2e697b959..4f4e88aa7 100644
--- a/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.cpp
+++ b/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.cpp
@@ -234,20 +234,15 @@ DB::Chunk VectorizedParquetRecordReader::nextBatch()
 {
     assert(initialized());
     ::arrow::ChunkedArrayVector columns(column_readers_.size());
-    DB::ArrowColumnToCHColumn::NameToColumnPtr name_to_column_ptr;
+    DB::ArrowColumnToCHColumn::NameToArrowColumn name_to_column_ptr;
     for (auto & vectorized_column_reader : column_readers_)
     {
-        const std::shared_ptr<arrow::ChunkedArray> arrow_column
-            = vectorized_column_reader.readBatch(format_settings_.parquet.max_block_size);
-        name_to_column_ptr[lowerColumnNameIfNeed(vectorized_column_reader.columnName(), format_settings_)] = arrow_column;
+        name_to_column_ptr[lowerColumnNameIfNeed(vectorized_column_reader.columnName(), format_settings_)]
+            = {vectorized_column_reader.readBatch(format_settings_.parquet.max_block_size), vectorized_column_reader.arrowField()};
     }
 
-    if (const size_t num_rows = name_to_column_ptr.begin()->second->length(); num_rows > 0)
-    {
-        DB::Chunk result;
-        arrow_column_to_ch_column_.arrowColumnsToCHChunk(result, name_to_column_ptr, num_rows, nullptr);
-        return result;
-    }
+    if (const size_t num_rows = name_to_column_ptr.begin()->second.column->length(); num_rows > 0)
+        return arrow_column_to_ch_column_.arrowColumnsToCHChunk(name_to_column_ptr, num_rows, nullptr);
     return {};
 }
 
diff --git a/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.h b/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.h
index 88338887b..a9c796a75 100644
--- a/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.h
+++ b/cpp-ch/local-engine/Storages/Parquet/VectorizedParquetRecordReader.h
@@ -188,6 +188,7 @@ class VectorizedColumnReader
 public:
     VectorizedColumnReader(const parquet::arrow::SchemaField & field, ParquetFileReaderExt * reader, const std::vector<Int32> & row_groups);
     const std::string & columnName() const { return arrow_field_->name(); }
+    std::shared_ptr<arrow::Field> arrowField() const { return arrow_field_; }
     bool hasMoreRead() const { return read_state_ && read_state_->hasMoreRead(); }
     std::shared_ptr<arrow::ChunkedArray> readBatch(int64_t batch_size);
 };
diff --git a/cpp-ch/local-engine/tests/gtest_parquet_read.cpp b/cpp-ch/local-engine/tests/gtest_parquet_read.cpp
index 0414ece6d..94f28763e 100644
--- a/cpp-ch/local-engine/tests/gtest_parquet_read.cpp
+++ b/cpp-ch/local-engine/tests/gtest_parquet_read.cpp
@@ -344,8 +344,7 @@ TEST(ParquetRead, ArrowRead)
         format_settings.date_time_overflow_behavior,
         format_settings.parquet.case_insensitive_column_matching);
 
-    Chunk chunk;
-    converter.arrowTableToCHChunk(chunk, table, table->num_rows());
+    Chunk chunk = converter.arrowTableToCHChunk(table, table->num_rows());
     Block res = header.cloneWithColumns(chunk.detachColumns());
     EXPECT_EQ(res.rows(), 20);
 
diff --git a/cpp-ch/local-engine/tests/gtest_parquet_write.cpp b/cpp-ch/local-engine/tests/gtest_parquet_write.cpp
index 8b97d22d2..28cc4010d 100644
--- a/cpp-ch/local-engine/tests/gtest_parquet_write.cpp
+++ b/cpp-ch/local-engine/tests/gtest_parquet_write.cpp
@@ -210,8 +210,7 @@ TEST(ParquetWrite, ComplexTypes)
 
     /// Convert Arrow Table to CH Block
     ArrowColumnToCHColumn arrow2ch(header, "Parquet", true, true, FormatSettings::DateTimeOverflowBehavior::Ignore);
-    Chunk output_chunk;
-    arrow2ch.arrowTableToCHChunk(output_chunk, arrow_table, arrow_table->num_rows());
+    Chunk output_chunk = arrow2ch.arrowTableToCHChunk(arrow_table, arrow_table->num_rows());
 
     /// Compare input and output columns
     const auto & input_columns = input_chunks.back().getColumns();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to