HappenLee commented on code in PR #12970:
URL: https://github.com/apache/doris/pull/12970#discussion_r981024664


##########
be/src/vec/runtime/vparquet_writer.cpp:
##########
@@ -173,22 +178,27 @@ Status VParquetWriterWrapper::write(const Block& block) {
     try {
         for (size_t i = 0; i < block.columns(); i++) {
             auto& raw_column = block.get_by_position(i).column;
-            const auto col = raw_column->is_nullable()
-                                     ? reinterpret_cast<const ColumnNullable*>(
-                                               
block.get_by_position(i).column.get())
-                                               ->get_nested_column_ptr()
-                                               .get()
-                                     : block.get_by_position(i).column.get();
-            auto null_map =
-                    raw_column->is_nullable() && reinterpret_cast<const 
ColumnNullable*>(
-                                                         
block.get_by_position(i).column.get())
-                                                         
->get_null_map_column_ptr()
-                                                         ->has_null()
-                            ? reinterpret_cast<const ColumnNullable*>(
-                                      block.get_by_position(i).column.get())
-                                      ->get_null_map_column_ptr()
-                            : nullptr;
+            auto nullable = raw_column->is_nullable();
+            const auto col = nullable ? reinterpret_cast<const 
ColumnNullable*>(
+                                                
block.get_by_position(i).column.get())
+                                                ->get_nested_column_ptr()
+                                                .get()
+                                      : block.get_by_position(i).column.get();
+            auto null_map = nullable && reinterpret_cast<const 
ColumnNullable*>(
+                                                
block.get_by_position(i).column.get())
+                                                    ->has_null()
+                                    ? reinterpret_cast<const ColumnNullable*>(
+                                              
block.get_by_position(i).column.get())
+                                              ->get_null_map_column_ptr()
+                                    : nullptr;
             auto& type = block.get_by_position(i).type;
+
+            int16_t def_level[sz];
+            for (size_t def_index = 0; def_index < sz; def_index++) {

Review Comment:
   Would it be better to use `std::fill` here?



##########
be/src/vec/runtime/vparquet_writer.cpp:
##########
@@ -215,58 +225,64 @@ Status VParquetWriterWrapper::write(const Block& block) {
                 parquet::RowGroupWriter* rgWriter = get_rg_writer();
                 parquet::Int32Writer* col_writer =
                         
static_cast<parquet::Int32Writer*>(rgWriter->column(i));
-                int32_t default_int32 = 0;
                 if (null_map != nullptr) {
+                    auto& null_data = assert_cast<const 
ColumnUInt8&>(*null_map).get_data();
                     if (const auto* nested_column =
                                 check_and_get_column<const 
ColumnVector<Int32>>(col)) {
                         for (size_t row_id = 0; row_id < sz; row_id++) {
-                            col_writer->WriteBatch(
-                                    1, nullptr, nullptr,
-                                    (*null_map)[row_id] != 0
-                                            ? &default_int32
-                                            : reinterpret_cast<const int32_t*>(
-                                                      
nested_column->get_data_at(row_id).data));
+                            if (null_data[row_id] != 0) {
+                                def_level[row_id] = 0;
+                            }
                         }
+                        col_writer->WriteBatch(sz, def_level, nullptr,
+                                               
nested_column->get_data().data());
                     } else if (const auto* int16_column =
                                        check_and_get_column<const 
ColumnVector<Int16>>(col)) {
                         for (size_t row_id = 0; row_id < sz; row_id++) {

Review Comment:
   It seems there is no need to set `def_level` here.



##########
be/src/vec/runtime/vparquet_writer.cpp:
##########
@@ -215,58 +225,64 @@ Status VParquetWriterWrapper::write(const Block& block) {
                 parquet::RowGroupWriter* rgWriter = get_rg_writer();
                 parquet::Int32Writer* col_writer =
                         
static_cast<parquet::Int32Writer*>(rgWriter->column(i));
-                int32_t default_int32 = 0;
                 if (null_map != nullptr) {
+                    auto& null_data = assert_cast<const 
ColumnUInt8&>(*null_map).get_data();
                     if (const auto* nested_column =
                                 check_and_get_column<const 
ColumnVector<Int32>>(col)) {
                         for (size_t row_id = 0; row_id < sz; row_id++) {
-                            col_writer->WriteBatch(
-                                    1, nullptr, nullptr,
-                                    (*null_map)[row_id] != 0
-                                            ? &default_int32
-                                            : reinterpret_cast<const int32_t*>(
-                                                      
nested_column->get_data_at(row_id).data));
+                            if (null_data[row_id] != 0) {
+                                def_level[row_id] = 0;
+                            }
                         }
+                        col_writer->WriteBatch(sz, def_level, nullptr,
+                                               
nested_column->get_data().data());
                     } else if (const auto* int16_column =
                                        check_and_get_column<const 
ColumnVector<Int16>>(col)) {
                         for (size_t row_id = 0; row_id < sz; row_id++) {
+                            if (null_data[row_id] != 0) {
+                                def_level[row_id] = 0;
+                            }
+                        }
+                        for (size_t row_id = 0; row_id < sz; row_id++) {
+                            if (null_data[row_id] != 0) {
+                                single_def_level = 0;
+                            }
                             const int32_t tmp = 
int16_column->get_data()[row_id];
-                            col_writer->WriteBatch(
-                                    1, nullptr, nullptr,
-                                    (*null_map)[row_id] != 0
-                                            ? &default_int32
-                                            : reinterpret_cast<const 
int32_t*>(&tmp));
+                            col_writer->WriteBatch(1, &single_def_level, 
nullptr,

Review Comment:
   TODO: Maybe pre-allocating the `std::vector<int32>` would be faster?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to