morningman commented on code in PR #24350:
URL: https://github.com/apache/doris/pull/24350#discussion_r1329711757


##########
be/src/vec/runtime/vorc_transformer.cpp:
##########
@@ -419,207 +422,358 @@ Status VOrcTransformer::write(const Block& block) {
     try {
         for (size_t i = 0; i < block.columns(); i++) {
             auto& raw_column = block.get_by_position(i).column;
-            auto nullable = raw_column->is_nullable();
-            const auto col = nullable ? reinterpret_cast<const 
ColumnNullable*>(
-                                                
block.get_by_position(i).column.get())
-                                                ->get_nested_column_ptr()
-                                                .get()
-                                      : block.get_by_position(i).column.get();
-            auto null_map = nullable && reinterpret_cast<const 
ColumnNullable*>(
-                                                
block.get_by_position(i).column.get())
-                                                    ->has_null()
-                                    ? reinterpret_cast<const ColumnNullable*>(
-                                              
block.get_by_position(i).column.get())
-                                              ->get_null_map_column_ptr()
-                                    : nullptr;
-            switch (_output_vexpr_ctxs[i]->root()->type().type) {
-            case TYPE_BOOLEAN: {
-                WRITE_SINGLE_ELEMENTS_INTO_BATCH(orc::LongVectorBatch, 
ColumnVector<UInt8>)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_TINYINT: {
-                WRITE_SINGLE_ELEMENTS_INTO_BATCH(orc::LongVectorBatch, 
ColumnVector<Int8>)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_SMALLINT: {
-                WRITE_SINGLE_ELEMENTS_INTO_BATCH(orc::LongVectorBatch, 
ColumnVector<Int16>)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_INT: {
-                WRITE_SINGLE_ELEMENTS_INTO_BATCH(orc::LongVectorBatch, 
ColumnVector<Int32>)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_BIGINT: {
-                WRITE_CONTINUOUS_ELEMENTS_INTO_BATCH(orc::LongVectorBatch, 
ColumnVector<Int64>,
-                                                     Int64)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_LARGEINT: {
-                char* ptr = (char*)malloc(BUFFER_UNIT_SIZE);
-                bufferList[i].data = ptr;
-                bufferList[i].size = BUFFER_UNIT_SIZE;
-                size_t offset = 0;
-                WRITE_LARGEINT_STRING_INTO_BATCH(orc::StringVectorBatch, 
ColumnVector<Int128>,
-                                                 bufferList[i])
-                SET_NUM_ELEMENTS;
-                break;
-            }
-            case TYPE_FLOAT: {
-                WRITE_SINGLE_ELEMENTS_INTO_BATCH(orc::DoubleVectorBatch, 
ColumnVector<Float32>)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_DOUBLE: {
-                WRITE_CONTINUOUS_ELEMENTS_INTO_BATCH(orc::DoubleVectorBatch, 
ColumnVector<Float64>,
-                                                     Float64)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_DATETIME:
-            case TYPE_DATE: {
-                char* ptr = (char*)malloc(BUFFER_UNIT_SIZE);
-                bufferList[i].data = ptr;
-                bufferList[i].size = BUFFER_UNIT_SIZE;
-                size_t offset = 0;
-                WRITE_DATE_STRING_INTO_BATCH(Int64, VecDateTimeValue, 
bufferList[i])
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_DATEV2: {
-                char* ptr = (char*)malloc(BUFFER_UNIT_SIZE);
-                bufferList[i].data = ptr;
-                bufferList[i].size = BUFFER_UNIT_SIZE;
-                size_t offset = 0;
-                WRITE_DATE_STRING_INTO_BATCH(UInt32, 
DateV2Value<DateV2ValueType>, bufferList[i])
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_DATETIMEV2: {
-                char* ptr = (char*)malloc(BUFFER_UNIT_SIZE);
-                bufferList[i].data = ptr;
-                bufferList[i].size = BUFFER_UNIT_SIZE;
-                size_t offset = 0;
-                WRITE_DATETIMEV2_STRING_INTO_BATCH(UInt64, 
DateV2Value<DateTimeV2ValueType>,
-                                                   bufferList[i])
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_OBJECT: {
-                if (_output_object_data) {
-                    WRITE_COMPLEX_TYPE_INTO_BATCH(orc::StringVectorBatch, 
ColumnBitmap)
-                    SET_NUM_ELEMENTS
+            _write_one_col(_output_vexpr_ctxs[i]->root()->type(), root->fields[i], raw_column, 0,
+                           sz, &bufferList);
+        }
+    } catch (const std::exception& e) {
+        LOG(WARNING) << "Orc write error: " << e.what();
+        return Status::InternalError(e.what());
+    }
+    root->numElements = sz;
+    _writer->add(*row_batch);
+    _cur_written_rows += sz;
+
+    return Status::OK();
+}
+
+Status VOrcTransformer::_write_one_col(const TypeDescriptor type_descriptor,

Review Comment:
   ```suggestion
   Status VOrcTransformer::_write_one_col(const TypeDescriptor& type_descriptor,
   ```



##########
be/src/vec/runtime/vorc_transformer.cpp:
##########
@@ -419,207 +422,358 @@ Status VOrcTransformer::write(const Block& block) {
     try {
         for (size_t i = 0; i < block.columns(); i++) {
             auto& raw_column = block.get_by_position(i).column;
-            auto nullable = raw_column->is_nullable();
-            const auto col = nullable ? reinterpret_cast<const 
ColumnNullable*>(
-                                                
block.get_by_position(i).column.get())
-                                                ->get_nested_column_ptr()
-                                                .get()
-                                      : block.get_by_position(i).column.get();
-            auto null_map = nullable && reinterpret_cast<const 
ColumnNullable*>(
-                                                
block.get_by_position(i).column.get())
-                                                    ->has_null()
-                                    ? reinterpret_cast<const ColumnNullable*>(
-                                              
block.get_by_position(i).column.get())
-                                              ->get_null_map_column_ptr()
-                                    : nullptr;
-            switch (_output_vexpr_ctxs[i]->root()->type().type) {
-            case TYPE_BOOLEAN: {
-                WRITE_SINGLE_ELEMENTS_INTO_BATCH(orc::LongVectorBatch, 
ColumnVector<UInt8>)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_TINYINT: {
-                WRITE_SINGLE_ELEMENTS_INTO_BATCH(orc::LongVectorBatch, 
ColumnVector<Int8>)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_SMALLINT: {
-                WRITE_SINGLE_ELEMENTS_INTO_BATCH(orc::LongVectorBatch, 
ColumnVector<Int16>)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_INT: {
-                WRITE_SINGLE_ELEMENTS_INTO_BATCH(orc::LongVectorBatch, 
ColumnVector<Int32>)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_BIGINT: {
-                WRITE_CONTINUOUS_ELEMENTS_INTO_BATCH(orc::LongVectorBatch, 
ColumnVector<Int64>,
-                                                     Int64)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_LARGEINT: {
-                char* ptr = (char*)malloc(BUFFER_UNIT_SIZE);
-                bufferList[i].data = ptr;
-                bufferList[i].size = BUFFER_UNIT_SIZE;
-                size_t offset = 0;
-                WRITE_LARGEINT_STRING_INTO_BATCH(orc::StringVectorBatch, 
ColumnVector<Int128>,
-                                                 bufferList[i])
-                SET_NUM_ELEMENTS;
-                break;
-            }
-            case TYPE_FLOAT: {
-                WRITE_SINGLE_ELEMENTS_INTO_BATCH(orc::DoubleVectorBatch, 
ColumnVector<Float32>)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_DOUBLE: {
-                WRITE_CONTINUOUS_ELEMENTS_INTO_BATCH(orc::DoubleVectorBatch, 
ColumnVector<Float64>,
-                                                     Float64)
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_DATETIME:
-            case TYPE_DATE: {
-                char* ptr = (char*)malloc(BUFFER_UNIT_SIZE);
-                bufferList[i].data = ptr;
-                bufferList[i].size = BUFFER_UNIT_SIZE;
-                size_t offset = 0;
-                WRITE_DATE_STRING_INTO_BATCH(Int64, VecDateTimeValue, 
bufferList[i])
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_DATEV2: {
-                char* ptr = (char*)malloc(BUFFER_UNIT_SIZE);
-                bufferList[i].data = ptr;
-                bufferList[i].size = BUFFER_UNIT_SIZE;
-                size_t offset = 0;
-                WRITE_DATE_STRING_INTO_BATCH(UInt32, 
DateV2Value<DateV2ValueType>, bufferList[i])
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_DATETIMEV2: {
-                char* ptr = (char*)malloc(BUFFER_UNIT_SIZE);
-                bufferList[i].data = ptr;
-                bufferList[i].size = BUFFER_UNIT_SIZE;
-                size_t offset = 0;
-                WRITE_DATETIMEV2_STRING_INTO_BATCH(UInt64, 
DateV2Value<DateTimeV2ValueType>,
-                                                   bufferList[i])
-                SET_NUM_ELEMENTS
-                break;
-            }
-            case TYPE_OBJECT: {
-                if (_output_object_data) {
-                    WRITE_COMPLEX_TYPE_INTO_BATCH(orc::StringVectorBatch, 
ColumnBitmap)
-                    SET_NUM_ELEMENTS
+            _write_one_col(_output_vexpr_ctxs[i]->root()->type(), root->fields[i], raw_column, 0,
+                           sz, &bufferList);
+        }
+    } catch (const std::exception& e) {
+        LOG(WARNING) << "Orc write error: " << e.what();
+        return Status::InternalError(e.what());
+    }
+    root->numElements = sz;
+    _writer->add(*row_batch);
+    _cur_written_rows += sz;
+
+    return Status::OK();
+}
+
+Status VOrcTransformer::_write_one_col(const TypeDescriptor type_descriptor,
+                                       orc::ColumnVectorBatch* orc_col_batch,
+                                       const ColumnPtr& raw_column, size_t start_row_id,
+                                       size_t end_row_id, std::vector<StringRef>* bufferList) {

Review Comment:
   ```suggestion
                                          size_t end_row_id, std::vector<StringRef>* buffer_list) {
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to