This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new e13ce905cff [Fix](hive-writer) Fix hive partition update file size and remove redundant column names. (#34651) (#34885)
e13ce905cff is described below
commit e13ce905cff81e30d790f774ec14ab168a29a6bf
Author: Qi Chen <[email protected]>
AuthorDate: Wed May 15 11:23:32 2024 +0800
[Fix](hive-writer) Fix hive partition update file size and remove redundant column names. (#34651) (#34885)
Backport #34651.
---
be/src/vec/sink/writer/vhive_partition_writer.cpp | 7 ++++---
be/src/vec/sink/writer/vhive_partition_writer.h | 1 -
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/be/src/vec/sink/writer/vhive_partition_writer.cpp b/be/src/vec/sink/writer/vhive_partition_writer.cpp
index e9e816219b6..f88a695b3bf 100644
--- a/be/src/vec/sink/writer/vhive_partition_writer.cpp
+++ b/be/src/vec/sink/writer/vhive_partition_writer.cpp
@@ -66,7 +66,9 @@ Status VHivePartitionWriter::open(RuntimeState* state, RuntimeProfile* profile)
std::vector<std::string> column_names;
column_names.reserve(_columns.size());
for (int i = 0; i < _columns.size(); i++) {
- column_names.emplace_back(_columns[i].name);
+ if (_non_write_columns_indices.find(i) == _non_write_columns_indices.end()) {
+ column_names.emplace_back(_columns[i].name);
+ }
}
switch (_file_format_type) {
@@ -157,7 +159,6 @@ Status VHivePartitionWriter::write(vectorized::Block& block, vectorized::IColumn
RETURN_IF_ERROR(_projection_and_filter_block(block, filter, &output_block));
RETURN_IF_ERROR(_file_format_transformer->write(output_block));
_row_count += output_block.rows();
- _input_size_in_bytes += output_block.bytes();
return Status::OK();
}
@@ -200,7 +201,7 @@ THivePartitionUpdate VHivePartitionWriter::_build_partition_update() {
hive_partition_update.__set_location(location);
hive_partition_update.__set_file_names({_get_target_file_name()});
hive_partition_update.__set_row_count(_row_count);
- hive_partition_update.__set_file_size(_input_size_in_bytes);
+ hive_partition_update.__set_file_size(_file_format_transformer->written_len());
if (_write_info.file_type == TFileType::FILE_S3) {
doris::io::S3FileWriter* s3_mpu_file_writer =
diff --git a/be/src/vec/sink/writer/vhive_partition_writer.h b/be/src/vec/sink/writer/vhive_partition_writer.h
index e4fc2ebc24b..912ac8b1e49 100644
--- a/be/src/vec/sink/writer/vhive_partition_writer.h
+++ b/be/src/vec/sink/writer/vhive_partition_writer.h
@@ -88,7 +88,6 @@ private:
TUpdateMode::type _update_mode;
size_t _row_count = 0;
- size_t _input_size_in_bytes = 0;
const VExprContextSPtrs& _vec_output_expr_ctxs;
const VExprContextSPtrs& _write_output_expr_ctxs;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]