This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new f211eb58c7f [fix](ubsan) reinterpret_cast fix length types to int8 is
not safe (#35912)
f211eb58c7f is described below
commit f211eb58c7f825fda220e35e416849bd243474cf
Author: Ashin Gau <[email protected]>
AuthorDate: Sun Jun 9 21:53:03 2024 +0800
[fix](ubsan) reinterpret_cast fix length types to int8 is not safe (#35912)
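Fix a type-check failure reported by UBSan. The fixed-length Parquet decoders
used reinterpret_cast to treat the destination column as ColumnVector<Int8>
even when its real type was different (e.g. ColumnVector<int>), so the
subsequent member call was flagged as undefined behavior. The decoders now
resize the column through its own interface and copy into its raw data buffer.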
```
/root/doris/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h:75:78:
runtime error: member call on address 0x5582f35db5c0 which does not point to an
object of type 'doris::vectorized::ColumnVector<signed char>'
0x5582f35db5c0: note: object is of type
'doris::vectorized::ColumnVector<int>'
83 55 00 00 78 c0 b0 5a 82 55 00 00 02 00 00 00 00 00 00 00 10 a0 00 d7
83 55 00 00 10 a0 00 d7
^~~~~~~~~~~~~~~~~~~~~~~
vptr for 'doris::vectorized::ColumnVector<int>'
doris::Status
doris::vectorized::FixLengthPlainDecoder::_decode_values<false>(COW<doris::vectorized::IColumn>::mutable_ptr<doris::vectorized::IColumn>&,
std::shared_ptr<doris::vectorized::IDataType const>&,
doris::vectorized::ColumnSelectVector&, bool) at
fix_length_plain_decoder.h:75:78
```
---
.../vec/exec/format/parquet/fix_length_dict_decoder.hpp | 17 ++++++++++-------
.../vec/exec/format/parquet/fix_length_plain_decoder.h | 13 +++++++------
2 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
index 115ca68bc1e..65e329ae89b 100644
--- a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
+++ b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
@@ -60,23 +60,26 @@ public:
return _decode_dict_values<has_filter>(doris_column,
select_vector, is_dict_filter);
}
- return _decode_fixed_values<has_filter>(doris_column, select_vector);
+ return _decode_fixed_values<has_filter>(doris_column, data_type,
select_vector);
}
protected:
template <bool has_filter>
- Status _decode_fixed_values(MutableColumnPtr& doris_column,
ColumnSelectVector& select_vector) {
- auto& column_data =
reinterpret_cast<ColumnVector<Int8>&>(*doris_column).get_data();
- size_t data_index = column_data.size();
- column_data.resize(data_index + _type_length *
(select_vector.num_values() -
-
select_vector.num_filtered()));
+ Status _decode_fixed_values(MutableColumnPtr& doris_column, DataTypePtr&
data_type,
+ ColumnSelectVector& select_vector) {
+ size_t primitive_length =
remove_nullable(data_type)->get_size_of_value_in_memory();
+ size_t data_index = doris_column->size() * primitive_length;
+ size_t scale_size = (select_vector.num_values() -
select_vector.num_filtered()) *
+ (_type_length / primitive_length);
+ doris_column->resize(doris_column->size() + scale_size);
+ char* raw_data = const_cast<char*>(doris_column->get_raw_data().data);
size_t dict_index = 0;
ColumnSelectVector::DataReadType read_type;
while (size_t run_length =
select_vector.get_next_run<has_filter>(&read_type)) {
switch (read_type) {
case ColumnSelectVector::CONTENT: {
for (size_t i = 0; i < run_length; ++i) {
- memcpy(column_data.data() + data_index,
_dict_items[_indexes[dict_index++]],
+ memcpy(raw_data + data_index,
_dict_items[_indexes[dict_index++]],
_type_length);
data_index += _type_length;
}
diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
index 72cb283f3f9..40e4c54a822 100644
--- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
+++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.h
@@ -72,16 +72,17 @@ Status
FixLengthPlainDecoder::_decode_values(MutableColumnPtr& doris_column, Dat
return Status::IOError("Out-of-bounds access in parquet data decoder");
}
- auto& column_data =
reinterpret_cast<ColumnVector<Int8>&>(*doris_column).get_data();
- size_t data_index = column_data.size();
- column_data.resize(data_index +
- _type_length * (select_vector.num_values() -
select_vector.num_filtered()));
+ size_t primitive_length =
remove_nullable(data_type)->get_size_of_value_in_memory();
+ size_t data_index = doris_column->size() * primitive_length;
+ size_t scale_size = (select_vector.num_values() -
select_vector.num_filtered()) *
+ (_type_length / primitive_length);
+ doris_column->resize(doris_column->size() + scale_size);
+ char* raw_data = const_cast<char*>(doris_column->get_raw_data().data);
ColumnSelectVector::DataReadType read_type;
while (size_t run_length =
select_vector.get_next_run<has_filter>(&read_type)) {
switch (read_type) {
case ColumnSelectVector::CONTENT: {
- memcpy(column_data.data() + data_index, _data->data + _offset,
- run_length * _type_length);
+ memcpy(raw_data + data_index, _data->data + _offset, run_length *
_type_length);
_offset += run_length * _type_length;
data_index += run_length * _type_length;
break;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]