This is an automated email from the ASF dual-hosted git repository.
lihaopeng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8b4e572 row num is more accurate than column num in data_types (#8628)
8b4e572 is described below
commit 8b4e57287fe0397086ad55be9d0ed7133add7e11
Author: dataroaring <[email protected]>
AuthorDate: Fri Mar 25 14:38:27 2022 +0800
row num is more accurate than column num in data_types (#8628)
---
be/src/util/quantile_state.h | 4 +--
.../aggregate_function_stddev.h | 2 ++
be/src/vec/data_types/data_type_array.cpp | 12 ++++-----
be/src/vec/data_types/data_type_bitmap.cpp | 30 +++++++++++-----------
be/src/vec/data_types/data_type_decimal.cpp | 24 ++++++++---------
be/src/vec/data_types/data_type_hll.cpp | 30 +++++++++++-----------
be/src/vec/data_types/data_type_nullable.cpp | 14 +++++-----
be/src/vec/data_types/data_type_number_base.cpp | 22 ++++++++--------
be/src/vec/data_types/data_type_string.cpp | 14 +++++-----
9 files changed, 77 insertions(+), 75 deletions(-)
diff --git a/be/src/util/quantile_state.h b/be/src/util/quantile_state.h
index db618d5..ef1b13d 100644
--- a/be/src/util/quantile_state.h
+++ b/be/src/util/quantile_state.h
@@ -26,7 +26,7 @@
namespace doris {
-class Slice;
+struct Slice;
class TDigest;
const static int QUANTILE_STATE_EXPLICIT_NUM = 2048;
@@ -68,4 +68,4 @@ private:
} // namespace doris
-#endif // DORIS_BE_SRC_OLAP_QUANTILE_STATE_H
\ No newline at end of file
+#endif // DORIS_BE_SRC_OLAP_QUANTILE_STATE_H
diff --git a/be/src/vec/aggregate_functions/aggregate_function_stddev.h
b/be/src/vec/aggregate_functions/aggregate_function_stddev.h
index 50c4064..83c4041 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_stddev.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_stddev.h
@@ -28,6 +28,7 @@ namespace doris::vectorized {
template <typename T, bool is_stddev>
struct BaseData {
BaseData() : mean(0.0), m2(0.0), count(0) {}
+ virtual ~BaseData() {}
void write(BufferWritable& buf) const {
write_binary(mean, buf);
@@ -102,6 +103,7 @@ struct BaseData {
template <bool is_stddev>
struct BaseDatadecimal {
BaseDatadecimal() : mean(0), m2(0), count(0) {}
+ virtual ~BaseDatadecimal() {}
void write(BufferWritable& buf) const {
write_binary(mean, buf);
diff --git a/be/src/vec/data_types/data_type_array.cpp
b/be/src/vec/data_types/data_type_array.cpp
index b10c5ca..d7d9d6d 100644
--- a/be/src/vec/data_types/data_type_array.cpp
+++ b/be/src/vec/data_types/data_type_array.cpp
@@ -63,7 +63,7 @@ char* DataTypeArray::serialize(const IColumn& column, char*
buf) const {
auto ptr = column.convert_to_full_column_if_const();
const auto& data_column = assert_cast<const ColumnArray&>(*ptr.get());
- // column num
+ // row num
*reinterpret_cast<uint32_t*>(buf) = column.size();
buf += sizeof(IColumn::Offset);
// offsets
@@ -77,13 +77,13 @@ const char* DataTypeArray::deserialize(const char* buf,
IColumn* column) const {
auto* data_column = assert_cast<ColumnArray*>(column);
auto& offsets = data_column->get_offsets();
- // column num
- uint32_t column_num = *reinterpret_cast<const IColumn::Offset*>(buf);
+ // row num
+ uint32_t row_num = *reinterpret_cast<const IColumn::Offset*>(buf);
buf += sizeof(IColumn::Offset);
// offsets
- offsets.resize(column_num);
- memcpy(offsets.data(), buf, sizeof(IColumn::Offset) * column_num);
- buf += sizeof(IColumn::Offset) * column_num;
+ offsets.resize(row_num);
+ memcpy(offsets.data(), buf, sizeof(IColumn::Offset) * row_num);
+ buf += sizeof(IColumn::Offset) * row_num;
// children
return get_nested_type()->deserialize(buf,
data_column->get_data_ptr()->assume_mutable());
}
diff --git a/be/src/vec/data_types/data_type_bitmap.cpp
b/be/src/vec/data_types/data_type_bitmap.cpp
index 88a0837..97f34ac 100644
--- a/be/src/vec/data_types/data_type_bitmap.cpp
+++ b/be/src/vec/data_types/data_type_bitmap.cpp
@@ -24,7 +24,7 @@
namespace doris::vectorized {
// binary: <size array> | <bitmap array>
-// <size array>: column num | bitmap1 size | bitmap2 size | ...
+// <size array>: row num | bitmap1 size | bitmap2 size | ...
// <bitmap array>: bitmap1 | bitmap2 | ...
int64_t DataTypeBitMap::get_uncompressed_serialized_bytes(const IColumn&
column) const {
auto ptr = column.convert_to_full_column_if_const();
@@ -44,19 +44,19 @@ char* DataTypeBitMap::serialize(const IColumn& column,
char* buf) const {
auto ptr = column.convert_to_full_column_if_const();
auto& data_column = assert_cast<const ColumnBitmap&>(*ptr);
- // serialize the bitmap size array, column num saves at index 0
- const auto column_num = column.size();
- size_t bitmap_size_array[column_num + 1];
- bitmap_size_array[0] = column_num;
- for (size_t i = 0; i < column.size(); ++i) {
+ // serialize the bitmap size array, row num saves at index 0
+ const auto row_num = column.size();
+ size_t bitmap_size_array[row_num + 1];
+ bitmap_size_array[0] = row_num;
+ for (size_t i = 0; i < row_num; ++i) {
auto& bitmap = const_cast<BitmapValue&>(data_column.get_element(i));
bitmap_size_array[i + 1] = bitmap.getSizeInBytes();
}
- auto allocate_len_size = sizeof(size_t) * (column_num + 1);
+ auto allocate_len_size = sizeof(size_t) * (row_num + 1);
memcpy(buf, bitmap_size_array, allocate_len_size);
buf += allocate_len_size;
// serialize each bitmap
- for (size_t i = 0; i < column_num; ++i) {
+ for (size_t i = 0; i < row_num; ++i) {
auto& bitmap = const_cast<BitmapValue&>(data_column.get_element(i));
bitmap.write(buf);
buf += bitmap_size_array[i + 1];
@@ -70,18 +70,18 @@ const char* DataTypeBitMap::deserialize(const char* buf,
IColumn* column) const
auto& data = data_column.get_data();
// deserialize the bitmap size array
- size_t column_num = *reinterpret_cast<const size_t*>(buf);
+ size_t row_num = *reinterpret_cast<const size_t*>(buf);
buf += sizeof(size_t);
- size_t bitmap_size_array[column_num];
- memcpy(bitmap_size_array, buf, sizeof(size_t) * column_num);
- buf += sizeof(size_t) * column_num;
+ size_t bitmap_size_array[row_num];
+ memcpy(bitmap_size_array, buf, sizeof(size_t) * row_num);
+ buf += sizeof(size_t) * row_num;
// deserialize each bitmap
- data.resize(column_num);
- for (int i = 0; i < column_num ; ++i) {
+ data.resize(row_num);
+ for (int i = 0; i < row_num ; ++i) {
data[i].deserialize(buf);
buf += bitmap_size_array[i];
}
-
+
return buf;
}
diff --git a/be/src/vec/data_types/data_type_decimal.cpp
b/be/src/vec/data_types/data_type_decimal.cpp
index dee7955..8cd97a3 100644
--- a/be/src/vec/data_types/data_type_decimal.cpp
+++ b/be/src/vec/data_types/data_type_decimal.cpp
@@ -62,7 +62,7 @@ void DataTypeDecimal<T>::to_string(const IColumn& column,
size_t row_num,
ostr.write(str.data(), str.size());
}
-// binary: column_num | value1 | value2 | ...
+// binary: row_num | value1 | value2 | ...
template <typename T>
int64_t DataTypeDecimal<T>::get_uncompressed_serialized_bytes(const IColumn&
column) const {
return sizeof(uint32_t) + column.size() * sizeof(FieldType);
@@ -70,28 +70,28 @@ int64_t
DataTypeDecimal<T>::get_uncompressed_serialized_bytes(const IColumn& col
template <typename T>
char* DataTypeDecimal<T>::serialize(const IColumn& column, char* buf) const {
- // column num
- const auto column_num = column.size();
- *reinterpret_cast<uint32_t*>(buf) = column_num;
- buf += sizeof(uint32_t);
+ // row num
+ const auto row_num = column.size();
+ *reinterpret_cast<uint32_t*>(buf) = row_num;
+ buf += sizeof(uint32_t);
// column values
auto ptr = column.convert_to_full_column_if_const();
const auto* origin_data = assert_cast<const
ColumnType&>(*ptr.get()).get_data().data();
- memcpy(buf, origin_data, column_num * sizeof(FieldType));
- buf += column_num * sizeof(FieldType);
+ memcpy(buf, origin_data, row_num * sizeof(FieldType));
+ buf += row_num * sizeof(FieldType);
return buf;
}
template <typename T>
const char* DataTypeDecimal<T>::deserialize(const char* buf, IColumn* column)
const {
- // column num
- uint32_t column_num = *reinterpret_cast<const uint32_t*>(buf);
+ // row num
+ uint32_t row_num = *reinterpret_cast<const uint32_t*>(buf);
buf += sizeof(uint32_t);
// column values
auto& container = assert_cast<ColumnType*>(column)->get_data();
- container.resize(column_num);
- memcpy(container.data(), buf, column_num * sizeof(FieldType));
- buf += column_num * sizeof(FieldType);
+ container.resize(row_num);
+ memcpy(container.data(), buf, row_num * sizeof(FieldType));
+ buf += row_num * sizeof(FieldType);
return buf;
}
diff --git a/be/src/vec/data_types/data_type_hll.cpp
b/be/src/vec/data_types/data_type_hll.cpp
index b9d6c5b..fde7a2a 100644
--- a/be/src/vec/data_types/data_type_hll.cpp
+++ b/be/src/vec/data_types/data_type_hll.cpp
@@ -23,22 +23,22 @@
namespace doris::vectorized {
-// Two part of binary: <column num > + <size array> | <hll data array>
-// first: column num | hll1 size | hll2 size | ...
+// Two part of binary: <row num > + <size array> | <hll data array>
+// first: row num | hll1 size | hll2 size | ...
// second: hll1 | hll2 | ...
char* DataTypeHLL::serialize(const IColumn& column, char* buf) const {
auto ptr = column.convert_to_full_column_if_const();
auto& data_column = assert_cast<const ColumnHLL&>(*ptr);
- size_t column_num = column.size();
- size_t hll_size_array[column_num + 1];
- hll_size_array[0] = column_num;
+ size_t row_num = column.size();
+ size_t hll_size_array[row_num + 1];
+ hll_size_array[0] = row_num;
- auto allocate_len_size = sizeof(size_t) * (column_num + 1);
+ auto allocate_len_size = sizeof(size_t) * (row_num + 1);
char* buf_start = buf;
buf += allocate_len_size;
- for (size_t i = 0; i < column_num; ++i) {
+ for (size_t i = 0; i < row_num; ++i) {
auto& hll = const_cast<HyperLogLog&>(data_column.get_element(i));
size_t actual_size = hll.serialize(reinterpret_cast<uint8_t*>(buf));
hll_size_array[i + 1] = actual_size;
@@ -49,21 +49,21 @@ char* DataTypeHLL::serialize(const IColumn& column, char*
buf) const {
return buf;
}
-// Two part of binary: <column num > + <size array> | <hll data array>
-// first: column num | hll1 size | hll2 size | ...
+// Two part of binary: <row num > + <size array> | <hll data array>
+// first: row num | hll1 size | hll2 size | ...
// second: hll1 | hll2 | ...
const char* DataTypeHLL::deserialize(const char* buf, IColumn* column) const {
auto& data_column = assert_cast<ColumnHLL&>(*column);
auto& data = data_column.get_data();
- size_t column_num = *reinterpret_cast<const size_t*>(buf);
+ size_t row_num = *reinterpret_cast<const size_t*>(buf);
buf += sizeof(size_t);
- size_t hll_size_array[column_num];
- memcpy(hll_size_array, buf, sizeof(size_t) * column_num);
- buf += sizeof(size_t) * column_num;
+ size_t hll_size_array[row_num];
+ memcpy(hll_size_array, buf, sizeof(size_t) * row_num);
+ buf += sizeof(size_t) * row_num;
- data.resize(column_num);
- for (int i = 0; i < column_num; ++i) {
+ data.resize(row_num);
+ for (int i = 0; i < row_num; ++i) {
data[i].deserialize(Slice(buf, hll_size_array[i]));
buf += hll_size_array[i];
}
diff --git a/be/src/vec/data_types/data_type_nullable.cpp
b/be/src/vec/data_types/data_type_nullable.cpp
index 9adfaaf..52a7f02 100644
--- a/be/src/vec/data_types/data_type_nullable.cpp
+++ b/be/src/vec/data_types/data_type_nullable.cpp
@@ -53,7 +53,7 @@ std::string DataTypeNullable::to_string(const IColumn&
column, size_t row_num) c
}
}
-// binary: column num | <null array> | <values array>
+// binary: row num | <null array> | <values array>
// <null array>: is_null1 | is_null2 | ...
// <values array>: value1 | value2 | ...>
int64_t DataTypeNullable::get_uncompressed_serialized_bytes(const IColumn&
column) const {
@@ -68,7 +68,7 @@ char* DataTypeNullable::serialize(const IColumn& column,
char* buf) const {
auto ptr = column.convert_to_full_column_if_const();
const ColumnNullable& col = assert_cast<const ColumnNullable&>(*ptr.get());
- // column num
+ // row num
*reinterpret_cast<uint32_t*>(buf) = column.size();
buf += sizeof(uint32_t);
// null flags
@@ -80,13 +80,13 @@ char* DataTypeNullable::serialize(const IColumn& column,
char* buf) const {
const char* DataTypeNullable::deserialize(const char* buf, IColumn* column)
const {
ColumnNullable* col = assert_cast<ColumnNullable*>(column);
- // column num
- uint32_t column_num = *reinterpret_cast<const uint32_t*>(buf);
+ // row num
+ uint32_t row_num = *reinterpret_cast<const uint32_t*>(buf);
buf += sizeof(uint32_t);
// null flags
- col->get_null_map_data().resize(column_num);
- memcpy(col->get_null_map_data().data(), buf, column_num * sizeof(bool));
- buf += column_num * sizeof(bool);
+ col->get_null_map_data().resize(row_num);
+ memcpy(col->get_null_map_data().data(), buf, row_num * sizeof(bool));
+ buf += row_num * sizeof(bool);
// data values
IColumn& nested = col->get_nested_column();
return nested_data_type->deserialize(buf, &nested);
diff --git a/be/src/vec/data_types/data_type_number_base.cpp
b/be/src/vec/data_types/data_type_number_base.cpp
index e828c83..d8aa5a3 100644
--- a/be/src/vec/data_types/data_type_number_base.cpp
+++ b/be/src/vec/data_types/data_type_number_base.cpp
@@ -76,7 +76,7 @@ std::string DataTypeNumberBase<T>::to_string(const IColumn&
column, size_t row_n
}
}
-// binary: column num | value1 | value2 | ...
+// binary: row num | value1 | value2 | ...
template <typename T>
int64_t DataTypeNumberBase<T>::get_uncompressed_serialized_bytes(const
IColumn& column) const {
return sizeof(uint32_t) + column.size() * sizeof(FieldType);
@@ -84,29 +84,29 @@ int64_t
DataTypeNumberBase<T>::get_uncompressed_serialized_bytes(const IColumn&
template <typename T>
char* DataTypeNumberBase<T>::serialize(const IColumn& column, char* buf) const
{
- // column num
- const auto column_num = column.size();
- *reinterpret_cast<uint32_t*>(buf) = column_num;
+ // row num
+ const auto row_num = column.size();
+ *reinterpret_cast<uint32_t*>(buf) = row_num;
buf += sizeof(uint32_t);
// column data
auto ptr = column.convert_to_full_column_if_const();
const auto* origin_data = assert_cast<const
ColumnVector<T>&>(*ptr.get()).get_data().data();
- memcpy(buf, origin_data, column_num * sizeof(FieldType));
- buf += column_num * sizeof(FieldType);
+ memcpy(buf, origin_data, row_num * sizeof(FieldType));
+ buf += row_num * sizeof(FieldType);
return buf;
}
template <typename T>
const char* DataTypeNumberBase<T>::deserialize(const char* buf, IColumn*
column) const {
- // column num
- uint32_t column_num = *reinterpret_cast<const uint32_t*>(buf);
+ // row num
+ uint32_t row_num = *reinterpret_cast<const uint32_t*>(buf);
buf += sizeof(uint32_t);
// column data
auto& container = assert_cast<ColumnVector<T>*>(column)->get_data();
- container.resize(column_num);
- memcpy(container.data(), buf, column_num * sizeof(FieldType));
- buf += column_num * sizeof(FieldType);
+ container.resize(row_num);
+ memcpy(container.data(), buf, row_num * sizeof(FieldType));
+ buf += row_num * sizeof(FieldType);
return buf;
}
diff --git a/be/src/vec/data_types/data_type_string.cpp
b/be/src/vec/data_types/data_type_string.cpp
index 38e86ff..8a63f9b 100644
--- a/be/src/vec/data_types/data_type_string.cpp
+++ b/be/src/vec/data_types/data_type_string.cpp
@@ -79,7 +79,7 @@ bool DataTypeString::equals(const IDataType& rhs) const {
}
// binary: <size array> | total length | <value array>
-// <size array> : column num | offset1 |offset2 | ...
+// <size array> : row num | offset1 |offset2 | ...
// <value array> : <value1> | <value2 | ...
int64_t DataTypeString::get_uncompressed_serialized_bytes(const IColumn&
column) const {
auto ptr = column.convert_to_full_column_if_const();
@@ -91,7 +91,7 @@ char* DataTypeString::serialize(const IColumn& column, char*
buf) const {
auto ptr = column.convert_to_full_column_if_const();
const auto& data_column = assert_cast<const ColumnString&>(*ptr.get());
- // column num
+ // row num
*reinterpret_cast<uint32_t*>(buf) = column.size();
buf += sizeof(uint32_t);
// offsets
@@ -113,13 +113,13 @@ const char* DataTypeString::deserialize(const char* buf,
IColumn* column) const
ColumnString::Chars& data = column_string->get_chars();
ColumnString::Offsets& offsets = column_string->get_offsets();
- // column num
- uint32_t column_num = *reinterpret_cast<const uint32_t*>(buf);
+ // row num
+ uint32_t row_num = *reinterpret_cast<const uint32_t*>(buf);
buf += sizeof(uint32_t);
// offsets
- offsets.resize(column_num);
- memcpy(offsets.data(), buf, sizeof(uint32_t) * column_num);
- buf += sizeof(uint32_t) * column_num;
+ offsets.resize(row_num);
+ memcpy(offsets.data(), buf, sizeof(uint32_t) * row_num);
+ buf += sizeof(uint32_t) * row_num;
// total length
uint64_t value_len = *reinterpret_cast<const uint64_t*>(buf);
buf += sizeof(uint64_t);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]