This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 991fdff9ad0 [Improve](TabletSchemaCache) reduce duplicated memory
consumption for column name and column path (#31141)
991fdff9ad0 is described below
commit 991fdff9ad0c990ba2f74962174c5fbfe8cc138f
Author: lihangyu <[email protected]>
AuthorDate: Thu Mar 7 20:31:59 2024 +0800
[Improve](TabletSchemaCache) reduce duplicated memory consumption for
column name and column path (#31141)
Both can now be references to the corresponding fields in TabletColumn instead
of separate copies. Also use shared_ptr for TabletColumn in TabletSchema to allow later memory reuse.
---
be/src/olap/delta_writer.cpp | 2 +-
be/src/olap/field.h | 7 +-
be/src/olap/in_list_predicate.h | 7 -
be/src/olap/row_cursor.cpp | 7 +-
be/src/olap/row_cursor.h | 6 +-
be/src/olap/rowset/beta_rowset.cpp | 6 +-
be/src/olap/rowset/beta_rowset_writer.cpp | 4 +-
be/src/olap/rowset/segcompaction.cpp | 4 +-
be/src/olap/rowset/segment_creator.cpp | 4 +-
.../rowset/segment_v2/hierarchical_data_reader.cpp | 2 +-
be/src/olap/rowset/segment_v2/segment.cpp | 53 +++++---
be/src/olap/rowset/segment_v2/segment.h | 2 +-
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 12 +-
be/src/olap/rowset/segment_v2/segment_writer.cpp | 5 +-
.../rowset/segment_v2/vertical_segment_writer.cpp | 5 +-
be/src/olap/schema.cpp | 4 +-
be/src/olap/schema.h | 24 ++--
be/src/olap/tablet_schema.cpp | 142 +++++++++++----------
be/src/olap/tablet_schema.h | 54 ++++----
be/src/service/internal_service.cpp | 12 +-
be/src/vec/common/schema_util.cpp | 70 +++++-----
be/src/vec/exec/scan/new_olap_scanner.cpp | 4 +-
be/src/vec/json/path_in_data.cpp | 7 +-
be/src/vec/json/path_in_data.h | 16 +++
be/src/vec/jsonb/serialize.cpp | 2 +-
be/src/vec/olap/olap_data_convertor.cpp | 2 +-
be/test/olap/delete_bitmap_calculator_test.cpp | 34 ++---
be/test/olap/memtable_flush_executor_test.cpp | 17 ++-
.../olap/rowset/segment_v2/zone_map_index_test.cpp | 18 +--
be/test/olap/tablet_schema_helper.cpp | 111 ++++++++--------
be/test/olap/tablet_schema_helper.h | 29 +++--
be/test/vec/exec/vgeneric_iterators_test.cpp | 17 ++-
32 files changed, 374 insertions(+), 315 deletions(-)
diff --git a/be/src/olap/delta_writer.cpp b/be/src/olap/delta_writer.cpp
index a405e6afa03..a6bed0f40c9 100644
--- a/be/src/olap/delta_writer.cpp
+++ b/be/src/olap/delta_writer.cpp
@@ -253,7 +253,7 @@ void DeltaWriter::_request_slave_tablet_pull_rowset(const
PNodeInfo& node_info)
auto tablet_schema = cur_rowset->rowset_meta()->tablet_schema();
if (!tablet_schema->skip_write_index_on_load()) {
for (auto& column : tablet_schema->columns()) {
- const TabletIndex* index_meta =
tablet_schema->get_inverted_index(column);
+ const TabletIndex* index_meta =
tablet_schema->get_inverted_index(*column);
if (index_meta) {
indices_ids.emplace_back(index_meta->index_id(),
index_meta->get_index_suffix());
}
diff --git a/be/src/olap/field.h b/be/src/olap/field.h
index be95f1a0e34..6a2d407ff6c 100644
--- a/be/src/olap/field.h
+++ b/be/src/olap/field.h
@@ -32,6 +32,7 @@
#include "util/hash_util.hpp"
#include "util/slice.h"
#include "vec/common/arena.h"
+#include "vec/json/path_in_data.h"
namespace doris {
@@ -48,7 +49,7 @@ public:
_index_size(column.index_length()),
_is_nullable(column.is_nullable()),
_unique_id(column.unique_id()),
- _path(column.path_info()) {}
+ _path(column.path_info_ptr()) {}
virtual ~Field() = default;
@@ -58,7 +59,7 @@ public:
size_t index_size() const { return _index_size; }
int32_t unique_id() const { return _unique_id; }
const std::string& name() const { return _name; }
- const vectorized::PathInData& path() const { return _path; }
+ const vectorized::PathInDataPtr& path() const { return _path; }
virtual void set_to_max(char* buf) const { return
_type_info->set_to_max(buf); }
virtual void set_to_zone_map_max(char* buf) const { set_to_max(buf); }
@@ -257,7 +258,7 @@ private:
int32_t _precision;
int32_t _scale;
int32_t _unique_id;
- vectorized::PathInData _path;
+ vectorized::PathInDataPtr _path;
};
class MapField : public Field {
diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h
index 5d7fb783239..6800b563fae 100644
--- a/be/src/olap/in_list_predicate.h
+++ b/be/src/olap/in_list_predicate.h
@@ -36,13 +36,6 @@
#include "vec/common/string_ref.h"
#include "vec/core/types.h"
-template <>
-struct std::equal_to<doris::StringRef> {
- bool operator()(const doris::StringRef& lhs, const doris::StringRef& rhs)
const {
- return lhs == rhs;
- }
-};
-
// for uint24_t
template <>
struct std::hash<doris::uint24_t> {
diff --git a/be/src/olap/row_cursor.cpp b/be/src/olap/row_cursor.cpp
index 3bbd699ff27..bf3ae2c5219 100644
--- a/be/src/olap/row_cursor.cpp
+++ b/be/src/olap/row_cursor.cpp
@@ -28,6 +28,7 @@
#include "olap/field.h"
#include "olap/olap_common.h"
#include "olap/olap_define.h"
+#include "olap/tablet_schema.h"
#include "util/slice.h"
using std::nothrow;
@@ -78,7 +79,7 @@ Status RowCursor::_init(const std::shared_ptr<Schema>&
shared_schema,
return _init(columns);
}
-Status RowCursor::_init(const std::vector<TabletColumn>& schema,
+Status RowCursor::_init(const std::vector<TabletColumnPtr>& schema,
const std::vector<uint32_t>& columns) {
_schema.reset(new Schema(schema, columns));
return _init(columns);
@@ -137,7 +138,7 @@ Status RowCursor::init(TabletSchemaSPtr schema) {
return init(schema->columns(), schema->num_columns());
}
-Status RowCursor::init(const std::vector<TabletColumn>& schema) {
+Status RowCursor::init(const std::vector<TabletColumnPtr>& schema) {
return init(schema, schema.size());
}
@@ -157,7 +158,7 @@ Status RowCursor::init(TabletSchemaSPtr schema, size_t
column_count) {
return Status::OK();
}
-Status RowCursor::init(const std::vector<TabletColumn>& schema, size_t
column_count) {
+Status RowCursor::init(const std::vector<TabletColumnPtr>& schema, size_t
column_count) {
if (column_count > schema.size()) {
return Status::Error<INVALID_ARGUMENT>(
"Input param are invalid. Column count is bigger than
num_columns of schema. "
diff --git a/be/src/olap/row_cursor.h b/be/src/olap/row_cursor.h
index 9f39b5d4f7b..e7d2d016bde 100644
--- a/be/src/olap/row_cursor.h
+++ b/be/src/olap/row_cursor.h
@@ -46,10 +46,10 @@ public:
// Create a RowCursor based on the schema
Status init(TabletSchemaSPtr schema);
- Status init(const std::vector<TabletColumn>& schema);
+ Status init(const std::vector<TabletColumnPtr>& schema);
// Create a RowCursor based on the first n columns of the schema
- Status init(const std::vector<TabletColumn>& schema, size_t column_count);
+ Status init(const std::vector<TabletColumnPtr>& schema, size_t
column_count);
Status init(TabletSchemaSPtr schema, size_t column_count);
// Create a RowCursor based on the schema and column id list
@@ -123,7 +123,7 @@ private:
Status _init(const std::shared_ptr<Schema>& shared_schema,
const std::vector<uint32_t>& columns);
// common init function
- Status _init(const std::vector<TabletColumn>& schema, const
std::vector<uint32_t>& columns);
+ Status _init(const std::vector<TabletColumnPtr>& schema, const
std::vector<uint32_t>& columns);
Status _alloc_buf();
Status _init_scan_key(TabletSchemaSPtr schema, const
std::vector<std::string>& scan_keys);
diff --git a/be/src/olap/rowset/beta_rowset.cpp
b/be/src/olap/rowset/beta_rowset.cpp
index 057e3411f4f..e3f28726c4d 100644
--- a/be/src/olap/rowset/beta_rowset.cpp
+++ b/be/src/olap/rowset/beta_rowset.cpp
@@ -192,7 +192,7 @@ Status BetaRowset::remove() {
success = false;
}
for (auto& column : _schema->columns()) {
- const TabletIndex* index_meta =
_schema->get_inverted_index(column);
+ const TabletIndex* index_meta =
_schema->get_inverted_index(*column);
if (index_meta) {
std::string inverted_index_file =
InvertedIndexDescriptor::get_index_file_name(
seg_path, index_meta->index_id(),
index_meta->get_index_suffix());
@@ -320,7 +320,7 @@ Status BetaRowset::copy_files_to(const std::string& dir,
const RowsetId& new_row
RETURN_IF_ERROR(io::global_local_filesystem()->copy_path(src_path,
dst_path));
for (auto& column : _schema->columns()) {
// if (column.has_inverted_index()) {
- const TabletIndex* index_meta =
_schema->get_inverted_index(column);
+ const TabletIndex* index_meta =
_schema->get_inverted_index(*column);
if (index_meta) {
std::string inverted_index_src_file_path =
InvertedIndexDescriptor::get_index_file_name(
@@ -355,7 +355,7 @@ Status BetaRowset::upload_to(io::RemoteFileSystem* dest_fs,
const RowsetId& new_
local_paths.push_back(local_seg_path);
for (auto& column : _schema->columns()) {
// if (column.has_inverted_index()) {
- const TabletIndex* index_meta =
_schema->get_inverted_index(column);
+ const TabletIndex* index_meta =
_schema->get_inverted_index(*column);
if (index_meta) {
std::string remote_inverted_index_file =
InvertedIndexDescriptor::get_index_file_name(
diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp
b/be/src/olap/rowset/beta_rowset_writer.cpp
index 2b260b7acae..d75f7e44e6e 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -351,8 +351,8 @@ Status BetaRowsetWriter::_rename_compacted_indices(int64_t
begin, int64_t end, u
int ret;
// rename remaining inverted index files
for (auto column : _context.tablet_schema->columns()) {
- if (_context.tablet_schema->has_inverted_index(column)) {
- auto index_info =
_context.tablet_schema->get_inverted_index(column);
+ if (_context.tablet_schema->has_inverted_index(*column)) {
+ auto index_info =
_context.tablet_schema->get_inverted_index(*column);
auto index_id = index_info->index_id();
auto src_idx_path =
begin < 0 ?
InvertedIndexDescriptor::inverted_index_file_path(
diff --git a/be/src/olap/rowset/segcompaction.cpp
b/be/src/olap/rowset/segcompaction.cpp
index 81a73b99ebb..57e6e867205 100644
--- a/be/src/olap/rowset/segcompaction.cpp
+++ b/be/src/olap/rowset/segcompaction.cpp
@@ -134,8 +134,8 @@ Status
SegcompactionWorker::_delete_original_segments(uint32_t begin, uint32_t e
strings::Substitute("Failed to delete
file=$0", seg_path));
// Delete inverted index files
for (auto column : schema->columns()) {
- if (schema->has_inverted_index(column)) {
- auto index_info = schema->get_inverted_index(column);
+ if (schema->has_inverted_index(*column)) {
+ auto index_info = schema->get_inverted_index(*column);
auto index_id = index_info->index_id();
auto idx_path =
InvertedIndexDescriptor::inverted_index_file_path(
ctx.rowset_dir, ctx.rowset_id, i, index_id,
index_info->get_index_suffix());
diff --git a/be/src/olap/rowset/segment_creator.cpp
b/be/src/olap/rowset/segment_creator.cpp
index 7ee11c2bcf3..8f2553ade59 100644
--- a/be/src/olap/rowset/segment_creator.cpp
+++ b/be/src/olap/rowset/segment_creator.cpp
@@ -95,7 +95,7 @@ Status
SegmentFlusher::_expand_variant_to_subcolumns(vectorized::Block& block,
if (_context->partial_update_info &&
_context->partial_update_info->is_partial_update) {
// check columns that used to do partial updates should not include
variant
for (int i : _context->partial_update_info->update_cids) {
- const auto& col = _context->original_tablet_schema->columns()[i];
+ const auto& col = *_context->original_tablet_schema->columns()[i];
if (!col.is_key() && col.name() != DELETE_SIGN) {
return Status::InvalidArgument(
"Not implement partial update for variant only support
delete currently");
@@ -104,7 +104,7 @@ Status
SegmentFlusher::_expand_variant_to_subcolumns(vectorized::Block& block,
} else {
// find positions of variant columns
for (int i = 0; i <
_context->original_tablet_schema->columns().size(); ++i) {
- if
(_context->original_tablet_schema->columns()[i].is_variant_type()) {
+ if
(_context->original_tablet_schema->columns()[i]->is_variant_type()) {
variant_column_pos.push_back(i);
}
}
diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
index 1deae3a57dd..2feb865de28 100644
--- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
+++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp
@@ -182,7 +182,7 @@ Status
ExtractReader::extract_to(vectorized::MutableColumnPtr& dst, size_t nrows
// since some other column may depend on it.
vectorized::MutableColumnPtr extracted_column;
RETURN_IF_ERROR(root.extract_root( // trim the root name, eg. v.a.b -> a.b
- _col.path_info().copy_pop_front(), extracted_column));
+ _col.path_info_ptr()->copy_pop_front(), extracted_column));
if (_target_type_hint != nullptr) {
variant.create_root(_target_type_hint,
_target_type_hint->create_column());
diff --git a/be/src/olap/rowset/segment_v2/segment.cpp
b/be/src/olap/rowset/segment_v2/segment.cpp
index 3d78cd5bd4d..9c392e6088c 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -128,7 +128,7 @@ Status Segment::new_iterator(SchemaSPtr schema, const
StorageReadOptions& read_o
const TabletColumn& col =
read_options.tablet_schema->column(column_id);
ColumnReader* reader = nullptr;
if (col.is_extracted_column()) {
- const auto* node = _sub_column_tree.find_exact(col.path_info());
+ const auto* node =
_sub_column_tree.find_exact(*col.path_info_ptr());
reader = node != nullptr ? node->data.reader.get() : nullptr;
} else {
reader = _column_readers.contains(col.unique_id())
@@ -341,12 +341,12 @@ Status Segment::_load_index_impl() {
// Return the storage datatype of related column to field.
// Return nullptr meaning no such storage infomation for this column
-vectorized::DataTypePtr Segment::get_data_type_of(vectorized::PathInData path,
bool is_nullable,
+vectorized::DataTypePtr Segment::get_data_type_of(vectorized::PathInDataPtr
path, bool is_nullable,
bool ignore_children) const {
// Path has higher priority
- if (!path.empty()) {
- auto node = _sub_column_tree.find_leaf(path);
- auto sparse_node = _sparse_column_tree.find_exact(path);
+ if (path != nullptr && !path->empty()) {
+ auto node = _sub_column_tree.find_leaf(*path);
+ auto sparse_node = _sparse_column_tree.find_exact(*path);
if (node) {
if (ignore_children || (node->children.empty() && sparse_node ==
nullptr)) {
return node->data.file_column_type;
@@ -398,7 +398,10 @@ Status Segment::_create_column_readers(const
SegmentFooterPB& footer) {
// init by column path
for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns();
++ordinal) {
auto& column = _tablet_schema->column(ordinal);
- auto iter = column_path_to_footer_ordinal.find(column.path_info());
+ if (!column.has_path_info()) {
+ continue;
+ }
+ auto iter =
column_path_to_footer_ordinal.find(*column.path_info_ptr());
if (iter == column_path_to_footer_ordinal.end()) {
continue;
}
@@ -469,18 +472,22 @@ Status Segment::new_column_iterator_with_path(const
TabletColumn& tablet_column,
std::unique_ptr<ColumnIterator>*
iter,
const StorageReadOptions* opt) {
vectorized::PathInData root_path;
- if (tablet_column.path_info().empty()) {
+ if (!tablet_column.has_path_info()) {
// Missing path info, but need read the whole variant column
root_path = vectorized::PathInData(tablet_column.name_lower_case());
} else {
- root_path =
vectorized::PathInData({tablet_column.path_info().get_parts()[0]});
+ root_path =
vectorized::PathInData({tablet_column.path_info_ptr()->get_parts()[0]});
}
auto root = _sub_column_tree.find_leaf(root_path);
- auto node = _sub_column_tree.find_exact(tablet_column.path_info());
- auto sparse_node =
_sparse_column_tree.find_exact(tablet_column.path_info());
+ auto node = tablet_column.has_path_info()
+ ?
_sub_column_tree.find_exact(*tablet_column.path_info_ptr())
+ : nullptr;
+ auto sparse_node = tablet_column.has_path_info()
+ ?
_sparse_column_tree.find_exact(*tablet_column.path_info_ptr())
+ : nullptr;
if (opt != nullptr && opt->io_ctx.reader_type ==
ReaderType::READER_ALTER_TABLE) {
CHECK(tablet_column.is_variant_type());
- if (node == nullptr) {
+ if (root == nullptr) {
// No such variant column in this segment, get a default one
RETURN_IF_ERROR(new_default_iterator(tablet_column, iter));
return Status::OK();
@@ -490,13 +497,15 @@ Status Segment::new_column_iterator_with_path(const
TabletColumn& tablet_column,
// subcolumns of variant during processing rewriting rowsets.
// This is slow, since it needs to read all sub columns and merge them
into a single column
RETURN_IF_ERROR(
- HierarchicalDataReader::create(iter, root_path, node, root,
output_as_raw_json));
+ HierarchicalDataReader::create(iter, root_path, root, root,
output_as_raw_json));
return Status::OK();
}
if (opt == nullptr || opt->io_ctx.reader_type != ReaderType::READER_QUERY)
{
// Could be compaction ..etc and read flat leaves nodes data
- const auto* node =
_sub_column_tree.find_leaf(tablet_column.path_info());
+ const auto* node = tablet_column.has_path_info()
+ ?
_sub_column_tree.find_leaf(*tablet_column.path_info_ptr())
+ : nullptr;
if (!node) {
// sparse_columns have this path, read from root
if (sparse_node != nullptr && sparse_node->is_leaf_node()) {
@@ -517,15 +526,15 @@ Status Segment::new_column_iterator_with_path(const
TabletColumn& tablet_column,
if (node->is_leaf_node() && sparse_node == nullptr) {
// Node contains column without any child sub columns and no
corresponding sparse columns
// Direct read extracted columns
- const auto* node =
_sub_column_tree.find_leaf(tablet_column.path_info());
+ const auto* node =
_sub_column_tree.find_leaf(*tablet_column.path_info_ptr());
ColumnIterator* it;
RETURN_IF_ERROR(node->data.reader->new_iterator(&it));
iter->reset(it);
} else {
// Node contains column with children columns or has correspoding
sparse columns
// Create reader with hirachical data
- RETURN_IF_ERROR(
- HierarchicalDataReader::create(iter,
tablet_column.path_info(), node, root));
+ RETURN_IF_ERROR(HierarchicalDataReader::create(iter,
*tablet_column.path_info_ptr(),
+ node, root));
}
} else {
// No such node, read from either sparse column or default column
@@ -553,7 +562,7 @@ Status Segment::new_column_iterator(const TabletColumn&
tablet_column,
std::unique_ptr<ColumnIterator>* iter,
const StorageReadOptions* opt) {
// init column iterator by path info
- if (!tablet_column.path_info().empty() || tablet_column.is_variant_type())
{
+ if (tablet_column.has_path_info() || tablet_column.is_variant_type()) {
return new_column_iterator_with_path(tablet_column, iter, opt);
}
// init default iterator
@@ -587,8 +596,9 @@ Status Segment::new_column_iterator(int32_t unique_id,
std::unique_ptr<ColumnIte
ColumnReader* Segment::_get_column_reader(const TabletColumn& col) {
// init column iterator by path info
- if (!col.path_info().empty() || col.is_variant_type()) {
- auto node = _sub_column_tree.find_exact(col.path_info());
+ if (col.has_path_info() || col.is_variant_type()) {
+ auto node =
+ col.has_path_info() ?
_sub_column_tree.find_exact(*col.path_info_ptr()) : nullptr;
if (node != nullptr) {
return node->data.reader.get();
}
@@ -770,8 +780,9 @@ Status Segment::seek_and_read_by_rowid(const TabletSchema&
schema, SlotDescripto
};
std::vector<segment_v2::rowid_t> single_row_loc {row_id};
if (!slot->column_paths().empty()) {
- vectorized::PathInData
path(schema.column_by_uid(slot->col_unique_id()).name_lower_case(),
- slot->column_paths());
+ vectorized::PathInDataPtr path =
std::make_shared<vectorized::PathInData>(
+ schema.column_by_uid(slot->col_unique_id()).name_lower_case(),
+ slot->column_paths());
auto storage_type = get_data_type_of(path, slot->is_nullable(), false);
vectorized::MutableColumnPtr file_storage_column =
storage_type->create_column();
DCHECK(storage_type != nullptr);
diff --git a/be/src/olap/rowset/segment_v2/segment.h
b/be/src/olap/rowset/segment_v2/segment.h
index dc0f18ff02d..af5d1896f47 100644
--- a/be/src/olap/rowset/segment_v2/segment.h
+++ b/be/src/olap/rowset/segment_v2/segment.h
@@ -153,7 +153,7 @@ public:
// ignore_chidren set to false will treat field as variant
// when it contains children with field paths.
// nullptr will returned if storage type does not contains such column
- std::shared_ptr<const vectorized::IDataType>
get_data_type_of(vectorized::PathInData path,
+ std::shared_ptr<const vectorized::IDataType>
get_data_type_of(vectorized::PathInDataPtr path,
bool
is_nullable,
bool
ignore_children) const;
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index e8e1aa386a5..fe2ee5c7312 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -1782,7 +1782,8 @@ void SegmentIterator::_init_current_block(
"Recreate column with expected type {}, file column type
{}, col_name {}, "
"col_path {}",
block->get_by_position(i).type->get_name(),
file_column_type->get_name(),
- column_desc->name(), column_desc->path().get_path());
+ column_desc->name(),
+ column_desc->path() == nullptr ? "" :
column_desc->path()->get_path());
// TODO reuse
current_columns[cid] = file_column_type->create_column();
current_columns[cid]->reserve(_opts.block_row_max);
@@ -2104,10 +2105,11 @@ Status SegmentIterator::_convert_to_expected_type(const
std::vector<ColumnId>& c
expected_type, &expected));
_current_return_columns[i] = expected->assume_mutable();
_converted_column_ids[i] = 1;
- VLOG_DEBUG << fmt::format("Convert {} fom file column type {} to
{}, num_rows {}",
- field_type->path().get_path(),
file_column_type->get_name(),
- expected_type->get_name(),
- _current_return_columns[i]->size());
+ VLOG_DEBUG << fmt::format(
+ "Convert {} fom file column type {} to {}, num_rows {}",
+ field_type->path() == nullptr ? "" :
field_type->path()->get_path(),
+ file_column_type->get_name(), expected_type->get_name(),
+ _current_return_columns[i]->size());
}
}
return Status::OK();
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 27a744278df..283b96080ab 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -149,8 +149,9 @@ void SegmentWriter::init_column_meta(ColumnMetaPB* meta,
uint32_t column_id,
meta->set_default_value(column.default_value());
meta->set_precision(column.precision());
meta->set_frac(column.frac());
- if (!column.path_info().empty()) {
- column.path_info().to_protobuf(meta->mutable_column_path_info(),
column.parent_unique_id());
+ if (column.has_path_info()) {
+ column.path_info_ptr()->to_protobuf(meta->mutable_column_path_info(),
+ column.parent_unique_id());
}
meta->set_unique_id(column.unique_id());
for (uint32_t i = 0; i < column.get_subtype_count(); ++i) {
diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
index a7c47197473..a7e0dc9f241 100644
--- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
@@ -121,8 +121,9 @@ void VerticalSegmentWriter::_init_column_meta(ColumnMetaPB*
meta, uint32_t colum
meta->set_default_value(column.default_value());
meta->set_precision(column.precision());
meta->set_frac(column.frac());
- if (!column.path_info().empty()) {
- column.path_info().to_protobuf(meta->mutable_column_path_info(),
column.parent_unique_id());
+ if (column.has_path_info()) {
+ column.path_info_ptr()->to_protobuf(meta->mutable_column_path_info(),
+ column.parent_unique_id());
}
meta->set_unique_id(column.unique_id());
for (uint32_t i = 0; i < column.get_subtype_count(); ++i) {
diff --git a/be/src/olap/schema.cpp b/be/src/olap/schema.cpp
index 5cfa1d5e381..28c12b999d0 100644
--- a/be/src/olap/schema.cpp
+++ b/be/src/olap/schema.cpp
@@ -67,7 +67,7 @@ void Schema::_copy_from(const Schema& other) {
}
}
-void Schema::_init(const std::vector<TabletColumn>& cols, const
std::vector<ColumnId>& col_ids,
+void Schema::_init(const std::vector<TabletColumnPtr>& cols, const
std::vector<ColumnId>& col_ids,
size_t num_key_columns) {
_col_ids = col_ids;
_num_key_columns = num_key_columns;
@@ -81,7 +81,7 @@ void Schema::_init(const std::vector<TabletColumn>& cols,
const std::vector<Colu
if (col_id_set.find(cid) == col_id_set.end()) {
continue;
}
- _cols[cid] = FieldFactory::create(cols[cid]);
+ _cols[cid] = FieldFactory::create(*cols[cid]);
_col_offsets[cid] = offset;
// Plus 1 byte for null byte
diff --git a/be/src/olap/schema.h b/be/src/olap/schema.h
index 64d6a7544b5..6414db4153a 100644
--- a/be/src/olap/schema.h
+++ b/be/src/olap/schema.h
@@ -52,12 +52,12 @@ public:
Schema(TabletSchemaSPtr tablet_schema) {
size_t num_columns = tablet_schema->num_columns();
// ignore this column
- if (tablet_schema->columns().back().name() == BeConsts::ROW_STORE_COL)
{
+ if (tablet_schema->columns().back()->name() ==
BeConsts::ROW_STORE_COL) {
--num_columns;
}
std::vector<ColumnId> col_ids(num_columns);
_unique_ids.resize(num_columns);
- std::vector<TabletColumn> columns;
+ std::vector<TabletColumnPtr> columns;
columns.reserve(num_columns);
size_t num_key_columns = 0;
@@ -74,7 +74,7 @@ public:
if (column.name() == VERSION_COL) {
_version_col_idx = cid;
}
- columns.push_back(column);
+ columns.push_back(std::make_shared<TabletColumn>(column));
}
_delete_sign_idx = tablet_schema->delete_sign_idx();
if (tablet_schema->has_sequence_col()) {
@@ -84,34 +84,34 @@ public:
}
// All the columns of one table may exist in the columns param, but
col_ids is only a subset.
- Schema(const std::vector<TabletColumn>& columns, const
std::vector<ColumnId>& col_ids) {
+ Schema(const std::vector<TabletColumnPtr>& columns, const
std::vector<ColumnId>& col_ids) {
size_t num_key_columns = 0;
_unique_ids.resize(columns.size());
for (size_t i = 0; i < columns.size(); ++i) {
- if (columns[i].is_key()) {
+ if (columns[i]->is_key()) {
++num_key_columns;
}
- if (columns[i].name() == DELETE_SIGN) {
+ if (columns[i]->name() == DELETE_SIGN) {
_delete_sign_idx = i;
}
- if (columns[i].name() == BeConsts::ROWID_COL) {
+ if (columns[i]->name() == BeConsts::ROWID_COL) {
_rowid_col_idx = i;
}
- if (columns[i].name() == VERSION_COL) {
+ if (columns[i]->name() == VERSION_COL) {
_version_col_idx = i;
}
- _unique_ids[i] = columns[i].unique_id();
+ _unique_ids[i] = columns[i]->unique_id();
}
_init(columns, col_ids, num_key_columns);
}
// Only for UT
- Schema(const std::vector<TabletColumn>& columns, size_t num_key_columns) {
+ Schema(const std::vector<TabletColumnPtr>& columns, size_t
num_key_columns) {
std::vector<ColumnId> col_ids(columns.size());
_unique_ids.resize(columns.size());
for (uint32_t cid = 0; cid < columns.size(); ++cid) {
col_ids[cid] = cid;
- _unique_ids[cid] = columns[cid].unique_id();
+ _unique_ids[cid] = columns[cid]->unique_id();
}
_init(columns, col_ids, num_key_columns);
@@ -183,7 +183,7 @@ public:
int64_t mem_size() const { return _mem_size; }
private:
- void _init(const std::vector<TabletColumn>& cols, const
std::vector<ColumnId>& col_ids,
+ void _init(const std::vector<TabletColumnPtr>& cols, const
std::vector<ColumnId>& col_ids,
size_t num_key_columns);
void _init(const std::vector<const Field*>& cols, const
std::vector<ColumnId>& col_ids,
size_t num_key_columns);
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index 80108d41f54..19c107971bc 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -45,9 +45,11 @@
#include "vec/aggregate_functions/aggregate_function_simple_factory.h"
#include "vec/aggregate_functions/aggregate_function_state_union.h"
#include "vec/common/hex.h"
+#include "vec/common/string_ref.h"
#include "vec/core/block.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_factory.hpp"
+#include "vec/json/path_in_data.h"
namespace doris {
@@ -554,13 +556,14 @@ void TabletColumn::init_from_pb(const ColumnPB& column) {
add_sub_column(child_column);
}
if (column.has_column_path_info()) {
- _column_path.from_protobuf(column.column_path_info());
+ _column_path = std::make_shared<vectorized::PathInData>();
+ _column_path->from_protobuf(column.column_path_info());
_parent_col_unique_id =
column.column_path_info().parrent_column_unique_id();
}
for (auto& column_pb : column.sparse_columns()) {
TabletColumn column;
column.init_from_pb(column_pb);
- _sparse_cols.emplace_back(std::move(column));
+
_sparse_cols.emplace_back(std::make_shared<TabletColumn>(std::move(column)));
_num_sparse_columns++;
}
}
@@ -615,13 +618,13 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const {
for (size_t i = 0; i < _sub_columns.size(); i++) {
ColumnPB* child = column->add_children_columns();
- _sub_columns[i].to_schema_pb(child);
+ _sub_columns[i]->to_schema_pb(child);
}
// set parts info
- if (!_column_path.empty()) {
+ if (has_path_info()) {
// CHECK_GT(_parent_col_unique_id, 0);
- _column_path.to_protobuf(column->mutable_column_path_info(),
_parent_col_unique_id);
+ _column_path->to_protobuf(column->mutable_column_path_info(),
_parent_col_unique_id);
// Update unstable information for variant columns. Some of the fields
in the tablet schema
// are irrelevant for variant sub-columns, but retaining them may lead
to an excessive growth
// in the number of tablet schema cache entries.
@@ -632,12 +635,12 @@ void TabletColumn::to_schema_pb(ColumnPB* column) const {
}
for (auto& col : _sparse_cols) {
ColumnPB* sparse_column = column->add_sparse_columns();
- col.to_schema_pb(sparse_column);
+ col->to_schema_pb(sparse_column);
}
}
void TabletColumn::add_sub_column(TabletColumn& sub_column) {
- _sub_columns.push_back(sub_column);
+ _sub_columns.push_back(std::make_shared<TabletColumn>(sub_column));
sub_column._parent_col_unique_id = this->_unique_id;
_sub_column_count += 1;
}
@@ -674,7 +677,7 @@ vectorized::AggregateFunctionPtr
TabletColumn::get_aggregate_function(std::strin
}
void TabletColumn::set_path_info(const vectorized::PathInData& path) {
- _column_path = path;
+ _column_path = std::make_shared<vectorized::PathInData>(path);
}
vectorized::DataTypePtr TabletColumn::get_vec_type() const {
@@ -807,12 +810,11 @@ void TabletSchema::append_column(TabletColumn column,
ColumnType col_type) {
}
if (column.is_variant_type()) {
++_num_variant_columns;
- if (column.path_info().empty()) {
+ if (!column.has_path_info()) {
const std::string& col_name = column.name_lower_case();
vectorized::PathInData path(col_name);
column.set_path_info(path);
}
- _field_path_to_index[column.path_info()] = _num_columns;
}
if (UNLIKELY(column.name() == DELETE_SIGN)) {
_delete_sign_idx = _num_columns;
@@ -821,21 +823,21 @@ void TabletSchema::append_column(TabletColumn column,
ColumnType col_type) {
} else if (UNLIKELY(column.name() == VERSION_COL)) {
_version_col_idx = _num_columns;
}
+ _field_id_to_index[column.unique_id()] = _num_columns;
+ _cols.push_back(std::make_shared<TabletColumn>(std::move(column)));
// The dropped column may have same name with exsiting column, so that
// not add to name to index map, only for uid to index map
- if (col_type == ColumnType::NORMAL) {
- _field_name_to_index[column.name()] = _num_columns;
- } else if (col_type == ColumnType::VARIANT) {
- _field_name_to_index[column.name()] = _num_columns;
- _field_path_to_index[column.path_info()] = _num_columns;
+ if (col_type == ColumnType::VARIANT || _cols.back()->is_variant_type()) {
+ _field_name_to_index.emplace(StringRef(_cols.back()->name()),
_num_columns);
+ _field_path_to_index[_cols.back()->path_info_ptr().get()] =
_num_columns;
+ } else if (col_type == ColumnType::NORMAL) {
+ _field_name_to_index.emplace(StringRef(_cols.back()->name()),
_num_columns);
}
- _field_id_to_index[column.unique_id()] = _num_columns;
- _cols.push_back(std::move(column));
_num_columns++;
}
void TabletColumn::append_sparse_column(TabletColumn column) {
- _sparse_cols.push_back(std::move(column));
+ _sparse_cols.push_back(std::make_shared<TabletColumn>(column));
_num_sparse_columns++;
}
@@ -846,7 +848,7 @@ void TabletSchema::append_index(TabletIndex index) {
void TabletSchema::update_index(const TabletColumn& col, TabletIndex index) {
int32_t col_unique_id = col.unique_id();
const std::string& suffix_path =
- !col.path_info().empty() ?
escape_for_path_name(col.path_info().get_path()) : "";
+ col.has_path_info() ?
escape_for_path_name(col.path_info_ptr()->get_path()) : "";
for (size_t i = 0; i < _indexes.size(); i++) {
for (int32_t id : _indexes[i].col_unique_ids()) {
if (id == col_unique_id && _indexes[i].get_index_suffix() ==
suffix_path) {
@@ -856,6 +858,11 @@ void TabletSchema::update_index(const TabletColumn& col,
TabletIndex index) {
}
}
+// Replaces the column stored at `pos` with `new_col`.
+//
+// _field_name_to_index and _field_path_to_index are keyed by StringRef /
+// PathInDataRef values that point into the TabletColumn objects held by `_cols`
+// (see append_column). Simply overwriting `_cols[pos]` could release the old
+// column while the maps still reference its name/path, so the entries that
+// belong to `pos` are re-keyed onto the replacement here.
+void TabletSchema::replace_column(size_t pos, TabletColumn new_col) {
+    CHECK_LT(pos, num_columns()) << " outof range";
+    auto replacement = std::make_shared<TabletColumn>(std::move(new_col));
+    // Keep the outgoing column alive until its keys have been removed from the maps.
+    const TabletColumnPtr outgoing = _cols[pos];
+    const auto index = static_cast<int32_t>(pos);
+
+    // Only touch entries that actually map to `pos`; a dropped column may share
+    // its name with another column that owns the map entry.
+    bool had_name_entry = false;
+    if (auto it = _field_name_to_index.find(StringRef(outgoing->name()));
+        it != _field_name_to_index.end() && it->second == index) {
+        _field_name_to_index.erase(it);
+        had_name_entry = true;
+    }
+    bool had_path_entry = false;
+    if (outgoing->path_info_ptr() != nullptr) {
+        if (auto it = _field_path_to_index.find(
+                    vectorized::PathInDataRef(outgoing->path_info_ptr().get()));
+            it != _field_path_to_index.end() && it->second == index) {
+            _field_path_to_index.erase(it);
+            had_path_entry = true;
+        }
+    }
+
+    _cols[pos] = std::move(replacement);
+
+    if (had_name_entry) {
+        _field_name_to_index.emplace(StringRef(_cols[pos]->name()), index);
+    }
+    // Mirrors append_column: variant columns are registered by path as well.
+    if ((had_path_entry || _cols[pos]->is_variant_type()) &&
+        _cols[pos]->path_info_ptr() != nullptr) {
+        _field_path_to_index[_cols[pos]->path_info_ptr().get()] = index;
+    }
+}
+
+// Removes every entry from the schema's index list (`_indexes`).
void TabletSchema::clear_index() {
_indexes.clear();
}
@@ -911,9 +918,9 @@ void TabletSchema::init_from_pb(const TabletSchemaPB&
schema, bool ignore_extrac
if (column.is_variant_type()) {
++_num_variant_columns;
}
- _field_name_to_index[column.name()] = _num_columns;
- _field_id_to_index[column.unique_id()] = _num_columns;
- _cols.emplace_back(std::move(column));
+ _cols.emplace_back(std::make_shared<TabletColumn>(std::move(column)));
+ _field_name_to_index.emplace(StringRef(_cols.back()->name()),
_num_columns);
+ _field_id_to_index[_cols.back()->unique_id()] = _num_columns;
_num_columns++;
}
for (auto& index_pb : schema.index()) {
@@ -1016,9 +1023,9 @@ void TabletSchema::build_current_tablet_schema(int64_t
index_id, int32_t version
} else if (UNLIKELY(column->name() == VERSION_COL)) {
_version_col_idx = _num_columns;
}
- _field_name_to_index[column->name()] = _num_columns;
- _field_id_to_index[column->unique_id()] = _num_columns;
- _cols.emplace_back(*column);
+ _cols.emplace_back(std::make_shared<TabletColumn>(*column));
+ _field_name_to_index.emplace(StringRef(_cols.back()->name()),
_num_columns);
+ _field_id_to_index[_cols.back()->unique_id()] = _num_columns;
_num_columns++;
}
@@ -1041,12 +1048,13 @@ void TabletSchema::merge_dropped_columns(const
TabletSchema& src_schema) {
return;
}
for (const auto& src_col : src_schema.columns()) {
- if (_field_id_to_index.find(src_col.unique_id()) ==
_field_id_to_index.end()) {
- CHECK(!src_col.is_key()) << src_col.name() << " is key column,
should not be dropped.";
+ if (_field_id_to_index.find(src_col->unique_id()) ==
_field_id_to_index.end()) {
+ CHECK(!src_col->is_key())
+ << src_col->name() << " is key column, should not be
dropped.";
ColumnPB src_col_pb;
// There are some pointers in the tablet column and their reference
// relationship is unclear, so deep copy it.
- src_col.to_schema_pb(&src_col_pb);
+ src_col->to_schema_pb(&src_col_pb);
TabletColumn new_col(src_col_pb);
append_column(new_col, TabletSchema::ColumnType::DROPPED);
}
@@ -1067,21 +1075,21 @@ bool TabletSchema::is_dropped_column(const
TabletColumn& col) const {
CHECK(_field_id_to_index.find(col.unique_id()) != _field_id_to_index.end())
<< "could not find col with unique id = " << col.unique_id()
<< " and name = " << col.name();
- return _field_name_to_index.find(col.name()) == _field_name_to_index.end()
||
+ return _field_name_to_index.find(StringRef(col.name())) ==
_field_name_to_index.end() ||
column(col.name()).unique_id() != col.unique_id();
}
void TabletSchema::copy_extracted_columns(const TabletSchema& src_schema) {
std::unordered_set<int32_t> variant_columns;
for (const auto& col : columns()) {
- if (col.is_variant_type()) {
- variant_columns.insert(col.unique_id());
+ if (col->is_variant_type()) {
+ variant_columns.insert(col->unique_id());
}
}
- for (const TabletColumn& col : src_schema.columns()) {
- if (col.is_extracted_column() &&
variant_columns.contains(col.parent_unique_id())) {
+ for (const TabletColumnPtr& col : src_schema.columns()) {
+ if (col->is_extracted_column() &&
variant_columns.contains(col->parent_unique_id())) {
ColumnPB col_pb;
- col.to_schema_pb(&col_pb);
+ col->to_schema_pb(&col_pb);
TabletColumn new_col(col_pb);
append_column(new_col, ColumnType::VARIANT);
}
@@ -1090,7 +1098,7 @@ void TabletSchema::copy_extracted_columns(const
TabletSchema& src_schema) {
void TabletSchema::reserve_extracted_columns() {
for (auto it = _cols.begin(); it != _cols.end();) {
- if (!it->is_extracted_column()) {
+ if (!(*it)->is_extracted_column()) {
it = _cols.erase(it);
} else {
++it;
@@ -1103,12 +1111,12 @@ void TabletSchema::to_schema_pb(TabletSchemaPB*
tablet_schema_pb) const {
tablet_schema_pb->add_cluster_key_idxes(i);
}
tablet_schema_pb->set_keys_type(_keys_type);
- for (auto& col : _cols) {
+ for (const auto& col : _cols) {
ColumnPB* column = tablet_schema_pb->add_column();
- col.to_schema_pb(column);
+ col->to_schema_pb(column);
}
- for (auto& index : _indexes) {
- auto index_pb = tablet_schema_pb->add_index();
+ for (const auto& index : _indexes) {
+ auto* index_pb = tablet_schema_pb->add_index();
index.to_schema_pb(index_pb);
}
tablet_schema_pb->set_num_short_key_columns(_num_short_key_columns);
@@ -1134,8 +1142,8 @@ void TabletSchema::to_schema_pb(TabletSchemaPB*
tablet_schema_pb) const {
size_t TabletSchema::row_size() const {
size_t size = 0;
- for (auto& column : _cols) {
- size += column.length();
+ for (const auto& column : _cols) {
+ size += column->length();
}
size += (_num_columns + 7) / 8;
@@ -1143,12 +1151,12 @@ size_t TabletSchema::row_size() const {
}
+// Returns the ordinal registered for `field_name` in the name-to-index map,
+// or -1 when the name is not present.
int32_t TabletSchema::field_index(const std::string& field_name) const {
- const auto& found = _field_name_to_index.find(field_name);
+ // The map is keyed by StringRef, so the name is wrapped for the lookup.
+ const auto& found = _field_name_to_index.find(StringRef(field_name));
return (found == _field_name_to_index.end()) ? -1 : found->second;
}
+// Returns the ordinal registered for `path` in the path-to-index map,
+// or -1 when the path is not present.
int32_t TabletSchema::field_index(const vectorized::PathInData& path) const {
- const auto& found = _field_path_to_index.find(path);
+ // The map is keyed by PathInDataRef, which hashes and compares the pointed-to
+ // path, so a temporary ref to the caller's `path` is enough for the lookup.
+ const auto& found =
_field_path_to_index.find(vectorized::PathInDataRef(&path));
return (found == _field_path_to_index.end()) ? -1 : found->second;
}
@@ -1157,35 +1165,31 @@ int32_t TabletSchema::field_index(int32_t
col_unique_id) const {
return (found == _field_id_to_index.end()) ? -1 : found->second;
}
+// Returns the schema's column list; each element is a shared pointer to a TabletColumn.
-const std::vector<TabletColumn>& TabletSchema::columns() const {
+const std::vector<TabletColumnPtr>& TabletSchema::columns() const {
return _cols;
}
+// Returns this column's sparse-column list; each element is a shared pointer to a TabletColumn.
-const std::vector<TabletColumn>& TabletColumn::sparse_columns() const {
+const std::vector<TabletColumnPtr>& TabletColumn::sparse_columns() const {
return _sparse_cols;
}
-std::vector<TabletColumn>& TabletSchema::mutable_columns() {
- return _cols;
-}
-
+// Returns the column stored at position `ordinal`.
+// `ordinal` must be smaller than _num_columns; this is only verified through
+// DCHECK, and the element is accessed with unchecked operator[].
const TabletColumn& TabletSchema::column(size_t ordinal) const {
DCHECK(ordinal < _num_columns) << "ordinal:" << ordinal << ",
_num_columns:" << _num_columns;
- return _cols[ordinal];
+ return *_cols[ordinal];
}
+// Returns the sparse column stored at position `ordinal`.
+// `ordinal` must be smaller than _sparse_cols.size(); this is only verified
+// through DCHECK. Note that the DCHECK message labels that size "_num_columns".
const TabletColumn& TabletColumn::sparse_column_at(size_t ordinal) const {
DCHECK(ordinal < _sparse_cols.size())
<< "ordinal:" << ordinal << ", _num_columns:" <<
_sparse_cols.size();
- return _sparse_cols[ordinal];
+ return *_sparse_cols[ordinal];
}
+// Returns the column whose unique id is `col_unique_id`.
+// Both the id lookup and the vector access use at(), so an unknown id throws
+// std::out_of_range instead of inserting a new map entry.
const TabletColumn& TabletSchema::column_by_uid(int32_t col_unique_id) const {
- return _cols.at(_field_id_to_index.at(col_unique_id));
+ return *_cols.at(_field_id_to_index.at(col_unique_id));
}
+// Mutable counterpart of column_by_uid(): returns a non-const reference to the
+// column whose unique id is `col_unique_id`. Uses at() for both lookups, so an
+// unknown id throws std::out_of_range.
TabletColumn& TabletSchema::mutable_column_by_uid(int32_t col_unique_id) {
- return _cols.at(_field_id_to_index.at(col_unique_id));
+ return *_cols.at(_field_id_to_index.at(col_unique_id));
}
void TabletSchema::update_indexes_from_thrift(const
std::vector<doris::TOlapTableIndex>& tindexes) {
@@ -1199,7 +1203,7 @@ void TabletSchema::update_indexes_from_thrift(const
std::vector<doris::TOlapTabl
}
Status TabletSchema::have_column(const std::string& field_name) const {
- if (!_field_name_to_index.contains(field_name)) {
+ if (!_field_name_to_index.contains(StringRef(field_name))) {
return Status::Error<ErrorCode::INTERNAL_ERROR>(
"Not found field_name, field_name:{}, schema:{}", field_name,
get_all_field_names());
@@ -1208,10 +1212,10 @@ Status TabletSchema::have_column(const std::string&
field_name) const {
}
+// Returns the column registered under `field_name`.
+// The name must be present in the name-to-index map; this is only verified
+// through DCHECK, and the iterator is dereferenced unconditionally afterwards.
const TabletColumn& TabletSchema::column(const std::string& field_name) const {
- DCHECK(_field_name_to_index.contains(field_name))
+ // Look the name up once and reuse the iterator for both the check and the access.
+ const auto found = _field_name_to_index.find(StringRef(field_name));
+ DCHECK(found != _field_name_to_index.end())
<< ", field_name=" << field_name << ", field_name_to_index=" <<
get_all_field_names();
- const auto& found = _field_name_to_index.find(field_name);
- return _cols[found->second];
+ return *_cols[found->second];
}
std::vector<const TabletIndex*> TabletSchema::get_indexes_for_column(
@@ -1219,7 +1223,7 @@ std::vector<const TabletIndex*>
TabletSchema::get_indexes_for_column(
std::vector<const TabletIndex*> indexes_for_column;
int32_t col_unique_id = col.unique_id();
const std::string& suffix_path =
- !col.path_info().empty() ?
escape_for_path_name(col.path_info().get_path()) : "";
+ col.has_path_info() ?
escape_for_path_name(col.path_info_ptr()->get_path()) : "";
// TODO use more efficient impl
for (size_t i = 0; i < _indexes.size(); i++) {
for (int32_t id : _indexes[i].col_unique_ids()) {
@@ -1247,7 +1251,7 @@ bool TabletSchema::has_inverted_index(const TabletColumn&
col) const {
// TODO use more efficient impl
int32_t col_unique_id = col.unique_id();
const std::string& suffix_path =
- !col.path_info().empty() ?
escape_for_path_name(col.path_info().get_path()) : "";
+ col.has_path_info() ?
escape_for_path_name(col.path_info_ptr()->get_path()) : "";
for (size_t i = 0; i < _indexes.size(); i++) {
if (_indexes[i].index_type() == IndexType::INVERTED) {
for (int32_t id : _indexes[i].col_unique_ids()) {
@@ -1269,7 +1273,6 @@ bool
TabletSchema::has_inverted_index_with_index_id(int32_t index_id,
return true;
}
}
-
return false;
}
@@ -1305,7 +1308,7 @@ const TabletIndex* TabletSchema::get_inverted_index(const
TabletColumn& col) con
// Use the parent id if the unique id is not assigned; this can happen when
// accessing subcolumns of variants
int32_t col_unique_id = col.unique_id() < 0 ? col.parent_unique_id() :
col.unique_id();
const std::string& suffix_path =
- !col.path_info().empty() ?
escape_for_path_name(col.path_info().get_path()) : "";
+ col.has_path_info() ?
escape_for_path_name(col.path_info_ptr()->get_path()) : "";
return get_inverted_index(col_unique_id, suffix_path);
}
@@ -1344,7 +1347,7 @@ vectorized::Block TabletSchema::create_block(
const std::unordered_set<uint32_t>* tablet_columns_need_convert_null)
const {
vectorized::Block block;
for (int i = 0; i < return_columns.size(); ++i) {
- const auto& col = _cols[return_columns[i]];
+ const auto& col = *_cols[return_columns[i]];
bool is_nullable = (tablet_columns_need_convert_null != nullptr &&
tablet_columns_need_convert_null->find(return_columns[i]) !=
tablet_columns_need_convert_null->end());
@@ -1358,11 +1361,11 @@ vectorized::Block TabletSchema::create_block(
+// Builds a Block with one entry per schema column, in `_cols` order.
+// For each column a data type is obtained from DataTypeFactory, and the block
+// entry is made of a column created from that type, the type itself and the
+// column name. When `ignore_dropped_col` is true, columns for which
+// is_dropped_column() returns true are left out.
vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const {
vectorized::Block block;
for (const auto& col : _cols) {
- if (ignore_dropped_col && is_dropped_column(col)) {
+ if (ignore_dropped_col && is_dropped_column(*col)) {
continue;
}
- auto data_type =
vectorized::DataTypeFactory::instance().create_data_type(col);
- block.insert({data_type->create_column(), data_type, col.name()});
+ auto data_type =
vectorized::DataTypeFactory::instance().create_data_type(*col);
+ block.insert({data_type->create_column(), data_type, col->name()});
}
return block;
}
@@ -1370,7 +1373,7 @@ vectorized::Block TabletSchema::create_block(bool
ignore_dropped_col) const {
vectorized::Block TabletSchema::create_block_by_cids(const
std::vector<uint32_t>& cids) {
vectorized::Block block;
for (const auto& cid : cids) {
- auto col = _cols[cid];
+ const auto& col = *_cols[cid];
auto data_type =
vectorized::DataTypeFactory::instance().create_data_type(col);
block.insert({data_type->create_column(), data_type, col.name()});
}
@@ -1397,6 +1400,11 @@ bool operator==(const TabletColumn& a, const
TabletColumn& b) {
if (a._index_length != b._index_length) return false;
if (a._is_bf_column != b._is_bf_column) return false;
if (a._has_bitmap_index != b._has_bitmap_index) return false;
+ // Path infos are equal only when both are absent, or both are present with
+ // equal contents. Each null check must compare `a` against `b`.
+ if (a._column_path == nullptr && b._column_path != nullptr) return false;
+ if (b._column_path == nullptr && a._column_path != nullptr) return false;
+ if (b._column_path != nullptr && a._column_path != nullptr &&
+ *a._column_path != *b._column_path)
+ return false;
return true;
}
@@ -1408,7 +1416,7 @@ bool operator==(const TabletSchema& a, const
TabletSchema& b) {
if (a._keys_type != b._keys_type) return false;
if (a._cols.size() != b._cols.size()) return false;
for (int i = 0; i < a._cols.size(); ++i) {
- if (a._cols[i] != b._cols[i]) return false;
+ if (*a._cols[i] != *b._cols[i]) return false;
}
if (a._num_columns != b._num_columns) return false;
if (a._num_key_columns != b._num_key_columns) return false;
diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h
index 78f74a761f0..40857ab7427 100644
--- a/be/src/olap/tablet_schema.h
+++ b/be/src/olap/tablet_schema.h
@@ -21,6 +21,7 @@
#include <gen_cpp/olap_common.pb.h>
#include <gen_cpp/olap_file.pb.h>
#include <gen_cpp/segment_v2.pb.h>
+#include <parallel_hashmap/phmap.h>
#include <stddef.h>
#include <stdint.h>
@@ -39,7 +40,9 @@
#include "runtime/descriptors.h"
#include "util/string_util.h"
#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/common/string_ref.h"
#include "vec/common/string_utils/string_utils.h"
+#include "vec/core/types.h"
#include "vec/json/path_in_data.h"
namespace doris {
@@ -52,6 +55,9 @@ class IDataType;
struct OlapTableIndexSchema;
class TColumn;
class TOlapTableIndex;
+class TabletColumn;
+
+using TabletColumnPtr = std::shared_ptr<TabletColumn>;
class TabletColumn {
public:
@@ -129,8 +135,8 @@ public:
void add_sub_column(TabletColumn& sub_column);
uint32_t get_subtype_count() const { return _sub_column_count; }
- const TabletColumn& get_sub_column(uint32_t i) const { return
_sub_columns[i]; }
- const std::vector<TabletColumn>& get_sub_columns() const { return
_sub_columns; }
+ const TabletColumn& get_sub_column(uint32_t i) const { return
*_sub_columns[i]; }
+ const std::vector<TabletColumnPtr>& get_sub_columns() const { return
_sub_columns; }
friend bool operator==(const TabletColumn& a, const TabletColumn& b);
friend bool operator!=(const TabletColumn& a, const TabletColumn& b);
@@ -144,16 +150,19 @@ public:
bool is_row_store_column() const;
std::string get_aggregation_name() const { return _aggregation_name; }
bool get_result_is_nullable() const { return _result_is_nullable; }
- const vectorized::PathInData& path_info() const { return _column_path; }
+ bool has_path_info() const { return _column_path != nullptr &&
!_column_path->empty(); }
+ const vectorized::PathInDataPtr& path_info_ptr() const { return
_column_path; }
// If it is an extracted column from variant column
- bool is_extracted_column() const { return !_column_path.empty() &&
_parent_col_unique_id > 0; };
+ bool is_extracted_column() const {
+ return _column_path != nullptr && !_column_path->empty() &&
_parent_col_unique_id > 0;
+ };
int32_t parent_unique_id() const { return _parent_col_unique_id; }
void set_parent_unique_id(int32_t col_unique_id) { _parent_col_unique_id =
col_unique_id; }
std::shared_ptr<const vectorized::IDataType> get_vec_type() const;
void append_sparse_column(TabletColumn column);
const TabletColumn& sparse_column_at(size_t oridinal) const;
- const std::vector<TabletColumn>& sparse_columns() const;
+ const std::vector<TabletColumnPtr>& sparse_columns() const;
size_t num_sparse_columns() const { return _num_sparse_columns; }
private:
@@ -175,29 +184,30 @@ private:
std::string _default_value;
bool _is_decimal = false;
- int32_t _precision;
- int32_t _frac;
+ int32_t _precision = -1;
+ int32_t _frac = -1;
- int32_t _length;
- int32_t _index_length;
+ int32_t _length = -1;
+ int32_t _index_length = -1;
bool _is_bf_column = false;
bool _has_bitmap_index = false;
bool _visible = true;
int32_t _parent_col_unique_id = -1;
- std::vector<TabletColumn> _sub_columns;
+ std::vector<TabletColumnPtr> _sub_columns;
uint32_t _sub_column_count = 0;
bool _result_is_nullable = false;
- vectorized::PathInData _column_path;
+ vectorized::PathInDataPtr _column_path;
// Record information about columns merged into a sparse column within a
variant
// `{"id": 100, "name" : "jack", "point" : 3.9}`
// If the information mentioned above is inserted into the variant column,
// 'id' and 'name' are correctly extracted, while 'point' is merged into
the sparse column due to its sparsity.
// The path_info and type of 'point' will be recorded using the
TabletColumn.
- std::vector<TabletColumn> _sparse_cols;
+ // Use shared_ptr for reuse and reducing column memory usage
+ std::vector<TabletColumnPtr> _sparse_cols;
size_t _num_sparse_columns = 0;
};
@@ -239,7 +249,7 @@ public:
void set_escaped_escaped_index_suffix_path(const std::string& name);
private:
- int64_t _index_id;
+ int64_t _index_id = -1;
// Identify the different index with the same _index_id
std::string _escaped_index_suffix_path;
std::string _index_name;
@@ -282,8 +292,8 @@ public:
Status have_column(const std::string& field_name) const;
const TabletColumn& column_by_uid(int32_t col_unique_id) const;
TabletColumn& mutable_column_by_uid(int32_t col_unique_id);
- const std::vector<TabletColumn>& columns() const;
- std::vector<TabletColumn>& mutable_columns();
+ void replace_column(size_t pos, TabletColumn new_col);
+ const std::vector<TabletColumnPtr>& columns() const;
size_t num_columns() const { return _num_columns; }
size_t num_key_columns() const { return _num_key_columns; }
const std::vector<uint32_t>& cluster_key_idxes() const { return
_cluster_key_idxes; }
@@ -380,7 +390,7 @@ public:
if (str.size() > 1) {
str += ", ";
}
- str += p.first + "(" + std::to_string(_cols[p.second].unique_id())
+ ")";
+ str += p.first.to_string() + "(" +
std::to_string(_cols[p.second]->unique_id()) + ")";
}
str += "]";
return str;
@@ -394,12 +404,12 @@ public:
str += ", ";
}
str += "(";
- str += p.name();
+ str += p->name();
str += ", ";
- str += TabletColumn::get_string_by_field_type(p.type());
+ str += TabletColumn::get_string_by_field_type(p->type());
str += ", ";
str += "is_nullable:";
- str += (p.is_nullable() ? "true" : "false");
+ str += (p->is_nullable() ? "true" : "false");
str += ")";
}
str += "]";
@@ -420,12 +430,12 @@ private:
KeysType _keys_type = DUP_KEYS;
SortType _sort_type = SortType::LEXICAL;
size_t _sort_col_num = 0;
- std::vector<TabletColumn> _cols;
+ std::vector<TabletColumnPtr> _cols;
std::vector<TabletIndex> _indexes;
- std::unordered_map<std::string, int32_t> _field_name_to_index;
+ std::unordered_map<StringRef, int32_t, StringRefHash> _field_name_to_index;
std::unordered_map<int32_t, int32_t> _field_id_to_index;
- std::unordered_map<vectorized::PathInData, int32_t,
vectorized::PathInData::Hash>
+ std::unordered_map<vectorized::PathInDataRef, int32_t,
vectorized::PathInDataRef::Hash>
_field_path_to_index;
size_t _num_columns = 0;
size_t _num_variant_columns = 0;
diff --git a/be/src/service/internal_service.cpp
b/be/src/service/internal_service.cpp
index f06446123d0..c81a5e90c3c 100644
--- a/be/src/service/internal_service.cpp
+++ b/be/src/service/internal_service.cpp
@@ -816,24 +816,24 @@ void PInternalServiceImpl::_get_column_ids_by_tablet_ids(
std::set<int32_t> column_ids;
for (const auto& col : columns) {
- column_ids.insert(col.unique_id());
+ column_ids.insert(col->unique_id());
}
filter_set.insert(std::move(column_ids));
if (id_to_column.empty()) {
for (const auto& col : columns) {
- id_to_column.insert(std::pair {col.unique_id(), &col});
+ id_to_column.insert(std::pair {col->unique_id(),
col.get()});
}
} else {
for (const auto& col : columns) {
- auto it = id_to_column.find(col.unique_id());
- if (it == id_to_column.end() || *(it->second) != col) {
+ auto it = id_to_column.find(col->unique_id());
+ if (it == id_to_column.end() || *(it->second) != *col) {
ColumnPB prev_col_pb;
ColumnPB curr_col_pb;
if (it != id_to_column.end()) {
it->second->to_schema_pb(&prev_col_pb);
}
- col.to_schema_pb(&curr_col_pb);
+ col->to_schema_pb(&curr_col_pb);
std::stringstream ss;
ss << "consistency check failed: index{ " << index_id
<< " }"
<< " got inconsistent schema, prev column: " <<
prev_col_pb.DebugString()
@@ -864,7 +864,7 @@ void PInternalServiceImpl::_get_column_ids_by_tablet_ids(
entry->set_index_id(index_id);
auto col_name_to_id = entry->mutable_col_name_to_id();
for (const auto& column : columns) {
- (*col_name_to_id)[column.name()] = column.unique_id();
+ (*col_name_to_id)[column->name()] = column->unique_id();
}
}
response->mutable_status()->set_status_code(TStatusCode::OK);
diff --git a/be/src/vec/common/schema_util.cpp
b/be/src/vec/common/schema_util.cpp
index b3fa9234cb3..98148fa55bc 100644
--- a/be/src/vec/common/schema_util.cpp
+++ b/be/src/vec/common/schema_util.cpp
@@ -314,12 +314,12 @@ void update_least_common_schema(const
std::vector<TabletSchemaSPtr>& schemas,
// Types of subcolumns by path from all tuples.
std::map<PathInData, DataTypes> subcolumns_types;
for (const TabletSchemaSPtr& schema : schemas) {
- for (const TabletColumn& col : schema->columns()) {
+ for (const TabletColumnPtr& col : schema->columns()) {
// Get subcolumns of this variant
- if (!col.path_info().empty() && col.parent_unique_id() > 0 &&
- col.parent_unique_id() == variant_col_unique_id) {
- subcolumns_types[col.path_info()].push_back(
- DataTypeFactory::instance().create_data_type(col,
col.is_nullable()));
+ if (col->has_path_info() && col->parent_unique_id() > 0 &&
+ col->parent_unique_id() == variant_col_unique_id) {
+ subcolumns_types[*col->path_info_ptr()].push_back(
+ DataTypeFactory::instance().create_data_type(*col,
col->is_nullable()));
}
}
}
@@ -328,15 +328,15 @@ void update_least_common_schema(const
std::vector<TabletSchemaSPtr>& schemas,
// maybe dropped
continue;
}
- for (const TabletColumn& col :
-
schema->mutable_column_by_uid(variant_col_unique_id).sparse_columns()) {
+ for (const TabletColumnPtr& col :
+ schema->column_by_uid(variant_col_unique_id).sparse_columns()) {
// Get subcolumns of this variant
- if (!col.path_info().empty() && col.parent_unique_id() > 0 &&
- col.parent_unique_id() == variant_col_unique_id &&
+ if (col->has_path_info() && col->parent_unique_id() > 0 &&
+ col->parent_unique_id() == variant_col_unique_id &&
// this column have been found in origin columns
- subcolumns_types.find(col.path_info()) !=
subcolumns_types.end()) {
- subcolumns_types[col.path_info()].push_back(
- DataTypeFactory::instance().create_data_type(col,
col.is_nullable()));
+ subcolumns_types.find(*col->path_info_ptr()) !=
subcolumns_types.end()) {
+ subcolumns_types[*col->path_info_ptr()].push_back(
+ DataTypeFactory::instance().create_data_type(*col,
col->is_nullable()));
}
}
}
@@ -354,14 +354,14 @@ void update_least_sparse_column(const
std::vector<TabletSchemaSPtr>& schemas,
// maybe dropped
continue;
}
- for (const TabletColumn& col :
-
schema->mutable_column_by_uid(variant_col_unique_id).sparse_columns()) {
+ for (const TabletColumnPtr& col :
+ schema->column_by_uid(variant_col_unique_id).sparse_columns()) {
// Get subcolumns of this variant
- if (!col.path_info().empty() && col.parent_unique_id() > 0 &&
- col.parent_unique_id() == variant_col_unique_id &&
- path_set.find(col.path_info()) == path_set.end()) {
- subcolumns_types[col.path_info()].push_back(
- DataTypeFactory::instance().create_data_type(col,
col.is_nullable()));
+ if (col->has_path_info() && col->parent_unique_id() > 0 &&
+ col->parent_unique_id() == variant_col_unique_id &&
+ path_set.find(*col->path_info_ptr()) == path_set.end()) {
+ subcolumns_types[*col->path_info_ptr()].push_back(
+ DataTypeFactory::instance().create_data_type(*col,
col->is_nullable()));
}
}
}
@@ -372,29 +372,29 @@ void inherit_tablet_index(TabletSchemaSPtr& schema) {
std::unordered_map<int32_t, TabletIndex> variants_index_meta;
// Get all variants tablet index metas if exist
for (const auto& col : schema->columns()) {
- auto index_meta = schema->get_inverted_index(col.unique_id(), "");
- if (col.is_variant_type() && index_meta != nullptr) {
- variants_index_meta.emplace(col.unique_id(), *index_meta);
+ auto index_meta = schema->get_inverted_index(col->unique_id(), "");
+ if (col->is_variant_type() && index_meta != nullptr) {
+ variants_index_meta.emplace(col->unique_id(), *index_meta);
}
}
// Add index meta if extracted column is missing index meta
for (const auto& col : schema->columns()) {
- if (!col.is_extracted_column()) {
+ if (!col->is_extracted_column()) {
continue;
}
- auto it = variants_index_meta.find(col.parent_unique_id());
+ auto it = variants_index_meta.find(col->parent_unique_id());
// variant has no index meta, ignore
if (it == variants_index_meta.end()) {
continue;
}
- auto index_meta = schema->get_inverted_index(col);
+ auto index_meta = schema->get_inverted_index(*col);
// add index meta
TabletIndex index_info = it->second;
-
index_info.set_escaped_escaped_index_suffix_path(col.path_info().get_path());
+
index_info.set_escaped_escaped_index_suffix_path(col->path_info_ptr()->get_path());
if (index_meta != nullptr) {
// already exist
- schema->update_index(col, index_info);
+ schema->update_index(*col, index_info);
} else {
schema->append_index(index_info);
}
@@ -415,12 +415,12 @@ Status get_least_common_schema(const
std::vector<TabletSchemaSPtr>& schemas,
// Merge columns from other schemas
output_schema->clear_columns();
// Get all columns without extracted columns and collect variant col
unique id
- for (const TabletColumn& col : base_schema->columns()) {
- if (col.is_variant_type()) {
- variant_column_unique_id.push_back(col.unique_id());
+ for (const TabletColumnPtr& col : base_schema->columns()) {
+ if (col->is_variant_type()) {
+ variant_column_unique_id.push_back(col->unique_id());
}
- if (!col.is_extracted_column()) {
- output_schema->append_column(col);
+ if (!col->is_extracted_column()) {
+ output_schema->append_column(*col);
}
}
};
@@ -646,7 +646,7 @@ void rebuild_schema_and_block(const TabletSchemaSPtr&
original,
bool is_nullable = column_ref->is_nullable();
const vectorized::ColumnObject& object_column =
assert_cast<vectorized::ColumnObject&>(
remove_nullable(column_ref)->assume_mutable_ref());
- const TabletColumn& parent_column = original->columns()[variant_pos];
+ const TabletColumn& parent_column = *original->columns()[variant_pos];
CHECK(object_column.is_finalized());
std::shared_ptr<vectorized::ColumnObject::Subcolumns::Node> root;
// common extracted columns
@@ -689,7 +689,9 @@ void rebuild_schema_and_block(const TabletSchemaSPtr&
original,
vectorized::PathInDataBuilder full_root_path_builder;
auto full_root_path =
full_root_path_builder.append(parent_column.name_lower_case(),
false).build();
-
flush_schema->mutable_columns()[variant_pos].set_path_info(full_root_path);
+ TabletColumn new_col = flush_schema->column(variant_pos);
+ new_col.set_path_info(full_root_path);
+ flush_schema->replace_column(variant_pos, new_col);
VLOG_DEBUG << "set root_path : " << full_root_path.get_path();
}
diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp
b/be/src/vec/exec/scan/new_olap_scanner.cpp
index 18b79523fe3..8f2e99a5614 100644
--- a/be/src/vec/exec/scan/new_olap_scanner.cpp
+++ b/be/src/vec/exec/scan/new_olap_scanner.cpp
@@ -95,7 +95,7 @@ static std::string read_columns_to_string(TabletSchemaSPtr
tablet_schema,
if (it != read_columns.cbegin()) {
read_columns_string += ", ";
}
- read_columns_string += tablet_schema->columns().at(*it).name();
+ read_columns_string += tablet_schema->columns().at(*it)->name();
if (i >= col_per_line) {
read_columns_string += "\n";
i = 0;
@@ -433,7 +433,7 @@ Status NewOlapScanner::_init_variant_columns() {
TabletColumn subcol =
TabletColumn::create_materialized_variant_column(
tablet_schema->column_by_uid(slot->col_unique_id()).name_lower_case(),
slot->column_paths(), slot->col_unique_id());
- if (tablet_schema->field_index(subcol.path_info()) < 0) {
+ if (tablet_schema->field_index(*subcol.path_info_ptr()) < 0) {
tablet_schema->append_column(subcol,
TabletSchema::ColumnType::VARIANT);
}
}
diff --git a/be/src/vec/json/path_in_data.cpp b/be/src/vec/json/path_in_data.cpp
index ae91b444994..4b3692f4776 100644
--- a/be/src/vec/json/path_in_data.cpp
+++ b/be/src/vec/json/path_in_data.cpp
@@ -22,6 +22,8 @@
#include <assert.h>
+#include <string_view>
+
#include "vec/common/sip_hash.h"
namespace doris::vectorized {
@@ -118,12 +120,15 @@ void PathInData::from_protobuf(const
segment_v2::ColumnPathInfo& pb) {
path = pb.path();
has_nested = pb.has_has_nested();
parts.reserve(pb.path_part_infos().size());
+ const char* begin = path.data();
for (const segment_v2::ColumnPathPartInfo& part_info :
pb.path_part_infos()) {
Part part;
part.is_nested = part_info.is_nested();
part.anonymous_array_level = part_info.anonymous_array_level();
- part.key = part_info.key();
+ // use string_view to ref data in path
+ part.key = std::string_view {begin, part_info.key().length()};
parts.push_back(part);
+ begin += part.key.length() + 1;
}
}
diff --git a/be/src/vec/json/path_in_data.h b/be/src/vec/json/path_in_data.h
index aba700bb2f7..1367970f10d 100644
--- a/be/src/vec/json/path_in_data.h
+++ b/be/src/vec/json/path_in_data.h
@@ -23,6 +23,7 @@
#include <stddef.h>
#include <algorithm>
+#include <memory>
#include <string>
#include <string_view>
#include <vector>
@@ -35,6 +36,8 @@
namespace doris::vectorized {
/// Class that represents path in document, e.g. JSON.
+class PathInData;
+using PathInDataPtr = std::shared_ptr<PathInData>;
class PathInData {
public:
struct Part {
@@ -100,6 +103,7 @@ private:
/// Cached to avoid linear complexity at 'has_nested'.
bool has_nested = false;
};
+
class PathInDataBuilder {
public:
const PathInData::Parts& get_parts() const { return parts; }
@@ -126,4 +130,16 @@ struct ParseResult {
std::vector<PathInData> paths;
std::vector<Field> values;
};
+
+// Hash-map key that wraps a raw pointer to a PathInData.
+// Hashing and equality both go through the pointed-to PathInData, so two refs
+// to different objects holding equal paths are treated as the same key.
+// The struct only stores the pointer: `ref` is dereferenced by Hash and by
+// operator==, so it must be non-null and must stay valid while the ref is used.
+struct PathInDataRef {
+ const PathInData* ref;
+ struct Hash {
+ size_t operator()(const PathInDataRef& value) const {
+ return PathInData::Hash {}(*value.ref);
+ }
+ };
+ // Intentionally implicit: a plain `const PathInData*` converts to a ref.
+ PathInDataRef(const PathInData* ptr) : ref(ptr) {}
+ bool operator==(const PathInDataRef& other) const { return *this->ref ==
*other.ref; }
+};
+
} // namespace doris::vectorized
diff --git a/be/src/vec/jsonb/serialize.cpp b/be/src/vec/jsonb/serialize.cpp
index 0251dc12974..006cb6b8f79 100644
--- a/be/src/vec/jsonb/serialize.cpp
+++ b/be/src/vec/jsonb/serialize.cpp
@@ -55,7 +55,7 @@ void JsonbSerializeUtil::block_to_jsonb(const TabletSchema&
schema, const Block&
jsonb_writer.writeStartObject();
for (int j = 0; j < num_cols; ++j) {
const auto& column = block.get_by_position(j).column;
- const auto& tablet_column = schema.columns()[j];
+ const auto& tablet_column = *schema.columns()[j];
if (tablet_column.is_row_store_column()) {
// ignore dst row store column
continue;
diff --git a/be/src/vec/olap/olap_data_convertor.cpp
b/be/src/vec/olap/olap_data_convertor.cpp
index 7213ded16e1..3da1f7c8678 100644
--- a/be/src/vec/olap/olap_data_convertor.cpp
+++ b/be/src/vec/olap/olap_data_convertor.cpp
@@ -57,7 +57,7 @@ OlapBlockDataConvertor::OlapBlockDataConvertor(const
TabletSchema* tablet_schema
assert(tablet_schema);
const auto& columns = tablet_schema->columns();
for (const auto& col : columns) {
- _convertors.emplace_back(create_olap_column_data_convertor(col));
+ _convertors.emplace_back(create_olap_column_data_convertor(*col));
}
}
diff --git a/be/test/olap/delete_bitmap_calculator_test.cpp
b/be/test/olap/delete_bitmap_calculator_test.cpp
index 4941ce39fd6..00856f69f10 100644
--- a/be/test/olap/delete_bitmap_calculator_test.cpp
+++ b/be/test/olap/delete_bitmap_calculator_test.cpp
@@ -50,20 +50,20 @@ static RowsetId rowset_id {0};
using Generator = std::function<void(size_t rid, int cid, RowCursorCell& cell)>;
-static TabletColumn create_int_sequence_value(int32_t id, bool is_nullable = true,
- bool is_bf_column = false,
- bool has_bitmap_index = false) {
- TabletColumn column;
- column._unique_id = id;
- column._col_name = std::to_string(id);
- column._type = FieldType::OLAP_FIELD_TYPE_INT;
- column._is_key = false;
- column._is_nullable = is_nullable;
- column._length = 4;
- column._index_length = 4;
- column._is_bf_column = is_bf_column;
- column._has_bitmap_index = has_bitmap_index;
- column.set_name(SEQUENCE_COL);
+static TabletColumnPtr create_int_sequence_value(int32_t id, bool is_nullable = true,
+ bool is_bf_column = false,
+ bool has_bitmap_index = false) {
+ TabletColumnPtr column = std::make_shared<TabletColumn>();
+ column->_unique_id = id;
+ column->_col_name = std::to_string(id);
+ column->_type = FieldType::OLAP_FIELD_TYPE_INT;
+ column->_is_key = false;
+ column->_is_nullable = is_nullable;
+ column->_length = 4;
+ column->_index_length = 4;
+ column->_is_bf_column = is_bf_column;
+ column->_has_bitmap_index = has_bitmap_index;
+ column->set_name(SEQUENCE_COL);
return column;
}
@@ -82,12 +82,12 @@ public:
EXPECT_TRUE(io::global_local_filesystem()->delete_directory(kSegmentDir).ok());
}
- TabletSchemaSPtr create_schema(const std::vector<TabletColumn>& columns,
+ TabletSchemaSPtr create_schema(const std::vector<TabletColumnPtr>& columns,
KeysType keys_type = UNIQUE_KEYS) {
TabletSchemaSPtr res = std::make_shared<TabletSchema>();
for (auto& col : columns) {
- res->append_column(col);
+ res->append_column(*col);
}
res->_keys_type = keys_type;
return res;
@@ -144,7 +144,7 @@ public:
size_t const num_columns = num_key_columns + has_sequence_col + num_value_columns;
size_t const seq_col_idx = has_sequence_col ? num_key_columns : -1;
- std::vector<TabletColumn> columns;
+ std::vector<TabletColumnPtr> columns;
for (int i = 0; i < num_key_columns; ++i) {
columns.emplace_back(create_int_key(i));
diff --git a/be/test/olap/memtable_flush_executor_test.cpp b/be/test/olap/memtable_flush_executor_test.cpp
index 23d142ed7e0..687511a47d1 100644
--- a/be/test/olap/memtable_flush_executor_test.cpp
+++ b/be/test/olap/memtable_flush_executor_test.cpp
@@ -69,13 +69,16 @@ void tear_down() {
}
Schema create_schema() {
- std::vector<TabletColumn> col_schemas;
- col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
- FieldType::OLAP_FIELD_TYPE_SMALLINT, true);
- col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
- FieldType::OLAP_FIELD_TYPE_INT, true);
- col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM,
- FieldType::OLAP_FIELD_TYPE_BIGINT, true);
+ std::vector<TabletColumnPtr> col_schemas;
+ col_schemas.emplace_back(
+ std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
+ FieldType::OLAP_FIELD_TYPE_SMALLINT, true));
+ col_schemas.emplace_back(
+ std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
+ FieldType::OLAP_FIELD_TYPE_INT, true));
+ col_schemas.emplace_back(
+ std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM,
+ FieldType::OLAP_FIELD_TYPE_BIGINT, true));
Schema schema(col_schemas, 2);
return schema;
}
diff --git a/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp b/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp
index f4dcb69b048..bf1cc69e900 100644
--- a/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp
+++ b/be/test/olap/rowset/segment_v2/zone_map_index_test.cpp
@@ -156,8 +156,8 @@ TEST_F(ColumnZoneMapTest, NormalTestIntPage) {
std::string filename = kTestDir + "/NormalTestIntPage";
auto fs = io::global_local_filesystem();
- TabletColumn int_column = create_int_key(0);
- Field* field = FieldFactory::create(int_column);
+ TabletColumnPtr int_column = create_int_key(0);
+ Field* field = FieldFactory::create(*int_column);
std::unique_ptr<ZoneMapIndexWriter> builder(nullptr);
static_cast<void>(ZoneMapIndexWriter::create(field, builder));
@@ -210,25 +210,25 @@ TEST_F(ColumnZoneMapTest, NormalTestIntPage) {
// Test for string
TEST_F(ColumnZoneMapTest, NormalTestVarcharPage) {
- TabletColumn varchar_column = create_varchar_key(0);
- Field* field = FieldFactory::create(varchar_column);
+ TabletColumnPtr varchar_column = create_varchar_key(0);
+ Field* field = FieldFactory::create(*varchar_column);
test_string("NormalTestVarcharPage", field);
delete field;
}
// Test for string
TEST_F(ColumnZoneMapTest, NormalTestCharPage) {
- TabletColumn char_column = create_char_key(0);
- Field* field = FieldFactory::create(char_column);
+ TabletColumnPtr char_column = create_char_key(0);
+ Field* field = FieldFactory::create(*char_column);
test_string("NormalTestCharPage", field);
delete field;
}
// Test for zone map limit
TEST_F(ColumnZoneMapTest, ZoneMapCut) {
- TabletColumn varchar_column = create_varchar_key(0);
- varchar_column.set_index_length(1024);
- Field* field = FieldFactory::create(varchar_column);
+ TabletColumnPtr varchar_column = create_varchar_key(0);
+ varchar_column->set_index_length(1024);
+ Field* field = FieldFactory::create(*varchar_column);
test_string("ZoneMapCut", field);
delete field;
}
diff --git a/be/test/olap/tablet_schema_helper.cpp b/be/test/olap/tablet_schema_helper.cpp
index f5ff2295357..da6720b384a 100644
--- a/be/test/olap/tablet_schema_helper.cpp
+++ b/be/test/olap/tablet_schema_helper.cpp
@@ -19,80 +19,81 @@
#include <string.h>
+#include "olap/tablet_schema.h"
#include "util/slice.h"
#include "vec/common/arena.h"
namespace doris {
-TabletColumn create_int_key(int32_t id, bool is_nullable, bool is_bf_column,
- bool has_bitmap_index) {
- TabletColumn column;
- column._unique_id = id;
- column._col_name = std::to_string(id);
- column._type = FieldType::OLAP_FIELD_TYPE_INT;
- column._is_key = true;
- column._is_nullable = is_nullable;
- column._length = 4;
- column._index_length = 4;
- column._is_bf_column = is_bf_column;
- column._has_bitmap_index = has_bitmap_index;
+TabletColumnPtr create_int_key(int32_t id, bool is_nullable, bool is_bf_column,
+ bool has_bitmap_index) {
+ auto column = std::make_shared<TabletColumn>();
+ column->_unique_id = id;
+ column->_col_name = std::to_string(id);
+ column->_type = FieldType::OLAP_FIELD_TYPE_INT;
+ column->_is_key = true;
+ column->_is_nullable = is_nullable;
+ column->_length = 4;
+ column->_index_length = 4;
+ column->_is_bf_column = is_bf_column;
+ column->_has_bitmap_index = has_bitmap_index;
return column;
}
-TabletColumn create_int_value(int32_t id, FieldAggregationMethod agg_method, bool is_nullable,
- const std::string default_value, bool is_bf_column,
- bool has_bitmap_index) {
- TabletColumn column;
- column._unique_id = id;
- column._col_name = std::to_string(id);
- column._type = FieldType::OLAP_FIELD_TYPE_INT;
- column._is_key = false;
- column._aggregation = agg_method;
- column._is_nullable = is_nullable;
- column._length = 4;
- column._index_length = 4;
+TabletColumnPtr create_int_value(int32_t id, FieldAggregationMethod agg_method, bool is_nullable,
+ const std::string default_value, bool is_bf_column,
+ bool has_bitmap_index) {
+ auto column = std::make_shared<TabletColumn>();
+ column->_unique_id = id;
+ column->_col_name = std::to_string(id);
+ column->_type = FieldType::OLAP_FIELD_TYPE_INT;
+ column->_is_key = false;
+ column->_aggregation = agg_method;
+ column->_is_nullable = is_nullable;
+ column->_length = 4;
+ column->_index_length = 4;
if (default_value != "") {
- column._has_default_value = true;
- column._default_value = default_value;
+ column->_has_default_value = true;
+ column->_default_value = default_value;
}
- column._is_bf_column = is_bf_column;
- column._has_bitmap_index = has_bitmap_index;
+ column->_is_bf_column = is_bf_column;
+ column->_has_bitmap_index = has_bitmap_index;
return column;
}
-TabletColumn create_char_key(int32_t id, bool is_nullable) {
- TabletColumn column;
- column._unique_id = id;
- column._col_name = std::to_string(id);
- column._type = FieldType::OLAP_FIELD_TYPE_CHAR;
- column._is_key = true;
- column._is_nullable = is_nullable;
- column._length = 8;
- column._index_length = 1;
+TabletColumnPtr create_char_key(int32_t id, bool is_nullable) {
+ auto column = std::make_shared<TabletColumn>();
+ column->_unique_id = id;
+ column->_col_name = std::to_string(id);
+ column->_type = FieldType::OLAP_FIELD_TYPE_CHAR;
+ column->_is_key = true;
+ column->_is_nullable = is_nullable;
+ column->_length = 8;
+ column->_index_length = 1;
return column;
}
-TabletColumn create_varchar_key(int32_t id, bool is_nullable) {
- TabletColumn column;
- column._unique_id = id;
- column._col_name = std::to_string(id);
- column._type = FieldType::OLAP_FIELD_TYPE_VARCHAR;
- column._is_key = true;
- column._is_nullable = is_nullable;
- column._length = 65533;
- column._index_length = 4;
+TabletColumnPtr create_varchar_key(int32_t id, bool is_nullable) {
+ auto column = std::make_shared<TabletColumn>();
+ column->_unique_id = id;
+ column->_col_name = std::to_string(id);
+ column->_type = FieldType::OLAP_FIELD_TYPE_VARCHAR;
+ column->_is_key = true;
+ column->_is_nullable = is_nullable;
+ column->_length = 65533;
+ column->_index_length = 4;
return column;
}
-TabletColumn create_string_key(int32_t id, bool is_nullable) {
- TabletColumn column;
- column._unique_id = id;
- column._col_name = std::to_string(id);
- column._type = FieldType::OLAP_FIELD_TYPE_STRING;
- column._is_key = true;
- column._is_nullable = is_nullable;
- column._length = 2147483643;
- column._index_length = 4;
+TabletColumnPtr create_string_key(int32_t id, bool is_nullable) {
+ auto column = std::make_shared<TabletColumn>();
+ column->_unique_id = id;
+ column->_col_name = std::to_string(id);
+ column->_type = FieldType::OLAP_FIELD_TYPE_STRING;
+ column->_is_key = true;
+ column->_is_nullable = is_nullable;
+ column->_length = 2147483643;
+ column->_index_length = 4;
return column;
}
diff --git a/be/test/olap/tablet_schema_helper.h b/be/test/olap/tablet_schema_helper.h
index 547882a18cb..1e2a7ba7e79 100644
--- a/be/test/olap/tablet_schema_helper.h
+++ b/be/test/olap/tablet_schema_helper.h
@@ -20,6 +20,7 @@
#include <stddef.h>
#include <stdint.h>
+#include <memory>
#include <string>
#include "olap/olap_common.h"
@@ -30,30 +31,30 @@ namespace vectorized {
class Arena;
} // namespace vectorized
-TabletColumn create_int_key(int32_t id, bool is_nullable = true, bool is_bf_column = false,
- bool has_bitmap_index = false);
+TabletColumnPtr create_int_key(int32_t id, bool is_nullable = true, bool is_bf_column = false,
+ bool has_bitmap_index = false);
-TabletColumn create_int_value(
+TabletColumnPtr create_int_value(
int32_t id,
FieldAggregationMethod agg_method = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM,
bool is_nullable = true, const std::string default_value = "", bool is_bf_column = false,
bool has_bitmap_index = false);
-TabletColumn create_char_key(int32_t id, bool is_nullable = true);
+TabletColumnPtr create_char_key(int32_t id, bool is_nullable = true);
-TabletColumn create_varchar_key(int32_t id, bool is_nullable = true);
+TabletColumnPtr create_varchar_key(int32_t id, bool is_nullable = true);
-TabletColumn create_string_key(int32_t id, bool is_nullable = true);
+TabletColumnPtr create_string_key(int32_t id, bool is_nullable = true);
template <FieldType type>
-TabletColumn create_with_default_value(std::string default_value) {
- TabletColumn column;
- column._type = type;
- column._is_nullable = true;
- column._aggregation = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE;
- column._has_default_value = true;
- column._default_value = default_value;
- column._length = 4;
+TabletColumnPtr create_with_default_value(std::string default_value) {
+ auto column = std::make_shared<TabletColumn>();
+ column->_type = type;
+ column->_is_nullable = true;
+ column->_aggregation = FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE;
+ column->_has_default_value = true;
+ column->_default_value = default_value;
+ column->_length = 4;
return column;
}
diff --git a/be/test/vec/exec/vgeneric_iterators_test.cpp b/be/test/vec/exec/vgeneric_iterators_test.cpp
index 687fb3f36be..dac297448f1 100644
--- a/be/test/vec/exec/vgeneric_iterators_test.cpp
+++ b/be/test/vec/exec/vgeneric_iterators_test.cpp
@@ -45,15 +45,18 @@ public:
};
Schema create_schema() {
- std::vector<TabletColumn> col_schemas;
- col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
- FieldType::OLAP_FIELD_TYPE_SMALLINT, true);
+ std::vector<TabletColumnPtr> col_schemas;
+ col_schemas.emplace_back(
+ std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
+ FieldType::OLAP_FIELD_TYPE_SMALLINT, true));
// c2: int
- col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
- FieldType::OLAP_FIELD_TYPE_INT, true);
+ col_schemas.emplace_back(
+ std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_NONE,
+ FieldType::OLAP_FIELD_TYPE_INT, true));
// c3: big int
- col_schemas.emplace_back(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM,
- FieldType::OLAP_FIELD_TYPE_BIGINT, true);
+ col_schemas.emplace_back(
+ std::make_shared<TabletColumn>(FieldAggregationMethod::OLAP_FIELD_AGGREGATION_SUM,
+ FieldType::OLAP_FIELD_TYPE_BIGINT, true));
Schema schema(col_schemas, 2);
return schema;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]