This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
new 060a5f2b39 [fix](merge-on-write) fix that be may coredump when
sequence column is null (#16832) (#17005)
060a5f2b39 is described below
commit 060a5f2b394380155663d993970bef1bf6da590f
Author: Xin Liao <[email protected]>
AuthorDate: Wed Feb 22 16:16:00 2023 +0800
[fix](merge-on-write) fix that be may coredump when sequence column is null
(#16832) (#17005)
To facilitate the use of the primary key index, encode the seq column to
the minimum value of the corresponding
length when the seq column is null.
cherry-pick from: #16832
---
be/src/olap/rowset/segment_v2/segment_writer.cpp | 41 ++++++----
be/src/olap/rowset/segment_v2/segment_writer.h | 6 +-
.../primary_index/test_mow_with_null_sequence.out | 14 ++++
.../test_mow_with_null_sequence.groovy | 90 ++++++++++++++++++++++
4 files changed, 137 insertions(+), 14 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp
b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index 025bfed484..76c10ae31e 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -63,7 +63,7 @@ SegmentWriter::SegmentWriter(io::FileWriter* file_writer,
uint32_t segment_id,
if (_tablet_schema->has_sequence_col() && _tablet_schema->keys_type() ==
UNIQUE_KEYS &&
_opts.enable_unique_key_merge_on_write) {
const auto& column =
_tablet_schema->column(_tablet_schema->sequence_col_idx());
- _key_coders.push_back(get_key_coder(column.type()));
+ _seq_coder = get_key_coder(column.type());
}
}
@@ -183,6 +183,7 @@ Status SegmentWriter::append_block(const vectorized::Block*
block, size_t row_po
// convert column data from engine format to storage layer format
std::vector<vectorized::IOlapColumnDataAccessor*> key_columns;
+ vectorized::IOlapColumnDataAccessor* seq_column = nullptr;
for (size_t id = 0; id < _column_writers.size(); ++id) {
// olap data convertor alway start from id = 0
auto converted_result = _olap_data_convertor->convert_column_data(id);
@@ -190,11 +191,11 @@ Status SegmentWriter::append_block(const
vectorized::Block* block, size_t row_po
return converted_result.first;
}
auto cid = _column_ids[id];
- if (_has_key && (cid < _num_key_columns ||
(_tablet_schema->has_sequence_col() &&
-
_tablet_schema->keys_type() == UNIQUE_KEYS &&
-
_opts.enable_unique_key_merge_on_write &&
- cid ==
_tablet_schema->sequence_col_idx()))) {
+ if (_has_key && cid < _num_key_columns) {
key_columns.push_back(converted_result.second);
+ } else if (_has_key && _tablet_schema->has_sequence_col() &&
+ cid == _tablet_schema->sequence_col_idx()) {
+ seq_column = converted_result.second;
}
RETURN_IF_ERROR(_column_writers[id]->append(converted_result.second->get_nullmap(),
converted_result.second->get_data(), num_rows));
@@ -203,8 +204,11 @@ Status SegmentWriter::append_block(const
vectorized::Block* block, size_t row_po
if (_tablet_schema->keys_type() == UNIQUE_KEYS &&
_opts.enable_unique_key_merge_on_write) {
// create primary indexes
for (size_t pos = 0; pos < num_rows; pos++) {
- RETURN_IF_ERROR(
-
_primary_key_index_builder->add_item(_full_encode_keys(key_columns, pos)));
+ std::string key = _full_encode_keys(key_columns, pos);
+ if (_tablet_schema->has_sequence_col()) {
+ _encode_seq_column(seq_column, pos, &key);
+ }
+ RETURN_IF_ERROR(_primary_key_index_builder->add_item(key));
}
} else {
// create short key indexes'
@@ -240,12 +244,7 @@ std::string SegmentWriter::_full_encode_keys(
const std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns,
size_t pos,
bool null_first) {
assert(_key_index_size.size() == _num_key_columns);
- if (_tablet_schema->has_sequence_col() &&
_opts.enable_unique_key_merge_on_write) {
- assert(key_columns.size() == _num_key_columns + 1 &&
- _key_coders.size() == _num_key_columns + 1);
- } else {
- assert(key_columns.size() == _num_key_columns && _key_coders.size() ==
_num_key_columns);
- }
+ assert(key_columns.size() == _num_key_columns && _key_coders.size() ==
_num_key_columns);
std::string encoded_keys;
size_t cid = 0;
@@ -267,6 +266,22 @@ std::string SegmentWriter::_full_encode_keys(
return encoded_keys;
}
+void SegmentWriter::_encode_seq_column(const
vectorized::IOlapColumnDataAccessor* seq_column,
+ size_t pos, string* encoded_keys) {
+ auto field = seq_column->get_data_at(pos);
+ // To facilitate the use of the primary key index, encode the seq column
+ // to the minimum value of the corresponding length when the seq column
+ // is null
+ if (UNLIKELY(!field)) {
+ encoded_keys->push_back(KEY_NULL_FIRST_MARKER);
+ size_t seq_col_length =
_tablet_schema->column(_tablet_schema->sequence_col_idx()).length();
+ encoded_keys->append(seq_col_length, KEY_MINIMAL_MARKER);
+ return;
+ }
+ encoded_keys->push_back(KEY_NORMAL_MARKER);
+ _seq_coder->full_encode_ascending(field, encoded_keys);
+}
+
std::string SegmentWriter::_encode_keys(
const std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns,
size_t pos,
bool null_first) {
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h
b/be/src/olap/rowset/segment_v2/segment_writer.h
index 9e15356ff0..43c74eb2c9 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -113,10 +113,13 @@ private:
Status _write_raw_data(const std::vector<Slice>& slices);
std::string _encode_keys(const
std::vector<vectorized::IOlapColumnDataAccessor*>& key_columns,
size_t pos, bool null_first = true);
- // for unique-key merge on write and segment min_max key
+ // used for unique-key with merge on write and segment min_max key
std::string _full_encode_keys(
const std::vector<vectorized::IOlapColumnDataAccessor*>&
key_columns, size_t pos,
bool null_first = true);
+ // used for unique-key with merge on write
+ void _encode_seq_column(const vectorized::IOlapColumnDataAccessor*
seq_column, size_t pos,
+ string* encoded_keys);
void set_min_max_key(const Slice& key);
void set_min_key(const Slice& key);
void set_max_key(const Slice& key);
@@ -144,6 +147,7 @@ private:
std::unique_ptr<vectorized::OlapBlockDataConvertor> _olap_data_convertor;
// used for building short key index or primary key index during
vectorized write.
std::vector<const KeyCoder*> _key_coders;
+ const KeyCoder* _seq_coder = nullptr;
std::vector<uint16_t> _key_index_size;
size_t _short_key_row_pos = 0;
diff --git a/regression-test/data/primary_index/test_mow_with_null_sequence.out
b/regression-test/data/primary_index/test_mow_with_null_sequence.out
new file mode 100644
index 0000000000..2c14cb05ec
--- /dev/null
+++ b/regression-test/data/primary_index/test_mow_with_null_sequence.out
@@ -0,0 +1,14 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+a abc address2 2022-10-20
+aa1234 abc address4 2022-12-11
+aa1235 abc address6 \N
+ab abc address6 2022-11-20
+
+-- !sql --
+a abc address2 100
+aa1234 abc address4 -1
+aa1235 abc address6 -1
+aa1236 abc address6 0
+ab abc address6 110
+
diff --git
a/regression-test/suites/primary_index/test_mow_with_null_sequence.groovy
b/regression-test/suites/primary_index/test_mow_with_null_sequence.groovy
new file mode 100644
index 0000000000..8a7a12e1e4
--- /dev/null
+++ b/regression-test/suites/primary_index/test_mow_with_null_sequence.groovy
@@ -0,0 +1,90 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_mow_with_null_sequence") {
+ def tableName = "test_null_sequence"
+ sql """ DROP TABLE IF EXISTS $tableName """
+ sql """
+ CREATE TABLE `$tableName` (
+ `c_custkey` varchar(20) NOT NULL COMMENT "",
+ `c_name` varchar(20) NOT NULL COMMENT "",
+ `c_address` varchar(20) NOT NULL COMMENT "",
+ `c_date` date NULL COMMENT ""
+ )
+ UNIQUE KEY (`c_custkey`)
+ DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 10
+ PROPERTIES (
+ "function_column.sequence_col" = 'c_date',
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true"
+ );
+ """
+
+
+ sql """ insert into $tableName values('a', 'abc', 'address1', NULL) """
+ sql """ insert into $tableName values('a', 'abc', 'address2',
'2022-10-20') """
+ sql """ insert into $tableName values('a', 'abc', 'address3', NULL) """
+ sql """ insert into $tableName values('ab', 'abc', 'address4', NULL)
"""
+ sql """ insert into $tableName values('ab', 'abc', 'address5',
'2022-10-20') """
+ sql """ insert into $tableName values('ab', 'abc', 'address6',
'2022-11-20') """
+ sql """ insert into $tableName values('ab', 'abc', 'address6',
'2022-11-15') """
+ sql """ insert into $tableName values('aa1234', 'abc', 'address4',
'2022-12-11') """
+ sql """ insert into $tableName values('aa1234', 'abc', 'address5',
NULL) """
+ sql """ insert into $tableName values('aa1235', 'abc', 'address6',
NULL) """
+
+ order_qt_sql "select * from $tableName"
+
+ sql """ DROP TABLE IF EXISTS $tableName """
+
+ sql """
+ CREATE TABLE `$tableName` (
+ `c_custkey` varchar(20) NOT NULL COMMENT "",
+ `c_name` varchar(20) NOT NULL COMMENT "",
+ `c_address` varchar(20) NOT NULL COMMENT "",
+ `c_int` int NULL COMMENT ""
+ )
+ UNIQUE KEY (`c_custkey`)
+ DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 10
+ PROPERTIES (
+ "function_column.sequence_col" = 'c_int',
+ "replication_num" = "1",
+ "enable_unique_key_merge_on_write" = "true"
+ );
+ """
+ sql """ insert into $tableName values('a', 'abc', 'address1', NULL) """
+ sql """ insert into $tableName values('a', 'abc', 'address2', 100) """
+ sql """ insert into $tableName values('a', 'abc', 'address3', NULL) """
+
+ sql """ insert into $tableName values('ab', 'abc', 'address4', NULL)
"""
+ sql """ insert into $tableName values('ab', 'abc', 'address5', -10) """
+ sql """ insert into $tableName values('ab', 'abc', 'address6', 110) """
+ sql """ insert into $tableName values('ab', 'abc', 'address6', 100) """
+
+ sql """ insert into $tableName values('aa1234', 'abc', 'address4', -1)
"""
+ sql """ insert into $tableName values('aa1234', 'abc', 'address5',
NULL) """
+
+ sql """ insert into $tableName values('aa1235', 'abc', 'address6',
NULL) """
+ sql """ insert into $tableName values('aa1235', 'abc', 'address6', -1)
"""
+
+ sql """ insert into $tableName values('aa1236', 'abc', 'address6',
NULL) """
+ sql """ insert into $tableName values('aa1236', 'abc', 'address6', 0)
"""
+ sql """ insert into $tableName values('aa1236', 'abc', 'address6',
NULL) """
+
+ order_qt_sql "select * from $tableName"
+
+ sql """ DROP TABLE IF EXISTS $tableName """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]