This is an automated email from the ASF dual-hosted git repository.
gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 75c8291e5c4 [chore](UT) Add UT for SegmentWriter::_full_encode_keys
(#57653)
75c8291e5c4 is described below
commit 75c8291e5c4c214087dfc1dbdc4a3e687f740cdb
Author: Gavin Chou <[email protected]>
AuthorDate: Tue Nov 4 16:41:36 2025 +0800
[chore](UT) Add UT for SegmentWriter::_full_encode_keys (#57653)
---
be/src/olap/rowset/segment_v2/segment_writer.h | 2 +-
.../olap/segment_writer_full_encode_keys_test.cpp | 101 +++++++++++++++++++++
2 files changed, 102 insertions(+), 1 deletion(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h
b/be/src/olap/rowset/segment_v2/segment_writer.h
index d7f58dfe5dd..176aa7e5b5e 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -180,7 +180,7 @@ private:
const std::vector<vectorized::IOlapColumnDataAccessor*>&
key_columns, size_t pos,
bool null_first = true);
- std::string _full_encode_keys(
+ static std::string _full_encode_keys(
const std::vector<const KeyCoder*>& key_coders,
const std::vector<vectorized::IOlapColumnDataAccessor*>&
key_columns, size_t pos,
bool null_first = true);
diff --git a/be/test/olap/segment_writer_full_encode_keys_test.cpp
b/be/test/olap/segment_writer_full_encode_keys_test.cpp
new file mode 100644
index 00000000000..0ef1014efa6
--- /dev/null
+++ b/be/test/olap/segment_writer_full_encode_keys_test.cpp
@@ -0,0 +1,101 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <vector>
+
+#include "olap/key_coder.h"
+#include "olap/olap_common.h"
+#include "olap/rowset/segment_v2/segment_writer.h"
+#include "util/key_util.h"
+#include "util/slice.h"
+#include "vec/common/string_view.h" // hex_dump
+#include "vec/olap/olap_data_convertor.h"
+
+namespace doris {
+namespace segment_v2 {
+using namespace doris::vectorized;
+
+auto create_string_accessor(const std::vector<std::string>& str) { // Test helper: copies `str` into a ColumnString, converts it with a VARCHAR convertor and returns that convertor.
+ ColumnString::MutablePtr column = ColumnString::create();
+ // NOTE(review): this non-empty check is disabled although index 0 is read below when conversion succeeds, so callers must pass at least one value: ASSERT_TRUE(!str.empty());
+ for (auto& s : str) column->insert_value(s);
+ DataTypePtr data_type =
+
DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_VARCHAR,
0, 0);
+ ColumnWithTypeAndName typed_column(column->get_ptr(), data_type,
"test_string_column");
+
+ // Create a VARCHAR convertor; the test in this file passes the returned convertor as an IOlapColumnDataAccessor*
+ auto convertor =
+
std::make_shared<OlapBlockDataConvertor::OlapColumnDataConvertorVarChar>(false);
+ convertor->set_source_column(typed_column, 0, str.size()); // row_pos=0,
num_rows=str.size()
+
+ // Convert to OLAP format
+ auto status = convertor->convert_to_olap();
+ EXPECT_TRUE(status.ok()); // non-fatal check: the convertor is still returned if conversion failed
+ if (status.ok()) {
+ // Get the converted data at index 0 (only used for the debug output below)
+ const void* data = convertor->get_data_at(0);
+ // const UInt8* nullmap = convertor->get_nullmap();
+ std::cout << ((StringRef*)data)->to_string() << std::endl; // debug output: converted value, read through a StringRef*
+ std::cout << column->get_data_at(0) << std::endl; // debug output: value at the same index in the source column
+ // Use the converted data as needed
+ }
+ return convertor;
+}
+
+auto create_int_accessor(const
std::vector<PrimitiveTypeTraits<TYPE_BIGINT>::CppType>& values) {
+ // Test helper: copies `values` into a ColumnInt64, converts it with a BIGINT convertor and returns that convertor. Disabled check: ASSERT_TRUE(!values.empty());
+ auto column = ColumnInt64::create(); // NOTE(review): Int64 column and TYPE_BIGINT convertor, but data_type below is created as TYPE_INT -- confirm intended
+ for (auto value : values) column->insert_value(value);
+ DataTypePtr data_type =
DataTypeFactory::instance().create_data_type(TYPE_INT, 0, 0);
+ ColumnWithTypeAndName typed_column(column->get_ptr(), data_type,
"test_int_column");
+ auto convertor =
+
std::make_shared<OlapBlockDataConvertor::OlapColumnDataConvertorSimple<TYPE_BIGINT>>();
+ convertor->set_source_column(typed_column, 0,
+ values.size()); // row_pos=0,
num_rows=values.size()
+ auto status = convertor->convert_to_olap(); // convert to OLAP format
+ EXPECT_TRUE(status.ok()); // non-fatal check: the convertor is still returned if conversion failed
+ return convertor;
+}
+
+TEST(SegmentWriterFullEncodeKeysTest, TestSegmentWriterKeyEncoding) { // Encodes two rows with _full_encode_keys and prints the encoded bytes for comparison.
+ // 2 rows of key columns(int,string,string), expect encoded bytes of row1 <
row2
+ // 0x05050505, a, bb
+ // 0x05050505, a\x01, cc
+ // however the 2nd key column of the 2nd row ends with \x01 (smaller than
KEY_NORMAL_MARKER)
+ // so the encoded rows compare in reversed order: encoded0 > encoded1, per the hex dumps noted below
+ auto int_accessor = create_int_accessor({0x05050505, 0x05050505});
+ auto str_accessor0 = create_string_accessor({"a", "a\x01"});
+ auto str_accessor1 = create_string_accessor({"bb", "cc"});
+ std::vector<vectorized::IOlapColumnDataAccessor*> key_columns = {
+ int_accessor.get(), str_accessor0.get(), str_accessor1.get()}; // non-owning pointers; the shared_ptrs above keep the accessors alive
+ auto int_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_INT); // NOTE(review): INT coder paired with an accessor built by a BIGINT convertor -- confirm intended
+ auto str_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_VARCHAR);
+ std::vector<const KeyCoder*> key_coders = {int_coder, str_coder,
str_coder};
+
////////////////////////////////////////////////////////////////////////////
+ std::string encoded0 = SegmentWriter::_full_encode_keys(key_coders,
key_columns, 0);
+ std::string encoded1 = SegmentWriter::_full_encode_keys(key_coders,
key_columns, 1);
+
////////////////////////////////////////////////////////////////////////////
+ std::cout << StringView(encoded0).dump_hex() << std::endl; //
X'02850505050261026262'
+ std::cout << StringView(encoded1).dump_hex() << std::endl; //
X'0285050505026101026363'
+ // EXPECT_LT(encoded0, encoded1); // BANG! not satisfied -- NOTE(review): with this disabled the test asserts nothing about the encoded keys
+}
+
+} // namespace segment_v2
+} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]