This is an automated email from the ASF dual-hosted git repository.

gavinchou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 75c8291e5c4 [chore](UT) Add UT for SegmentWriter::_full_encode_keys 
(#57653)
75c8291e5c4 is described below

commit 75c8291e5c4c214087dfc1dbdc4a3e687f740cdb
Author: Gavin Chou <[email protected]>
AuthorDate: Tue Nov 4 16:41:36 2025 +0800

    [chore](UT) Add UT for SegmentWriter::_full_encode_keys (#57653)
---
 be/src/olap/rowset/segment_v2/segment_writer.h     |   2 +-
 .../olap/segment_writer_full_encode_keys_test.cpp  | 101 +++++++++++++++++++++
 2 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h 
b/be/src/olap/rowset/segment_v2/segment_writer.h
index d7f58dfe5dd..176aa7e5b5e 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.h
+++ b/be/src/olap/rowset/segment_v2/segment_writer.h
@@ -180,7 +180,7 @@ private:
             const std::vector<vectorized::IOlapColumnDataAccessor*>& 
key_columns, size_t pos,
             bool null_first = true);
 
-    std::string _full_encode_keys(
+    static std::string _full_encode_keys(
             const std::vector<const KeyCoder*>& key_coders,
             const std::vector<vectorized::IOlapColumnDataAccessor*>& 
key_columns, size_t pos,
             bool null_first = true);
diff --git a/be/test/olap/segment_writer_full_encode_keys_test.cpp 
b/be/test/olap/segment_writer_full_encode_keys_test.cpp
new file mode 100644
index 00000000000..0ef1014efa6
--- /dev/null
+++ b/be/test/olap/segment_writer_full_encode_keys_test.cpp
@@ -0,0 +1,101 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+
+#include <string>
+#include <vector>
+
+#include "olap/key_coder.h"
+#include "olap/olap_common.h"
+#include "olap/rowset/segment_v2/segment_writer.h"
+#include "util/key_util.h"
+#include "util/slice.h"
+#include "vec/common/string_view.h" // hex_dump
+#include "vec/olap/olap_data_convertor.h"
+
+namespace doris {
+namespace segment_v2 {
+using namespace doris::vectorized;
+
+auto create_string_accessor(const std::vector<std::string>& str) { // Builds a VARCHAR convertor over `str` and returns it (shared_ptr)
+    ColumnString::MutablePtr column = ColumnString::create();
+    // NOTE(review): row 0 is read below, so `str` is presumably expected to be non-empty; check left disabled (gtest ASSERT_* needs a void-returning function - TODO confirm): ASSERT_TRUE(!str.empty());
+    for (auto& s : str) column->insert_value(s);
+    DataTypePtr data_type =
+            
DataTypeFactory::instance().create_data_type(FieldType::OLAP_FIELD_TYPE_VARCHAR,
 0, 0);
+    ColumnWithTypeAndName typed_column(column->get_ptr(), data_type, 
"test_string_column");
+
+    // Create a VARCHAR convertor; the convertor itself serves as the column data accessor
+    auto convertor =
+            
std::make_shared<OlapBlockDataConvertor::OlapColumnDataConvertorVarChar>(false);
+    convertor->set_source_column(typed_column, 0, str.size()); // row_pos=0, 
num_rows=str.size()
+
+    // Convert to OLAP format
+    auto status = convertor->convert_to_olap();
+    EXPECT_TRUE(status.ok()); // non-fatal check: the convertor is still returned if conversion failed
+    if (status.ok()) {
+        // Debug aid: fetch row 0 of the converted data
+        const void* data = convertor->get_data_at(0);
+        // const UInt8* nullmap = convertor->get_nullmap();
+        std::cout << ((StringRef*)data)->to_string() << std::endl; // row 0 as seen through the convertor
+        std::cout << column->get_data_at(0) << std::endl; // row 0 of the source column, for comparison
+        // The two lines above only print; nothing is asserted on the values
+    }
+    return convertor;
+}
+
+auto create_int_accessor(const 
std::vector<PrimitiveTypeTraits<TYPE_BIGINT>::CppType>& values) { // Builds a BIGINT convertor over `values` and returns it (shared_ptr)
+    // Emptiness check left disabled (presumably because gtest ASSERT_* needs a void-returning function - TODO confirm): ASSERT_TRUE(!values.empty());
+    auto column = ColumnInt64::create();
+    for (auto value : values) column->insert_value(value);
+    DataTypePtr data_type = 
DataTypeFactory::instance().create_data_type(TYPE_INT, 0, 0); // NOTE(review): TYPE_INT here vs ColumnInt64 / TYPE_BIGINT around it - confirm this is intended
+    ColumnWithTypeAndName typed_column(column->get_ptr(), data_type, 
"test_int_column");
+    auto convertor =
+            
std::make_shared<OlapBlockDataConvertor::OlapColumnDataConvertorSimple<TYPE_BIGINT>>();
+    convertor->set_source_column(typed_column, 0,
+                                 values.size()); // row_pos=0, 
num_rows=values.size()
+    auto status = convertor->convert_to_olap();
+    EXPECT_TRUE(status.ok()); // non-fatal check: the convertor is still returned if conversion failed
+    return convertor;
+}
+
+TEST(SegmentWriterFullEncodeKeysTest, TestSegmentWriterKeyEncoding) {
+    // 2 rows of key columns(int,string,string), expect encode bytes of row1 < 
row2
+    //               0x05050505, a,     bb
+    //               0x05050505, a\x01, cc
+    // however the first string key of the 2nd row ends with \x01 (smaller than 
KEY_NORMAL_MARKER)
+    // so the two encoded rows come out in reversed order
+    auto int_accessor = create_int_accessor({0x05050505, 0x05050505});
+    auto str_accessor0 = create_string_accessor({"a", "a\x01"});
+    auto str_accessor1 = create_string_accessor({"bb", "cc"});
+    std::vector<vectorized::IOlapColumnDataAccessor*> key_columns = {
+            int_accessor.get(), str_accessor0.get(), str_accessor1.get()}; // column order: int, string, string
+    auto int_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_INT);
+    auto str_coder = get_key_coder(FieldType::OLAP_FIELD_TYPE_VARCHAR);
+    std::vector<const KeyCoder*> key_coders = {int_coder, str_coder, 
str_coder}; // one coder per key column, in the same order as key_columns
+    
////////////////////////////////////////////////////////////////////////////
+    std::string encoded0 = SegmentWriter::_full_encode_keys(key_coders, 
key_columns, 0); // full key of row 0
+    std::string encoded1 = SegmentWriter::_full_encode_keys(key_coders, 
key_columns, 1); // full key of row 1
+    
////////////////////////////////////////////////////////////////////////////
+    std::cout << StringView(encoded0).dump_hex() << std::endl; // 
X'02850505050261026262'
+    std::cout << StringView(encoded1).dump_hex() << std::endl; // 
X'0285050505026101026363'
+    // EXPECT_LT(encoded0, encoded1); // BANG! not satisfied (with this disabled the test only prints the keys; it asserts nothing on them)
+}
+
+} // namespace segment_v2
+} // namespace doris


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to