This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new d92b9c803e6 [branch-2.0](cherry-pick) Add more indexed column reader
be unit test #25652 (#26430)
d92b9c803e6 is described below
commit d92b9c803e6a06b947c5ad60c71958761752f549
Author: abmdocrt <[email protected]>
AuthorDate: Mon Nov 6 22:58:05 2023 +0800
[branch-2.0](cherry-pick) Add more indexed column reader be unit test
#25652 (#26430)
---
be/src/olap/primary_key_index.h | 3 +
.../olap/rowset/segment_v2/indexed_column_writer.h | 3 +
be/test/olap/primary_key_index_test.cpp | 151 +++++++++++++++++++++
3 files changed, 157 insertions(+)
diff --git a/be/src/olap/primary_key_index.h b/be/src/olap/primary_key_index.h
index 65cc64f0cd6..911a17ea058 100644
--- a/be/src/olap/primary_key_index.h
+++ b/be/src/olap/primary_key_index.h
@@ -61,6 +61,9 @@ public:
uint64_t size() const { return _size; }
+ // used for be ut
+ uint32_t data_page_num() const { return _primary_key_index_builder->data_page_num(); }
+
Slice min_key() { return Slice(_min_key.data(), _min_key.size() - _seq_col_length); }
Slice max_key() { return Slice(_max_key.data(), _max_key.size() - _seq_col_length); }
diff --git a/be/src/olap/rowset/segment_v2/indexed_column_writer.h b/be/src/olap/rowset/segment_v2/indexed_column_writer.h
index a95a9fce7f7..7cd1bc656e6 100644
--- a/be/src/olap/rowset/segment_v2/indexed_column_writer.h
+++ b/be/src/olap/rowset/segment_v2/indexed_column_writer.h
@@ -22,6 +22,7 @@
#include <stdint.h>
#include <cstddef>
+#include <cstdint>
#include <memory>
#include "common/status.h"
@@ -83,6 +84,8 @@ public:
Status finish(IndexedColumnMetaPB* meta);
+ uint32_t data_page_num() const { return _num_data_pages + 1; }
+
private:
Status _finish_current_data_page(size_t& num_val);
diff --git a/be/test/olap/primary_key_index_test.cpp b/be/test/olap/primary_key_index_test.cpp
index d643ab501e8..64a49f010d9 100644
--- a/be/test/olap/primary_key_index_test.cpp
+++ b/be/test/olap/primary_key_index_test.cpp
@@ -167,4 +167,155 @@ TEST_F(PrimaryKeyIndexTest, builder) {
}
}
+TEST_F(PrimaryKeyIndexTest, multiple_pages) {
+ std::string filename = kTestDir + "/multiple_pages";
+ io::FileWriterPtr file_writer;
+ auto fs = io::global_local_filesystem();
+ EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
+
+ config::primary_key_data_page_size = 5 * 5;
+ PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
+ static_cast<void>(builder.init());
+ size_t num_rows = 0;
+ std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
+ "00010", "00012", "00014", "00016", "00018"};
+ for (const std::string& key : keys) {
+ static_cast<void>(builder.add_item(key));
+ num_rows++;
+ }
+ EXPECT_EQ("00000", builder.min_key().to_string());
+ EXPECT_EQ("00018", builder.max_key().to_string());
+ EXPECT_EQ(builder.size(), 2 * 5 * 5);
+ EXPECT_GT(builder.data_page_num(), 1);
+ segment_v2::PrimaryKeyIndexMetaPB index_meta;
+ EXPECT_TRUE(builder.finalize(&index_meta));
+ EXPECT_TRUE(file_writer->close().ok());
+ EXPECT_EQ(num_rows, builder.num_rows());
+
+ PrimaryKeyIndexReader index_reader;
+ io::FileReaderSPtr file_reader;
+ EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
+ EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok());
+ EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok());
+ EXPECT_EQ(num_rows, index_reader.num_rows());
+
+ std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
+ EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok());
+ bool exact_match = false;
+ uint32_t row_id;
+ for (size_t i = 0; i < keys.size(); i++) {
+ bool exists = index_reader.check_present(keys[i]);
+ EXPECT_TRUE(exists);
+ auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match);
+ EXPECT_TRUE(status.ok());
+ EXPECT_TRUE(exact_match);
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(i, row_id);
+ }
+ for (size_t i = 0; i < keys.size(); i++) {
+ bool exists = index_reader.check_present(keys[i]);
+ EXPECT_TRUE(exists);
+ auto status = index_iterator->seek_to_ordinal(i);
+ EXPECT_TRUE(status.ok());
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(i, row_id);
+ }
+ {
+ auto status = index_iterator->seek_to_ordinal(10);
+ EXPECT_TRUE(status.ok());
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(10, row_id);
+ }
+
+ std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009",
+ "00011", "00013", "00015", "00017"};
+ for (size_t i = 0; i < non_exist_keys.size(); i++) {
+ Slice slice(non_exist_keys[i]);
+ bool exists = index_reader.check_present(slice);
+ EXPECT_FALSE(exists);
+ auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+ EXPECT_TRUE(status.ok());
+ EXPECT_FALSE(exact_match);
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(i + 1, row_id);
+ }
+ {
+ string key("00019");
+ Slice slice(key);
+ bool exists = index_reader.check_present(slice);
+ EXPECT_FALSE(exists);
+ auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+ EXPECT_FALSE(exact_match);
+ EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
+ }
+}
+
+TEST_F(PrimaryKeyIndexTest, single_page) {
+ std::string filename = kTestDir + "/single_page";
+ io::FileWriterPtr file_writer;
+ auto fs = io::global_local_filesystem();
+ EXPECT_TRUE(fs->create_file(filename, &file_writer).ok());
+ config::primary_key_data_page_size = 32768;
+
+ PrimaryKeyIndexBuilder builder(file_writer.get(), 0);
+ static_cast<void>(builder.init());
+ size_t num_rows = 0;
+ std::vector<std::string> keys {"00000", "00002", "00004", "00006", "00008",
+ "00010", "00012", "00014", "00016", "00018"};
+ for (const std::string& key : keys) {
+ static_cast<void>(builder.add_item(key));
+ num_rows++;
+ }
+ EXPECT_EQ("00000", builder.min_key().to_string());
+ EXPECT_EQ("00018", builder.max_key().to_string());
+ EXPECT_EQ(builder.size(), 2 * 5 * 5);
+ EXPECT_EQ(builder.data_page_num(), 1);
+ segment_v2::PrimaryKeyIndexMetaPB index_meta;
+ EXPECT_TRUE(builder.finalize(&index_meta));
+ EXPECT_TRUE(file_writer->close().ok());
+ EXPECT_EQ(num_rows, builder.num_rows());
+
+ PrimaryKeyIndexReader index_reader;
+ io::FileReaderSPtr file_reader;
+ EXPECT_TRUE(fs->open_file(filename, &file_reader).ok());
+ EXPECT_TRUE(index_reader.parse_index(file_reader, index_meta).ok());
+ EXPECT_TRUE(index_reader.parse_bf(file_reader, index_meta).ok());
+ EXPECT_EQ(num_rows, index_reader.num_rows());
+
+ std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
+ EXPECT_TRUE(index_reader.new_iterator(&index_iterator).ok());
+ bool exact_match = false;
+ uint32_t row_id;
+ for (size_t i = 0; i < keys.size(); i++) {
+ bool exists = index_reader.check_present(keys[i]);
+ EXPECT_TRUE(exists);
+ auto status = index_iterator->seek_at_or_after(&keys[i], &exact_match);
+ EXPECT_TRUE(status.ok());
+ EXPECT_TRUE(exact_match);
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(i, row_id);
+ }
+
+ std::vector<std::string> non_exist_keys {"00001", "00003", "00005", "00007", "00009",
+ "00011", "00013", "00015", "00017"};
+ for (size_t i = 0; i < non_exist_keys.size(); i++) {
+ Slice slice(non_exist_keys[i]);
+ bool exists = index_reader.check_present(slice);
+ EXPECT_FALSE(exists);
+ auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+ EXPECT_TRUE(status.ok());
+ EXPECT_FALSE(exact_match);
+ row_id = index_iterator->get_current_ordinal();
+ EXPECT_EQ(i + 1, row_id);
+ }
+ {
+ string key("00019");
+ Slice slice(key);
+ bool exists = index_reader.check_present(slice);
+ EXPECT_FALSE(exists);
+ auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
+ EXPECT_FALSE(exact_match);
+ EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
+ }
+}
} // namespace doris
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]