This is an automated email from the ASF dual-hosted git repository.
morrysnow pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.1 by this push:
new 9ef1b5d8638 branch-3.1: [fix](inverted index) create empty idx file
when creating a index on variant-type column #53814 (#54148)
9ef1b5d8638 is described below
commit 9ef1b5d86381bf0f6082db8e36ef461693ef5077
Author: Sun Chenyang <[email protected]>
AuthorDate: Fri Aug 1 14:47:34 2025 +0800
branch-3.1: [fix](inverted index) create empty idx file when creating a
index on variant-type column #53814 (#54148)
pick from master #53814
---
.../segment_v2/inverted_index_file_writer.cpp | 4 +++-
be/test/io/fs/s3_file_writer_test.cpp | 25 +++++++++++++++++++++
.../test_variant_empty_index_file.out | Bin 116 -> 113 bytes
.../test_variant_empty_index_file.groovy | 15 ++++++-------
4 files changed, 35 insertions(+), 9 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
index e4be9bf64cb..febe923a700 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp
@@ -24,6 +24,7 @@
#include "common/exception.h"
#include "common/status.h"
+#include "io/fs/s3_file_writer.h"
#include "io/fs/stream_sink_file_writer.h"
#include "olap/rowset/segment_v2/inverted_index_compound_reader.h"
#include "olap/rowset/segment_v2/inverted_index_desc.h"
@@ -185,7 +186,8 @@ Status InvertedIndexFileWriter::close() {
_closed = true;
if (_indices_dirs.empty()) {
// An empty file must still be created even if there are no indexes to write
- if (dynamic_cast<io::StreamSinkFileWriter*>(_idx_v2_writer.get()) != nullptr) {
+ if (dynamic_cast<io::StreamSinkFileWriter*>(_idx_v2_writer.get()) != nullptr ||
+ dynamic_cast<io::S3FileWriter*>(_idx_v2_writer.get()) != nullptr) {
return _idx_v2_writer->close();
}
return Status::OK();
diff --git a/be/test/io/fs/s3_file_writer_test.cpp b/be/test/io/fs/s3_file_writer_test.cpp
index 0662565ec6a..755c78b0cbb 100644
--- a/be/test/io/fs/s3_file_writer_test.cpp
+++ b/be/test/io/fs/s3_file_writer_test.cpp
@@ -57,6 +57,7 @@
#include "io/fs/s3_file_system.h"
#include "io/fs/s3_obj_storage_client.h"
#include "io/io_common.h"
+#include "olap/rowset/segment_v2/inverted_index_file_writer.h"
#include "runtime/exec_env.h"
#include "util/slice.h"
#include "util/threadpool.h"
@@ -1470,4 +1471,28 @@ TEST_F(S3FileWriterTest, write_buffer_boundary) {
// clang-format on
}
+TEST_F(S3FileWriterTest, test_empty_file) {
+ std::vector<StorePath> paths;
+ paths.emplace_back(std::string("tmp_dir"), 1024000000);
+ auto tmp_file_dirs = std::make_unique<segment_v2::TmpFileDirs>(paths);
+ EXPECT_TRUE(tmp_file_dirs->init().ok());
+ ExecEnv::GetInstance()->set_tmp_file_dir(std::move(tmp_file_dirs));
+ doris::io::FileWriterOptions opts;
+ io::FileWriterPtr file_writer;
+ auto st = s3_fs->create_file("test_empty_file.idx", &file_writer, &opts);
+ EXPECT_TRUE(st.ok()) << st;
+ auto holder = std::make_shared<ObjClientHolder>(S3ClientConf {});
+ auto mock_client = std::make_shared<SimpleMockObjStorageClient>();
+ holder->_client = mock_client;
+ dynamic_cast<io::S3FileWriter*>(file_writer.get())->_obj_client = holder;
+ auto fs = io::global_local_filesystem();
+ std::string index_path = "/tmp/empty_index_file_test";
+ std::string rowset_id = "1234567890";
+ int64_t seg_id = 1234567890;
+ auto index_file_writer = std::make_unique<segment_v2::InvertedIndexFileWriter>(
+ fs, index_path, rowset_id, seg_id, InvertedIndexStorageFormatPB::V2,
+ std::move(file_writer), false);
+ EXPECT_TRUE(index_file_writer->close().ok());
+}
+
} // namespace doris
diff --git a/regression-test/data/inverted_index_p0/test_variant_empty_index_file.out b/regression-test/data/inverted_index_p0/test_variant_empty_index_file.out
index e6e8da3fd6c..d06eb4ef1e3 100644
Binary files a/regression-test/data/inverted_index_p0/test_variant_empty_index_file.out and b/regression-test/data/inverted_index_p0/test_variant_empty_index_file.out differ
diff --git a/regression-test/suites/inverted_index_p0/test_variant_empty_index_file.groovy b/regression-test/suites/inverted_index_p0/test_variant_empty_index_file.groovy
index dc98eed4802..50fc8fe2826 100644
--- a/regression-test/suites/inverted_index_p0/test_variant_empty_index_file.groovy
+++ b/regression-test/suites/inverted_index_p0/test_variant_empty_index_file.groovy
@@ -35,8 +35,9 @@ suite("test_variant_empty_index_file", "p0") {
"""
sql """ set enable_memtable_on_sink_node = true """
- sql """ insert into ${tableName} values (1, 'abcd') """
-
+ sql """ insert into ${tableName} values (1, NULL) """
+ qt_sql9 "select * from ${tableName}"
+ sql "sync"
def tablets = sql_return_maparray """ show tablets from ${tableName}; """
def backendId_to_backendIP = [:]
@@ -47,12 +48,10 @@ suite("test_variant_empty_index_file", "p0") {
String backend_id = tablets[0].BackendId
String ip = backendId_to_backendIP.get(backend_id)
String port = backendId_to_backendHttpPort.get(backend_id)
- if (!isCloudMode()) {
- def (code, out, err) = http_client("GET", String.format("http://%s:%s/api/show_nested_index_file?tablet_id=%s", ip, port, tablet_id))
- logger.info("Run show_nested_index_file_on_tablet: code=" + code + ", out=" + out + ", err=" + err)
- assertEquals("E-6004", parseJson(out.trim()).status)
- assertTrue(out.contains(" is empty"))
- }
+ def (code, out, err) = http_client("GET", String.format("http://%s:%s/api/show_nested_index_file?tablet_id=%s", ip, port, tablet_id))
+ logger.info("Run show_nested_index_file_on_tablet: code=" + code + ", out=" + out + ", err=" + err)
+ assertEquals("E-6004", parseJson(out.trim()).status)
+ assertTrue(out.contains(" is empty"))
try {
sql """ select /*+ SET_VAR(enable_match_without_inverted_index = 0) */ * from ${tableName} where v match 'abcd'; """
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]