xiaokang commented on code in PR #35891:
URL: https://github.com/apache/doris/pull/35891#discussion_r1641863670
##########
gensrc/thrift/AgentService.thrift:
##########
@@ -174,7 +165,7 @@ struct TCreateTabletReq {
25: optional i64 time_series_compaction_time_threshold_seconds = 3600
26: optional i64 time_series_compaction_empty_rowsets_threshold = 5
27: optional i64 time_series_compaction_level_threshold = 1
- 28: optional TInvertedIndexStorageFormat inverted_index_storage_format =
TInvertedIndexStorageFormat.V1
+ 28: optional Types.TInvertedIndexStorageFormat
inverted_index_storage_format = TInvertedIndexStorageFormat.V1
Review Comment:
Does it cause compatibility problem?
##########
gensrc/proto/internal_service.proto:
##########
@@ -908,6 +913,7 @@ message PStreamHeader {
repeated PTabletID tablets = 10;
optional TabletSchemaPB flush_schema = 11;
optional uint64 offset = 12;
+ optional SinkFileType file_type = 13;
Review Comment:
Adding a OpCode may be enough.
##########
be/src/util/thrift_util.cpp:
##########
@@ -167,7 +167,11 @@ bool _has_inverted_index_or_partial_update(TOlapTableSink
sink) {
for (const auto& index_schema : schema.indexes()) {
for (const auto& index : index_schema->indexes) {
if (index->index_type() == INVERTED) {
- return true;
+ if (sink.schema.inverted_index_storage_format ==
TInvertedIndexStorageFormat::V1) {
+ return true;
+ } else {
+ return false;
Review Comment:
The return value is not consistent to function name. You should change the
function name.
##########
be/src/olap/rowset/segment_creator.h:
##########
@@ -61,6 +65,11 @@ class FileWriterCreatorT : public FileWriterCreator {
return _t->create_file_writer(segment_id, file_writer);
}
+ Status create_inverted_index_v2_file(uint32_t segment_id,
Review Comment:
You can add a argument type to create() and pass it to create_file_writer()
to avoid adding a similar function create_inverted_index_v2_file.
##########
be/src/runtime/load_stream.cpp:
##########
@@ -136,15 +136,25 @@ Status TabletStream::append_data(const PStreamHeader&
header, butil::IOBuf* data
// Each sender sends data in one segment sequential, so we also do not
// need a lock here.
bool eos = header.segment_eos();
+ SinkFileType file_type = header.file_type();
Review Comment:
We should consider upgrade compatibility.
##########
be/src/olap/rowset/vertical_beta_rowset_writer.cpp:
##########
@@ -179,22 +179,38 @@ Status
VerticalBetaRowsetWriter<T>::_create_segment_writer(
this->_context.file_cache_ttl_sec
: 0};
- auto path = context.segment_path(seg_id);
+ auto segment_path = context.segment_path(seg_id);
auto& fs = context.fs_ref();
- Status st = fs.create_file(path, &file_writer, &opts);
+ Status st = fs.create_file(segment_path, &segment_file_writer, &opts);
if (!st.ok()) {
- LOG(WARNING) << "failed to create writable file. path=" << path << ",
err: " << st;
+ LOG(WARNING) << "failed to create writable file. segment_path=" <<
segment_path
+ << ", err: " << st;
return st;
}
- DCHECK(file_writer != nullptr);
+ io::FileWriterPtr inverted_file_v2_writer;
+ if (context.tablet_schema->has_inverted_index() &&
+ context.tablet_schema->get_inverted_index_storage_format() ==
+ InvertedIndexStorageFormatPB::V2) {
+ auto path_prefix =
InvertedIndexDescriptor::get_index_path_prefix(segment_path);
+ auto idx_path =
InvertedIndexDescriptor::get_index_path_v2(path_prefix);
+ Status st = fs.create_file(idx_path, &inverted_file_v2_writer);
+ if (!st.ok()) {
+ LOG(WARNING) << "failed to create inverted idx file. idx_path=" <<
idx_path
+ << ", err: " << st;
+ return st;
+ }
+ }
+
+ DCHECK(segment_file_writer != nullptr);
segment_v2::SegmentWriterOptions writer_options;
writer_options.enable_unique_key_merge_on_write =
context.enable_unique_key_merge_on_write;
writer_options.rowset_ctx = &context;
*writer = std::make_unique<segment_v2::SegmentWriter>(
- file_writer.get(), seg_id, context.tablet_schema, context.tablet,
context.data_dir,
- context.max_rows_per_segment, writer_options, nullptr);
- RETURN_IF_ERROR(this->_seg_files.add(seg_id, std::move(file_writer)));
+ segment_file_writer.get(), seg_id, context.tablet_schema,
context.tablet,
+ context.data_dir, context.max_rows_per_segment, writer_options,
nullptr,
+ std::move(inverted_file_v2_writer));
Review Comment:
Just like segment_creater. BTW, is the code duplicate?
##########
be/src/runtime/load_stream_writer.cpp:
##########
@@ -130,6 +130,41 @@ Status LoadStreamWriter::append_data(uint32_t segid,
uint64_t offset, butil::IOB
return file_writer->append(buf.to_string());
}
+Status LoadStreamWriter::append_inverted_index_data(uint32_t segid, uint64_t
offset,
Review Comment:
append_inverted_index_data() can be merged to append_data() by adding a type
argument and use a std::vector<io::FileWriterPtr>* to reference
_segment_file_writers or _inverted_file_v2_writers based on type.
##########
be/src/olap/rowset/beta_rowset_writer_v2.cpp:
##########
@@ -74,13 +74,25 @@ Status BetaRowsetWriterV2::create_file_writer(uint32_t
segment_id, io::FileWrite
auto index_id = _context.index_id;
auto tablet_id = _context.tablet_id;
auto load_id = _context.load_id;
-
auto stream_writer = std::make_unique<io::StreamSinkFileWriter>(_streams);
stream_writer->init(load_id, partition_id, index_id, tablet_id,
segment_id);
file_writer = std::move(stream_writer);
return Status::OK();
}
+Status BetaRowsetWriterV2::create_inverted_index_v2_file_writer(uint32_t
segment_id,
Review Comment:
You can add a argument type to create_file_writer() to avoid adding a
similar function create_inverted_index_v2_file_writer and duplicate code.
##########
be/src/olap/rowset/segment_creator.cpp:
##########
@@ -173,9 +190,10 @@ Status SegmentFlusher::_create_segment_writer(
}
writer = std::make_unique<segment_v2::VerticalSegmentWriter>(
- file_writer.get(), segment_id, _context.tablet_schema,
_context.tablet,
- _context.data_dir, _context.max_rows_per_segment, writer_options,
_context.mow_context);
- RETURN_IF_ERROR(_seg_files.add(segment_id, std::move(file_writer)));
+ segment_file_writer.get(), segment_id, _context.tablet_schema,
_context.tablet,
+ _context.data_dir, _context.max_rows_per_segment, writer_options,
_context.mow_context,
+ std::move(inverted_file_v2_writer));
Review Comment:
You may need to do the same things for inverted_file_v2_writer as
segment_file_writer. Just use inverted_file_v2_writer.get() here and add
std::move(inverted_file_v2_writer) to _seg_files.
##########
be/src/io/fs/stream_sink_file_writer.h:
##########
@@ -69,6 +69,7 @@ class StreamSinkFileWriter final : public FileWriter {
int32_t _segment_id;
size_t _bytes_appended = 0;
State _state {State::OPENED};
+ SinkFileType _file_type = SinkFileType::SEGMENT_FILE;
Review Comment:
SinkFileType _file_type {SinkFileType::SEGMENT_FILE};
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]