amorynan commented on code in PR #32436:
URL: https://github.com/apache/doris/pull/32436#discussion_r1529997230
##########
be/src/olap/rowset/segment_v2/inverted_index_writer.cpp:
##########
@@ -369,39 +388,45 @@ class InvertedIndexColumnWriterImpl : public
InvertedIndexColumnWriter {
}
const auto* offsets = reinterpret_cast<const uint64_t*>(offsets_ptr);
if constexpr (field_is_slice_type(field_type)) {
- if (_field == nullptr || _index_writer == nullptr) {
- LOG(ERROR) << "field or index writer is null in inverted index
writer.";
- return Status::InternalError(
- "field or index writer is null in inverted index
writer");
+ if (_index_writer == nullptr) {
+ LOG(ERROR) << "index writer is null in inverted index writer.";
+ return Status::InternalError("index writer is null in inverted
index writer");
}
auto ignore_above_value =
get_parser_ignore_above_value_from_properties(_index_meta->properties());
auto ignore_above = std::stoi(ignore_above_value);
for (int i = 0; i < count; ++i) {
// offsets[i+1] is now row element count
- std::vector<std::string> strings;
// [0, 3, 6]
// [10,20,30] [20,30,40], [30,40,50]
auto start_off = offsets[i];
auto end_off = offsets[i + 1];
+ // TODO(Amory).later we use object pool to avoid field creation
+ lucene::document::Field* new_field = nullptr;
for (auto j = start_off; j < end_off; ++j) {
if (null_map[j] == 1) {
continue;
}
+ // now we temp create field . later make a pool
+ if (Status st = create_field(&new_field); st !=
Status::OK()) {
+ LOG(ERROR)
+ << "create field " <<
string(_field_name.begin(), _field_name.end())
+ << " error:" << st;
+ return st;
+ }
auto* v = (Slice*)((const uint8_t*)value_ptr + j *
field_size);
- strings.emplace_back(v->get_data(), v->get_size());
- }
-
- auto value = join(strings, " ");
- // only ignore_above UNTOKENIZED strings and empty strings not
tokenized
- if ((_parser_type == InvertedIndexParserType::PARSER_NONE &&
- value.length() > ignore_above) ||
- (_parser_type != InvertedIndexParserType::PARSER_NONE &&
value.empty())) {
- RETURN_IF_ERROR(add_null_document());
- } else {
- new_fulltext_field(value.c_str(), value.length());
- RETURN_IF_ERROR(add_document());
+ if ((_parser_type == InvertedIndexParserType::PARSER_NONE
&&
+ v->get_size() > ignore_above) ||
+ (_parser_type != InvertedIndexParserType::PARSER_NONE
&& v->empty())) {
+ // is here a null value?
+ RETURN_IF_ERROR(add_null_document());
+ } else {
+ new_fulltext_field(v->get_data(), v->get_size(),
new_field);
+ _doc->add(*new_field);
+ }
}
+ RETURN_IF_ERROR(add_document());
Review Comment:
So maybe we should not add this `if` branch?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]