Copilot commented on code in PR #56718:
URL: https://github.com/apache/doris/pull/56718#discussion_r2413915167
##########
be/src/vec/exprs/vsearch.cpp:
##########
@@ -48,39 +48,63 @@ Status collect_search_inputs(const VSearchExpr& expr,
VExprContext* context,
auto index_context = context->get_inverted_index_context();
if (index_context == nullptr) {
- return Status::OK();
+ LOG(WARNING) << "collect_search_inputs: No inverted index context
available";
+ return Status::InternalError("No inverted index context available");
}
+ // Get field bindings for variant subcolumn support
+ const auto& search_param = expr.get_search_param();
+ const auto& field_bindings = search_param.field_bindings;
+
+ int child_index = 0; // Index for iterating through children
for (const auto& child : expr.children()) {
if (child->is_slot_ref()) {
auto* column_slot_ref = assert_cast<VSlotRef*>(child.get());
int column_id = column_slot_ref->column_id();
auto* iterator =
index_context->get_inverted_index_iterator_by_column_id(column_id);
- if (iterator == nullptr) {
- continue;
+
+ // Determine the field_name from field_bindings (for variant
subcolumns)
+ // field_bindings and children should have the same order
+ std::string field_name;
+ if (child_index < field_bindings.size()) {
+ // Use field_name from binding (may include "parent.subcolumn"
for variant)
+ field_name = field_bindings[child_index].field_name;
+ } else {
+ // Fallback to column_name if binding not found
+ field_name = column_slot_ref->column_name();
}
- const auto* storage_name_type =
-
index_context->get_storage_name_and_type_by_column_id(column_id);
- if (storage_name_type == nullptr) {
- auto err_msg = fmt::format(
- "storage_name_type cannot be found for column {} while
in {} evaluate",
- column_id, expr.expr_name());
- LOG(ERROR) << err_msg;
- return Status::InternalError(err_msg);
+ // Only collect fields that have iterators (materialized columns
with indexes)
+ if (iterator != nullptr) {
+ const auto* storage_name_type =
+
index_context->get_storage_name_and_type_by_column_id(column_id);
+ if (storage_name_type == nullptr) {
+ return Status::InternalError("storage_name_type not found
for column {} in {}",
+ column_id, expr.expr_name());
+ }
+
+ bundle->iterators.emplace(field_name, iterator);
+ bundle->field_types.emplace(field_name, *storage_name_type);
+ bundle->column_ids.emplace_back(column_id);
}
- auto column_name = column_slot_ref->column_name();
- bundle->iterators.emplace(column_name, iterator);
- bundle->field_types.emplace(column_name, *storage_name_type);
- bundle->column_ids.emplace_back(column_id);
+ child_index++;
} else if (child->is_literal()) {
auto* literal = assert_cast<VLiteral*>(child.get());
bundle->literal_args.emplace_back(literal->get_column_ptr(),
literal->get_data_type(),
literal->expr_name());
} else {
- LOG(WARNING) << "VSearchExpr: Unsupported child node type
encountered";
- return Status::InvalidArgument("search expression child type
unsupported");
+ // Check if this is ElementAt expression (for variant subcolumn
access)
+ if (child->expr_name() == "element_at" && child_index <
field_bindings.size() &&
+ field_bindings[child_index].__isset.is_variant_subcolumn &&
+ field_bindings[child_index].is_variant_subcolumn) {
Review Comment:
The hardcoded string 'element_at' should be defined as a constant to avoid
magic strings and improve maintainability.
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java:
##########
@@ -233,18 +233,32 @@ public QsNode
visitAtomClause(SearchParser.AtomClauseContext ctx) {
@Override
public QsNode visitFieldQuery(SearchParser.FieldQueryContext ctx) {
- if (ctx.fieldName() == null) {
- throw new RuntimeException("Invalid field query: missing field
name");
+ if (ctx.fieldPath() == null) {
+ throw new RuntimeException("Invalid field query: missing field
path");
Review Comment:
The error message should be more descriptive and specify what constitutes a
valid field path.
```suggestion
throw new RuntimeException("Invalid field query: missing
field path. A valid field path consists of one or more segments (e.g., field,
field.subfield), where each segment is an identifier or a quoted string if it
contains special characters.");
```
##########
be/src/vec/functions/function_search.cpp:
##########
@@ -171,27 +192,31 @@ Status
FunctionSearch::evaluate_inverted_index_with_search_param(
data_type_with_names,
std::unordered_map<std::string, IndexIterator*> iterators, uint32_t
num_rows,
InvertedIndexResultBitmap& bitmap_result) const {
- VLOG_DEBUG << "search: Processing DSL '" << search_param.original_dsl <<
"' with "
- << data_type_with_names.size() << " indexed columns and " <<
iterators.size()
- << " iterators";
-
if (iterators.empty() || data_type_with_names.empty()) {
- LOG(INFO) << "No indexed columns or iterators available, returning
empty result";
+ LOG(INFO) << "No indexed columns or iterators available, returning
empty result, dsl:"
+ << search_param.original_dsl;
+ bitmap_result =
InvertedIndexResultBitmap(std::make_shared<roaring::Roaring>(),
+
std::make_shared<roaring::Roaring>());
Review Comment:
The creation of an empty InvertedIndexResultBitmap is duplicated in multiple
places. Consider extracting this into a helper function to reduce code
duplication.
##########
fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/RewriteSearchToSlots.java:
##########
@@ -127,6 +169,7 @@ private Expression rewriteSearch(Search search,
LogicalOlapScan scan) {
}
private Slot findSlotByName(String fieldName, LogicalOlapScan scan) {
+ // Direct match only - variant subcolumns are handled by caller
Review Comment:
The comment should explain why variant subcolumns are handled by the caller
and provide a brief explanation of the handling mechanism.
```suggestion
/*
* This method performs a direct match for slot names only and does
not attempt to resolve variant subcolumns.
* Variant subcolumns (e.g., fields within complex or nested types)
require special parsing and resolution logic,
* which is handled by the caller prior to invoking this method. The
caller is responsible for extracting the
* appropriate subcolumn name from the field path and ensuring that
the correct slot or sub-slot is passed here
* for direct matching. This separation keeps this method simple and
focused on direct slot lookup.
*/
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]