HappenLee commented on code in PR #59591:
URL: https://github.com/apache/doris/pull/59591#discussion_r2792106309
##########
be/src/pipeline/exec/join/process_hash_table_probe_impl.h:
##########
@@ -208,6 +220,233 @@ typename HashTableType::State
ProcessHashTableProbe<JoinOpType>::_init_probe_sid
return typename HashTableType::State(_parent->_probe_columns);
}
+// ASOF JOIN optimized: O(log K) binary search per probe row
+// Key design: execute ASOF expression directly to get column data, no column
name matching
+template <int JoinOpType>
+template <typename HashTableType>
+uint32_t ProcessHashTableProbe<JoinOpType>::
+ _find_batch_asof_optimized( //
NOLINT(readability-function-cognitive-complexity)
+ HashTableType& hash_table_ctx, const uint8_t* null_map,
uint32_t probe_rows) {
+ auto* shared_state = _parent->_shared_state;
+ constexpr bool is_outer_join = (JoinOpType ==
TJoinOp::ASOF_LEFT_OUTER_JOIN);
+ auto& probe_index = _parent->_probe_index;
+
+ // Empty build table handling
+ if (!shared_state->asof_index_ready) {
+ if constexpr (is_outer_join) {
+ uint32_t matched_cnt = 0;
+ for (; probe_index < probe_rows && matched_cnt < _batch_size;
++probe_index) {
+ _probe_indexs.get_element(matched_cnt) = probe_index;
+ _build_indexs.get_element(matched_cnt) = 0;
+ matched_cnt++;
+ }
+ return matched_cnt;
+ }
+ probe_index = probe_rows;
+ return 0;
+ }
+
+ // Get ASOF expression
+ auto& conjuncts = _parent->_other_join_conjuncts;
+ if (conjuncts.empty() || !conjuncts[0] || !conjuncts[0]->root() ||
+ conjuncts[0]->root()->get_num_children() != 2) {
+ probe_index = probe_rows;
+ return 0;
+ }
+
+ // Execute probe side expression once per probe block (probe_index==0
means new block).
+ // VExpr::execute appends a column each call, so caching avoids redundant
computation
+ // and column accumulation when processing the same block in multiple
batches.
+ auto& probe_block = _parent->_probe_block;
+ if (probe_index == 0) {
+ _asof_probe_col_cache.reset();
+ auto left_child = conjuncts[0]->root()->get_child(0);
+ if (left_child->is_slot_ref()) {
+ auto* slot_ref =
static_cast<vectorized::VSlotRef*>(left_child.get());
+ int col_id = slot_ref->column_id();
+ if (col_id >= 0 && col_id <
static_cast<int>(probe_block.columns())) {
+ _asof_probe_col_cache = probe_block.get_by_position(col_id)
+
.column->convert_to_full_column_if_const();
+ }
+ } else {
+ int probe_col_idx = -1;
+ auto status = left_child->execute(conjuncts[0].get(),
&probe_block, &probe_col_idx);
+ if (status.ok() && probe_col_idx >= 0 &&
+ probe_col_idx < static_cast<int>(probe_block.columns())) {
+ _asof_probe_col_cache =
probe_block.get_by_position(probe_col_idx)
+
.column->convert_to_full_column_if_const();
+ }
+ }
+ }
+ auto probe_col_ptr = _asof_probe_col_cache;
+
+ if (!probe_col_ptr) {
+ probe_index = probe_rows;
+ return 0;
+ }
+ // Remove nullable wrapper for comparison - keep original for null check
+ vectorized::ColumnPtr probe_col_for_compare = probe_col_ptr;
+ if (probe_col_ptr->is_nullable()) {
+ probe_col_for_compare = assert_cast<const
vectorized::ColumnNullable*>(probe_col_ptr.get())
+ ->get_nested_column_ptr();
+ }
+ const auto* probe_col = probe_col_for_compare.get();
+
+ // Get build ASOF column by matching column name from expression label
+ auto* build_block = shared_state->build_block.get();
+ if (!build_block || build_block->rows() <= 1) {
Review Comment:
Why not do this at line 235? It is the same work.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]