HappenLee commented on code in PR #59591:
URL: https://github.com/apache/doris/pull/59591#discussion_r2792079160


##########
be/src/pipeline/exec/join/process_hash_table_probe_impl.h:
##########
@@ -208,6 +220,233 @@ typename HashTableType::State 
ProcessHashTableProbe<JoinOpType>::_init_probe_sid
     return typename HashTableType::State(_parent->_probe_columns);
 }
 
+// ASOF JOIN optimized: O(log K) binary search per probe row
+// Key design: execute ASOF expression directly to get column data, no column 
name matching
+template <int JoinOpType>
+template <typename HashTableType>
+uint32_t ProcessHashTableProbe<JoinOpType>::
+        _find_batch_asof_optimized( // 
NOLINT(readability-function-cognitive-complexity)
+                HashTableType& hash_table_ctx, const uint8_t* null_map, 
uint32_t probe_rows) {
+    auto* shared_state = _parent->_shared_state;
+    constexpr bool is_outer_join = (JoinOpType == 
TJoinOp::ASOF_LEFT_OUTER_JOIN);
+    auto& probe_index = _parent->_probe_index;
+
+    // Empty build table handling
+    if (!shared_state->asof_index_ready) {
+        if constexpr (is_outer_join) {
+            uint32_t matched_cnt = 0;
+            for (; probe_index < probe_rows && matched_cnt < _batch_size; 
++probe_index) {
+                _probe_indexs.get_element(matched_cnt) = probe_index;
+                _build_indexs.get_element(matched_cnt) = 0;
+                matched_cnt++;
+            }
+            return matched_cnt;
+        }
+        probe_index = probe_rows;
+        return 0;
+    }
+
+    // Get ASOF expression
+    auto& conjuncts = _parent->_other_join_conjuncts;
+    if (conjuncts.empty() || !conjuncts[0] || !conjuncts[0]->root() ||
+        conjuncts[0]->root()->get_num_children() != 2) {
+        probe_index = probe_rows;
+        return 0;
+    }
+
+    // Execute probe side expression once per probe block (probe_index==0 
means new block).
+    // VExpr::execute appends a column each call, so caching avoids redundant 
computation
+    // and column accumulation when processing the same block in multiple 
batches.
+    auto& probe_block = _parent->_probe_block;
+    if (probe_index == 0) {
+        _asof_probe_col_cache.reset();
+        auto left_child = conjuncts[0]->root()->get_child(0);
+        if (left_child->is_slot_ref()) {

Review Comment:
   Why does the slot_ref need to be handled differently from other exprs?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to