Mryange commented on code in PR #59591:
URL: https://github.com/apache/doris/pull/59591#discussion_r2781227204


##########
be/src/pipeline/exec/join/process_hash_table_probe_impl.h:
##########
@@ -208,6 +223,342 @@ typename HashTableType::State 
ProcessHashTableProbe<JoinOpType>::_init_probe_sid
     return typename HashTableType::State(_parent->_probe_columns);
 }
 
+// ASOF JOIN optimized: O(log K) binary search per probe row
+// Key design: execute ASOF expression directly to get column data, no column 
name matching
+template <int JoinOpType>
+template <typename HashTableType>
+uint32_t ProcessHashTableProbe<JoinOpType>::_find_batch_asof_optimized(
+        HashTableType& hash_table_ctx, const uint8_t* null_map, uint32_t 
probe_rows) {
+    auto* shared_state = _parent->_shared_state;
+    constexpr bool is_outer_join = (JoinOpType == 
TJoinOp::ASOF_LEFT_OUTER_JOIN ||
+                                    JoinOpType == 
TJoinOp::ASOF_RIGHT_OUTER_JOIN);
+    auto& probe_index = _parent->_probe_index;
+
+    // Empty build table handling
+    if (!shared_state->asof_index_ready) {
+        if constexpr (is_outer_join) {
+            uint32_t matched_cnt = 0;
+            for (; probe_index < probe_rows && matched_cnt < _batch_size; 
++probe_index) {
+                _probe_indexs.get_element(matched_cnt) = probe_index;
+                _build_indexs.get_element(matched_cnt) = 0;
+                matched_cnt++;
+            }
+            return matched_cnt;
+        }
+        probe_index = probe_rows;
+        return 0;
+    }
+
+    // Get ASOF expression
+    auto& conjuncts = _parent->_other_join_conjuncts;
+    if (conjuncts.empty() || !conjuncts[0] || !conjuncts[0]->root() ||
+        conjuncts[0]->root()->get_num_children() != 2) {
+        probe_index = probe_rows;
+        return 0;
+    }
+
+    // Execute probe side expression to get probe ASOF column
+    // Use a temporary block for expression execution to avoid modifying 
original probe_block
+    int probe_col_idx = -1;
+    auto& probe_block = _parent->_probe_block;
+    vectorized::ColumnPtr probe_col_ptr;
+
+    auto left_child = conjuncts[0]->root()->get_child(0);
+    if (left_child->is_slot_ref()) {
+        // Simple column reference: directly get from probe_block
+        auto* slot_ref = static_cast<vectorized::VSlotRef*>(left_child.get());
+        int col_id = slot_ref->column_id();
+        if (col_id >= 0 && col_id < static_cast<int>(probe_block.columns())) {
+            probe_col_ptr =
+                    
probe_block.get_by_position(col_id).column->convert_to_full_column_if_const();
+        }
+    } else {
+        // Expression: execute on a cloned block to avoid modifying original
+        vectorized::Block tmp_probe_block(probe_block);

Review Comment:
   用新的 expr 接口,这个接口的 block 参数是 const 的,直接传 probe_block 就行,slot ref 的判断也可以删除:
       virtual Status execute_column(VExprContext* context, const Block* block, Selector* selector,
                                     size_t count, ColumnPtr& result_column) const = 0;
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to