zclllyybb commented on code in PR #59591:
URL: https://github.com/apache/doris/pull/59591#discussion_r2780453399
##########
be/src/pipeline/exec/join/process_hash_table_probe_impl.h:
##########
@@ -208,6 +223,342 @@ typename HashTableType::State
ProcessHashTableProbe<JoinOpType>::_init_probe_sid
return typename HashTableType::State(_parent->_probe_columns);
}
+// ASOF JOIN optimized: O(log K) binary search per probe row
+// Key design: execute ASOF expression directly to get column data, no column
name matching
+template <int JoinOpType>
+template <typename HashTableType>
+uint32_t ProcessHashTableProbe<JoinOpType>::_find_batch_asof_optimized(
+ HashTableType& hash_table_ctx, const uint8_t* null_map, uint32_t
probe_rows) {
+ auto* shared_state = _parent->_shared_state;
+ constexpr bool is_outer_join = (JoinOpType ==
TJoinOp::ASOF_LEFT_OUTER_JOIN ||
+ JoinOpType ==
TJoinOp::ASOF_RIGHT_OUTER_JOIN);
+ auto& probe_index = _parent->_probe_index;
+
+ // Empty build table handling
+ if (!shared_state->asof_index_ready) {
+ if constexpr (is_outer_join) {
+ uint32_t matched_cnt = 0;
+ for (; probe_index < probe_rows && matched_cnt < _batch_size;
++probe_index) {
+ _probe_indexs.get_element(matched_cnt) = probe_index;
+ _build_indexs.get_element(matched_cnt) = 0;
+ matched_cnt++;
+ }
+ return matched_cnt;
+ }
+ probe_index = probe_rows;
+ return 0;
+ }
+
+ // Get ASOF expression
+ auto& conjuncts = _parent->_other_join_conjuncts;
+ if (conjuncts.empty() || !conjuncts[0] || !conjuncts[0]->root() ||
+ conjuncts[0]->root()->get_num_children() != 2) {
+ probe_index = probe_rows;
+ return 0;
+ }
+
+ // Execute probe side expression to get probe ASOF column
+ // Use a temporary block for expression execution to avoid modifying
original probe_block
+ int probe_col_idx = -1;
+ auto& probe_block = _parent->_probe_block;
+ vectorized::ColumnPtr probe_col_ptr;
+
+ auto left_child = conjuncts[0]->root()->get_child(0);
+ if (left_child->is_slot_ref()) {
+ // Simple column reference: directly get from probe_block
+ auto* slot_ref = static_cast<vectorized::VSlotRef*>(left_child.get());
+ int col_id = slot_ref->column_id();
+ if (col_id >= 0 && col_id < static_cast<int>(probe_block.columns())) {
+ probe_col_ptr =
+
probe_block.get_by_position(col_id).column->convert_to_full_column_if_const();
+ }
+ } else {
+ // Expression: execute on a cloned block to avoid modifying original
+ vectorized::Block tmp_probe_block(probe_block);
+ auto status = left_child->execute(conjuncts[0].get(),
&tmp_probe_block, &probe_col_idx);
+ if (status.ok() && probe_col_idx >= 0 &&
+ probe_col_idx < static_cast<int>(tmp_probe_block.columns())) {
+ probe_col_ptr = tmp_probe_block.get_by_position(probe_col_idx)
+ .column->convert_to_full_column_if_const();
+ }
+ }
+
+ if (!probe_col_ptr) {
+ probe_index = probe_rows;
+ return 0;
+ }
+ // Remove nullable wrapper for comparison - keep original for null check
+ vectorized::ColumnPtr probe_col_for_compare = probe_col_ptr;
+ if (probe_col_ptr->is_nullable()) {
+ probe_col_for_compare = assert_cast<const
vectorized::ColumnNullable*>(probe_col_ptr.get())
+ ->get_nested_column_ptr();
+ }
+ const auto* probe_col = probe_col_for_compare.get();
+
+ // Get build ASOF column by matching column name from expression label
+ auto* build_block = shared_state->build_block.get();
+ if (!build_block || build_block->rows() <= 1) {
+ if constexpr (is_outer_join) {
+ uint32_t matched_cnt = 0;
+ for (; probe_index < probe_rows && matched_cnt < _batch_size;
++probe_index) {
+ _probe_indexs.get_element(matched_cnt) = probe_index;
+ _build_indexs.get_element(matched_cnt) = 0;
+ matched_cnt++;
+ }
+ return matched_cnt;
+ }
+ probe_index = probe_rows;
+ return 0;
+ }
+
+ // One-time init: compute build ASOF column and sort bucket entries.
+ // std::call_once ensures thread-safe init; after first call, zero
overhead (atomic flag check).
+ std::call_once(shared_state->asof_init_once, [&]() {
Review Comment:
build side
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]