This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 266daf8b3e doc: Add comments to clarify algorithm for `MarkJoin`s (#16436)
266daf8b3e is described below

commit 266daf8b3ecaa489400dd0abe46af86a33b80367
Author: Jonathan Chen <chenleejonat...@gmail.com>
AuthorDate: Thu Jun 19 11:17:56 2025 -0400

    doc: Add comments to clarify algorithm for `MarkJoin`s (#16436)
    
    * doc: Add doc to clarify algorithm for `MarkJoin`s
    
    * fix: fmt
    
    * fix: NullEquality merge
---
 datafusion/physical-plan/src/joins/symmetric_hash_join.rs | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
index d540b6d2a3..6dbe75cc0a 100644
--- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
+++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs
@@ -810,6 +810,21 @@ where
 {
     // Store the result in a tuple
     let result = match (build_side, join_type) {
+        // For a mark join we “mark” each build‐side row with a dummy 0 in the probe‐side index
+        // if it ever matched. For example, if
+        //
+        // prune_length = 5
+        // deleted_offset = 0
+        // visited_rows = {1, 3}
+        //
+        // then we produce:
+        //
+        // build_indices = [0, 1, 2, 3, 4]
+        // probe_indices = [None, Some(0), None, Some(0), None]
+        //
+        // That is, for each build row i in 0..5:
+        //   – We always output its own index i in `build_indices`
+        //   – We output `Some(0)` in `probe_indices[i]` if row i was ever visited, else `None`
         (JoinSide::Left, JoinType::LeftMark) => {
             let build_indices = (0..prune_length)
                 .map(L::Native::from_usize)


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to