kumarUjjawal commented on code in PR #19468:
URL: https://github.com/apache/datafusion/pull/19468#discussion_r2651269259


##########
datafusion/physical-plan/src/joins/nested_loop_join.rs:
##########
@@ -550,17 +550,34 @@ impl ExecutionPlan for NestedLoopJoinExec {
     }
 
     fn partition_statistics(&self, partition: Option<usize>) -> 
Result<Statistics> {
-        if partition.is_some() {
-            return Ok(Statistics::new_unknown(&self.schema()));
-        }
+        // NestedLoopJoinExec is designed for joins without equijoin keys in 
the
+        // ON clause (e.g., `t1 JOIN t2 ON (t1.v1 + t2.v1) % 2 = 0`). Any join
+        // predicates are stored in `self.filter`, but 
`estimate_join_statistics`
+        // currently doesn't support selectivity estimation for such arbitrary
+        // filter expressions. We pass an empty join column list, which means
+        // the cardinality estimation cannot use column statistics and returns
+        // unknown row counts.
         let join_columns = Vec::new();
-        estimate_join_statistics(
-            self.left.partition_statistics(None)?,
-            self.right.partition_statistics(None)?,
+
+        // Left side is always a single partition 
(Distribution::SinglePartition),
+        // so we always request overall stats with `None`. Right side can have
+        // multiple partitions, so we forward the partition parameter to get
+        // partition-specific statistics when requested.
+        let left_stats = self.left.partition_statistics(None)?;
+        let right_stats = match partition {
+            Some(partition) => 
self.right.partition_statistics(Some(partition))?,
+            None => self.right.partition_statistics(None)?,

Review Comment:
   Yes, this test covers that:
   
   
https://github.com/apache/datafusion/pull/19468/changes/BASE..68bcbd6f2ad8e8b59e1508022067631ed3414c4b#diff-984f04e8c9c9fcd35a4c1b89e446506c813be1fe41b2716533bf4f41c1157339R640



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to