Re: [PR] fix: SortPreservingMerge sanity check rejects valid ORDER BY with CASE expression [datafusion]

via GitHub Wed, 29 Oct 2025 12:14:04 -0700


alamb commented on code in PR #18342:
URL: https://github.com/apache/datafusion/pull/18342#discussion_r2474852321



##########
datafusion/core/tests/physical_optimizer/sanity_checker.rs:
##########
@@ -379,6 +379,32 @@ async fn test_analyzer() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn test_order_by_case_requirement() -> Result<()> {
+    let sql = r#"
+    WITH typ(oid, typnamespace, typname, typtype) AS (
+                      SELECT * FROM (VALUES (1, 10, 't1', 'b'))
+            UNION ALL SELECT * FROM (VALUES (2, NULL, 't2', 'b'))
+            UNION ALL SELECT * FROM (VALUES (3, 12, 't3', NULL))
+         )
+       , ns(oid, nspname) AS (VALUES (1, 'ns1'), (2, 'ns2'))
+
+    SELECT ns.nspname, typ.oid, typ.typname, typ.typtype
+      FROM typ JOIN ns ON (ns.oid = typ.typnamespace)
+     WHERE typ.typtype IN ('b','r','m','e','d')
+     ORDER BY CASE WHEN typ.typtype IN ('b','e','p') THEN 0
+                   WHEN typ.typtype = 'r' THEN 1
+              END
+    "#;
+
+    let ctx = SessionContext::new();
+    let df = ctx.sql(sql).await?;
+    let plan = df.create_physical_plan().await;
+
+    assert!(plan.is_ok(), "planning failed: {:?}", plan.err());

Review Comment:
   I verified this test fails with an assert without the code in this PR
   
   ```
   ---- physical_optimizer::sanity_checker::test_order_by_case_requirement 
stdout ----
   
   thread 'physical_optimizer::sanity_checker::test_order_by_case_requirement' 
panicked at datafusion/core/tests/physical_optimizer/sanity_checker.rs:404:5:
   planning failed: Some(Context("SanityCheckPlan", Plan("Plan: 
[\"SortPreservingMergeExec: [CASE WHEN typtype@3 = b OR typtype@3 = e OR 
typtype@3 = p THEN 0 WHEN typtype@3 = r THEN 1 END ASC NULLS LAST]\", \"  
SortExec: expr=[CASE WHEN typtype@3 = b OR typtype@3 = e OR typtype@3 = p THEN 
0 WHEN typtype@3 = r THEN 1 END ASC NULLS LAST], 
preserve_partitioning=[true]\", \"    CoalesceBatchesExec: 
target_batch_size=8192\", \"      HashJoinExec: mode=CollectLeft, 
join_type=Inner, on=[(oid@0, typnamespace@1)], projection=[nspname@1, oid@2, 
typname@4, typtype@5]\", \"        ProjectionExec: expr=[column1@0 as oid, 
column2@1 as nspname]\", \"          DataSourceExec: partitions=1, 
partition_sizes=[1]\", \"        ProjectionExec: expr=[column1@0 as oid, 
column2@1 as typnamespace, column3@2 as typname, column4@3 as typtype]\", \"    
      UnionExec\", \"            CoalesceBatchesExec: target_batch_size=8192\", 
\"              FilterExec: column4@3 IN (SET) ([b, r, m, e, d])\", \"          
  
     DataSourceExec: partitions=1, partition_sizes=[1]\", \"            
ProjectionExec: expr=[column1@0 as column1, CAST(column2@1 AS Int64) as 
column2, column3@2 as column3, column4@3 as column4]\", \"              
RepartitionExec: partitioning=RoundRobinBatch(16), input_partitions=1\", \"     
           CoalesceBatchesExec: target_batch_size=8192\", \"                  
FilterExec: column4@3 IN (SET) ([b, r, m, e, d])\", \"                    
DataSourceExec: partitions=1, partition_sizes=[1]\", \"            
ProjectionExec: expr=[column1@0 as column1, column2@1 as column2, column3@2 as 
column3, CAST(column4@3 AS Utf8) as column4]\", \"              
RepartitionExec: partitioning=RoundRobinBatch(16), input_partitions=1\", \"     
           CoalesceBatchesExec: target_batch_size=8192\", \"                  
FilterExec: CAST(column4@3 AS Utf8) IN (SET) ([b, r, m, e, d])\", \"            
        DataSourceExec: partitions=1, partition_sizes=[1]\"] does not satisfy 
order requirements: [CAS
 E WHEN typtype@3 = b OR typtype@3 = e OR typtype@3 = p THEN 0 WHEN typtype@3 = 
r THEN 1 END ASC NULLS LAST]. Child-0 order: [[CASE WHEN typtype@3 = b OR 
typtype@3 = e OR typtype@3 = p THEN 0 WHEN typtype@3 = r THEN 1 END ASC NULLS 
LAST]]")))
   note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
   
   
   ```



##########
datafusion/core/tests/physical_optimizer/sanity_checker.rs:
##########
@@ -379,6 +379,32 @@ async fn test_analyzer() -> Result<()> {
     Ok(())
 }
 
+#[tokio::test]
+async fn test_order_by_case_requirement() -> Result<()> {
+    let sql = r#"
+    WITH typ(oid, typnamespace, typname, typtype) AS (
+                      SELECT * FROM (VALUES (1, 10, 't1', 'b'))
+            UNION ALL SELECT * FROM (VALUES (2, NULL, 't2', 'b'))
+            UNION ALL SELECT * FROM (VALUES (3, 12, 't3', NULL))

Review Comment:
   added in dffb0c2a7



##########
datafusion/physical-expr/src/equivalence/properties/mod.rs:
##########
@@ -1470,3 +1470,40 @@ fn get_expr_properties(
         expr.get_properties(&child_states)
     }
 }
+
+#[cfg(test)]
+mod properties_tests {
+    use super::*;
+    use crate::expressions::{binary, case, col, lit};
+    use arrow::compute::SortOptions;
+    use arrow::datatypes::{DataType, Field, Schema};
+    use datafusion_common::Result;
+    use datafusion_expr::Operator;
+    use datafusion_physical_expr_common::sort_expr::PhysicalSortExpr;
+    use std::sync::Arc;
+
+    #[test]
+    fn test_case_expr_ordering_from_raw_oeq_class() -> Result<()> {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Int32, true),
+            Field::new("b", DataType::Int32, true),
+        ]));
+
+        // CASE WHEN a = 1 THEN a -> should be ordered
+        let mut eq_properties = 
EquivalenceProperties::new(Arc::clone(&schema));
+        let when = binary(col("a", &schema)?, Operator::Eq, lit(1i32), 
&schema)?;
+        let then = col("a", &schema)?;
+        let case_expr = case(None, vec![(when, then)], None)?;
+
+        let option = SortOptions::default();

Review Comment:
   This test seems unrelated to the fix in the PR and passes even when I 
removed the code change
   
   Can you explain why you included it?
   
   I also found it somewhat strange it is an entirely new `test` module



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] fix: SortPreservingMerge sanity check rejects valid ORDER BY with CASE expression [datafusion]

Reply via email to