This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new e913557fe9 Fix: MemTable LIMIT ignored with reordered projections (#21177)
e913557fe9 is described below

commit e913557fe9e08e26978acc258cf670821d7b6d8b
Author: Ramakrishna Chilaka <[email protected]>
AuthorDate: Fri Mar 27 22:27:45 2026 +0530

    Fix: MemTable LIMIT ignored with reordered projections (#21177)
    
    ## Which issue does this PR close?
    
    - Closes #21176
    
    ## Rationale for this change
    
    When `SELECT` projects columns in a different order than the table
    schema (e.g., `SELECT col_c, col_b` vs schema order `col_a, col_b,
    col_c`), the `LIMIT` clause is silently ignored — all rows are returned
    instead of the requested limit.
    
    The physical optimizer runs `LimitPushdown` (which correctly sets
    `fetch=N` on `MemorySourceConfig`) **before** `ProjectionPushdown`. When
    the projection is pushed down, `try_swapping_with_projection()` creates
    a new `MemorySourceConfig` via `try_new()`, which resets `fetch` to
    `None`, silently dropping the limit.
    
    ## What changes are included in this PR?
    
    One-line fix in `MemorySourceConfig::try_swapping_with_projection` to
    preserve `self.fetch` on the newly created config:
    
    ```rust
    // Before:
    .map(|s| Arc::new(s) as Arc<dyn DataSource>)
    // After:
    .map(|s| Arc::new(s.with_limit(self.fetch)) as Arc<dyn DataSource>)
    ```
    
    ## Are these changes tested?
    
    Yes.
    
    - **Unit test** (`try_swapping_with_projection_preserves_fetch`):
    directly verifies `fetch` is preserved after projection pushdown.
    Confirmed it fails without the fix (`None` vs `Some(5)`).
    - **SQL logic test** (`limit.slt`, table `t21176`): end-to-end
    regression test for `SELECT col_c, col_b FROM t LIMIT 5` with reverse
    column order.
    
    ## Are there any user-facing changes?
    
    No API changes. This is a correctness fix — `LIMIT` now works correctly
    regardless of `SELECT` column order against in-memory tables.
---
 datafusion/datasource/src/memory.rs          | 59 ++++++++++++++++++++++++----
 datafusion/sqllogictest/test_files/limit.slt | 43 ++++++++++++++++++++
 2 files changed, 95 insertions(+), 7 deletions(-)

diff --git a/datafusion/datasource/src/memory.rs b/datafusion/datasource/src/memory.rs
index 04fca9a1e2..7115c22080 100644
--- a/datafusion/datasource/src/memory.rs
+++ b/datafusion/datasource/src/memory.rs
@@ -243,13 +243,22 @@ impl DataSource for MemorySourceConfig {
                     &exprs,
                     self.projection().as_ref().unwrap_or(&all_projections),
                 );
-
-                MemorySourceConfig::try_new(
-                    self.partitions(),
-                    self.original_schema(),
-                    Some(new_projections),
-                )
-                .map(|s| Arc::new(s) as Arc<dyn DataSource>)
+                let projected_schema =
+                    project_schema(&self.schema, Some(&new_projections));
+
+                projected_schema.map(|projected_schema| {
+                    // Clone self to preserve all metadata (fetch, sort_information,
+                    // show_sizes, etc.) then update only the projection-related fields.
+                    let mut new_source = self.clone();
+                    new_source.projection = Some(new_projections);
+                    new_source.projected_schema = projected_schema;
+                    // Project sort information to match the new projection
+                    new_source.sort_information = project_orderings(
+                        &new_source.sort_information,
+                        &new_source.projected_schema,
+                    );
+                    Arc::new(new_source) as Arc<dyn DataSource>
+                })
             })
             .transpose()
     }
@@ -897,6 +906,42 @@ mod tests {
         Ok(())
     }
 
+    /// Test that `try_swapping_with_projection` preserves the `fetch` limit.
+    /// Regression test for <https://github.com/apache/datafusion/issues/21176>
+    #[test]
+    fn try_swapping_with_projection_preserves_fetch() {
+        use datafusion_physical_expr::projection::ProjectionExprs;
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("a", DataType::Int32, false),
+            Field::new("b", DataType::Utf8, false),
+            Field::new("c", DataType::Int64, false),
+        ]));
+        let partitions: Vec<Vec<RecordBatch>> = vec![vec![batch(10)]];
+        let source = MemorySourceConfig::try_new(&partitions, schema.clone(), None)
+            .unwrap()
+            .with_limit(Some(5));
+
+        assert_eq!(source.fetch, Some(5));
+
+        // Create a projection that reorders columns: [c, a] (indices 2, 0)
+        let projection = ProjectionExprs::from_indices(&[2, 0], &schema);
+        let swapped = source
+            .try_swapping_with_projection(&projection)
+            .unwrap()
+            .unwrap();
+        let new_source = swapped
+            .as_any()
+            .downcast_ref::<MemorySourceConfig>()
+            .unwrap();
+
+        assert_eq!(
+            new_source.fetch,
+            Some(5),
+            "fetch limit must be preserved after projection pushdown"
+        );
+    }
+
     #[tokio::test]
     async fn values_empty_case() -> Result<()> {
         let schema = aggr_test_schema();
diff --git a/datafusion/sqllogictest/test_files/limit.slt b/datafusion/sqllogictest/test_files/limit.slt
index 7ed701418e..6faf80c8c1 100644
--- a/datafusion/sqllogictest/test_files/limit.slt
+++ b/datafusion/sqllogictest/test_files/limit.slt
@@ -927,3 +927,46 @@ DROP TABLE t;
 # Tear down src_table table:
 statement ok
 DROP TABLE src_table;
+
+# LIMIT must work when SELECT projects columns in different order than table schema
+
+statement ok
+CREATE TABLE t21176 (col_a TEXT, col_b DOUBLE, col_c TEXT) AS VALUES
+  ('a-0', 0, 'c-0'), ('a-1', 1, 'c-1'), ('a-2', 2, 'c-2'), ('a-3', 3, 'c-3'),
+  ('a-4', 4, 'c-4'), ('a-5', 5, 'c-5'), ('a-6', 6, 'c-6'), ('a-7', 7, 'c-7'),
+  ('a-8', 8, 'c-8'), ('a-9', 9, 'c-9'), ('a-10', 10, 'c-10'), ('a-11', 11, 'c-11'),
+  ('a-12', 12, 'c-12'), ('a-13', 13, 'c-13'), ('a-14', 14, 'c-14'), ('a-15', 15, 'c-15'),
+  ('a-16', 16, 'c-16'), ('a-17', 17, 'c-17'), ('a-18', 18, 'c-18'), ('a-19', 19, 'c-19');
+
+# Schema-order SELECT with LIMIT should return 5 rows
+query RT rowsort
+SELECT col_b, col_c FROM t21176 LIMIT 5;
+----
+0 c-0
+1 c-1
+2 c-2
+3 c-3
+4 c-4
+
+# Reverse-order SELECT with LIMIT should also return 5 rows (not 20)
+query TR rowsort
+SELECT col_c, col_b FROM t21176 LIMIT 5;
+----
+c-0 0
+c-1 1
+c-2 2
+c-3 3
+c-4 4
+
+# Single column reverse SELECT with LIMIT
+query T rowsort
+SELECT col_c FROM t21176 LIMIT 5;
+----
+c-0
+c-1
+c-2
+c-3
+c-4
+
+statement ok
+DROP TABLE t21176;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to