This is an automated email from the ASF dual-hosted git repository.

github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 9fa7500bbc Fix internal error "Physical input schema should be the 
same as the one converted from logical input schema." (#18412)
9fa7500bbc is described below

commit 9fa7500bbcc310bbd8664ffe063a0e17b49977c4
Author: Andrew Lamb <[email protected]>
AuthorDate: Sat Jan 10 22:30:58 2026 -0500

    Fix internal error "Physical input schema should be the same as the one 
converted from logical input schema." (#18412)
    
    ## Which issue does this PR close?
    
    
    - Closes https://github.com/apache/datafusion/issues/18337
    
    ## Rationale for this change
    It is a bug we are seeing in our production related to a schema mismatch
    
    ## What changes are included in this PR?
    
    1. New slt test for the issue
    2. Properly compute the output field from window functions
    
    ## Are these changes tested?
    
    Yes
    
    ## Are there any user-facing changes?
---
 datafusion/expr/src/expr_schema.rs              | 35 +++++++++--------------
 datafusion/sqllogictest/test_files/metadata.slt | 38 +++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 22 deletions(-)

diff --git a/datafusion/expr/src/expr_schema.rs 
b/datafusion/expr/src/expr_schema.rs
index dbba0f2914..854e907d68 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -25,7 +25,7 @@ use crate::type_coercion::functions::fields_with_udf;
 use crate::udf::ReturnFieldArgs;
 use crate::{LogicalPlan, Projection, Subquery, WindowFunctionDefinition, 
utils};
 use arrow::compute::can_cast_types;
-use arrow::datatypes::{DataType, Field};
+use arrow::datatypes::{DataType, Field, FieldRef};
 use datafusion_common::datatype::FieldExt;
 use datafusion_common::metadata::FieldMetadata;
 use datafusion_common::{
@@ -156,9 +156,10 @@ impl ExprSchemable for Expr {
                 let return_type = self.to_field(schema)?.1.data_type().clone();
                 Ok(return_type)
             }
-            Expr::WindowFunction(window_function) => self
-                .data_type_and_nullable_with_window_function(schema, 
window_function)
-                .map(|(return_type, _)| return_type),
+            Expr::WindowFunction(window_function) => Ok(self
+                .window_function_field(schema, window_function)?
+                .data_type()
+                .clone()),
             Expr::AggregateFunction(AggregateFunction {
                 func,
                 params: AggregateFunctionParams { args, .. },
@@ -357,12 +358,9 @@ impl ExprSchemable for Expr {
             Expr::AggregateFunction(AggregateFunction { func, .. }) => {
                 Ok(func.is_nullable())
             }
-            Expr::WindowFunction(window_function) => self
-                .data_type_and_nullable_with_window_function(
-                    input_schema,
-                    window_function,
-                )
-                .map(|(_, nullable)| nullable),
+            Expr::WindowFunction(window_function) => Ok(self
+                .window_function_field(input_schema, window_function)?
+                .is_nullable()),
             Expr::ScalarVariable(field, _) => Ok(field.is_nullable()),
             Expr::TryCast { .. } | Expr::Unnest(_) | Expr::Placeholder(_) => 
Ok(true),
             Expr::IsNull(_)
@@ -458,7 +456,7 @@ impl ExprSchemable for Expr {
     ///   with the default implementation returning empty field metadata
     /// - **Aggregate functions**: Generate metadata via function's 
[`return_field`] method,
     ///   with the default implementation returning empty field metadata
-    /// - **Window functions**: field metadata is empty
+    /// - **Window functions**: field metadata follows the function's return 
field
     ///
     /// ## Table Reference Scoping
     /// - Establishes proper qualified field references when columns belong to 
specific tables
@@ -534,11 +532,7 @@ impl ExprSchemable for Expr {
                 )))
             }
             Expr::WindowFunction(window_function) => {
-                let (dt, nullable) = 
self.data_type_and_nullable_with_window_function(
-                    schema,
-                    window_function,
-                )?;
-                Ok(Arc::new(Field::new(&schema_name, dt, nullable)))
+                self.window_function_field(schema, window_function)
             }
             Expr::AggregateFunction(aggregate_function) => {
                 let AggregateFunction {
@@ -698,11 +692,11 @@ impl Expr {
     ///
     /// Otherwise, returns an error if there's a type mismatch between
     /// the window function's signature and the provided arguments.
-    fn data_type_and_nullable_with_window_function(
+    fn window_function_field(
         &self,
         schema: &dyn ExprSchema,
         window_function: &WindowFunction,
-    ) -> Result<(DataType, bool)> {
+    ) -> Result<FieldRef> {
         let WindowFunction {
             fun,
             params: WindowFunctionParams { args, .. },
@@ -738,9 +732,7 @@ impl Expr {
                     .into_iter()
                     .collect::<Vec<_>>();
 
-                let return_field = udaf.return_field(&new_fields)?;
-
-                Ok((return_field.data_type().clone(), 
return_field.is_nullable()))
+                udaf.return_field(&new_fields)
             }
             WindowFunctionDefinition::WindowUDF(udwf) => {
                 let data_types = fields
@@ -769,7 +761,6 @@ impl Expr {
                 let field_args = WindowUDFFieldArgs::new(&new_fields, 
&function_name);
 
                 udwf.field(field_args)
-                    .map(|field| (field.data_type().clone(), 
field.is_nullable()))
             }
         }
     }
diff --git a/datafusion/sqllogictest/test_files/metadata.slt 
b/datafusion/sqllogictest/test_files/metadata.slt
index 41a511b5fa..6ed461debb 100644
--- a/datafusion/sqllogictest/test_files/metadata.slt
+++ b/datafusion/sqllogictest/test_files/metadata.slt
@@ -24,6 +24,22 @@
 ## in the test harness as there is no way to define schema
 ## with metadata in SQL.
 
+query ITTPT
+select * from table_with_metadata;
+----
+1 NULL NULL 2020-09-08T13:42:29.190855123 no_foo
+NULL bar l_bar 2020-09-08T13:42:29.190855123 no_bar
+3 baz l_baz 2020-09-08T13:42:29.190855123 no_baz
+
+query TTT
+describe table_with_metadata;
+----
+id Int32 YES
+name Utf8 YES
+l_name Utf8 YES
+ts Timestamp(ns) NO
+nonnull_name Utf8 NO
+
 query IT
 select id, name from table_with_metadata;
 ----
@@ -235,6 +251,28 @@ order by 1 asc nulls last;
 3 1
 NULL 1
 
+# Reproducer for https://github.com/apache/datafusion/issues/18337
+# this query should not get an internal error
+query TI
+SELECT
+  'foo' AS name,
+  COUNT(
+    CASE
+      WHEN prev_value = 'no_bar' AND value = 'no_baz' THEN 1
+      ELSE NULL
+      END
+     ) AS count_rises
+FROM
+  (
+    SELECT
+      nonnull_name as value,
+      LAG(nonnull_name) OVER (ORDER BY ts) AS prev_value
+    FROM
+      table_with_metadata
+);
+----
+foo 1
+
 # Regression test: first_value should preserve metadata
 query IT
 select first_value(id order by id asc nulls last), 
arrow_metadata(first_value(id order by id asc nulls last), 'metadata_key')


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to