This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 9fa7500bbc Fix internal error "Physical input schema should be the
same as the one converted from logical input schema." (#18412)
9fa7500bbc is described below
commit 9fa7500bbcc310bbd8664ffe063a0e17b49977c4
Author: Andrew Lamb <[email protected]>
AuthorDate: Sat Jan 10 22:30:58 2026 -0500
Fix internal error "Physical input schema should be the same as the one
converted from logical input schema." (#18412)
## Which issue does this PR close?
- Closes https://github.com/apache/datafusion/issues/18337
## Rationale for this change
It is a bug we are seeing in our production related to a schema mismatch
## What changes are included in this PR?
1. New slt test for the issue
2. Properly compute the output field from window functions
## Are these changes tested?
Yes
## Are there any user-facing changes?
---
datafusion/expr/src/expr_schema.rs | 35 +++++++++--------------
datafusion/sqllogictest/test_files/metadata.slt | 38 +++++++++++++++++++++++++
2 files changed, 51 insertions(+), 22 deletions(-)
diff --git a/datafusion/expr/src/expr_schema.rs
b/datafusion/expr/src/expr_schema.rs
index dbba0f2914..854e907d68 100644
--- a/datafusion/expr/src/expr_schema.rs
+++ b/datafusion/expr/src/expr_schema.rs
@@ -25,7 +25,7 @@ use crate::type_coercion::functions::fields_with_udf;
use crate::udf::ReturnFieldArgs;
use crate::{LogicalPlan, Projection, Subquery, WindowFunctionDefinition,
utils};
use arrow::compute::can_cast_types;
-use arrow::datatypes::{DataType, Field};
+use arrow::datatypes::{DataType, Field, FieldRef};
use datafusion_common::datatype::FieldExt;
use datafusion_common::metadata::FieldMetadata;
use datafusion_common::{
@@ -156,9 +156,10 @@ impl ExprSchemable for Expr {
let return_type = self.to_field(schema)?.1.data_type().clone();
Ok(return_type)
}
- Expr::WindowFunction(window_function) => self
- .data_type_and_nullable_with_window_function(schema,
window_function)
- .map(|(return_type, _)| return_type),
+ Expr::WindowFunction(window_function) => Ok(self
+ .window_function_field(schema, window_function)?
+ .data_type()
+ .clone()),
Expr::AggregateFunction(AggregateFunction {
func,
params: AggregateFunctionParams { args, .. },
@@ -357,12 +358,9 @@ impl ExprSchemable for Expr {
Expr::AggregateFunction(AggregateFunction { func, .. }) => {
Ok(func.is_nullable())
}
- Expr::WindowFunction(window_function) => self
- .data_type_and_nullable_with_window_function(
- input_schema,
- window_function,
- )
- .map(|(_, nullable)| nullable),
+ Expr::WindowFunction(window_function) => Ok(self
+ .window_function_field(input_schema, window_function)?
+ .is_nullable()),
Expr::ScalarVariable(field, _) => Ok(field.is_nullable()),
Expr::TryCast { .. } | Expr::Unnest(_) | Expr::Placeholder(_) =>
Ok(true),
Expr::IsNull(_)
@@ -458,7 +456,7 @@ impl ExprSchemable for Expr {
/// with the default implementation returning empty field metadata
/// - **Aggregate functions**: Generate metadata via function's
[`return_field`] method,
/// with the default implementation returning empty field metadata
- /// - **Window functions**: field metadata is empty
+ /// - **Window functions**: field metadata follows the function's return
field
///
/// ## Table Reference Scoping
/// - Establishes proper qualified field references when columns belong to
specific tables
@@ -534,11 +532,7 @@ impl ExprSchemable for Expr {
)))
}
Expr::WindowFunction(window_function) => {
- let (dt, nullable) =
self.data_type_and_nullable_with_window_function(
- schema,
- window_function,
- )?;
- Ok(Arc::new(Field::new(&schema_name, dt, nullable)))
+ self.window_function_field(schema, window_function)
}
Expr::AggregateFunction(aggregate_function) => {
let AggregateFunction {
@@ -698,11 +692,11 @@ impl Expr {
///
/// Otherwise, returns an error if there's a type mismatch between
/// the window function's signature and the provided arguments.
- fn data_type_and_nullable_with_window_function(
+ fn window_function_field(
&self,
schema: &dyn ExprSchema,
window_function: &WindowFunction,
- ) -> Result<(DataType, bool)> {
+ ) -> Result<FieldRef> {
let WindowFunction {
fun,
params: WindowFunctionParams { args, .. },
@@ -738,9 +732,7 @@ impl Expr {
.into_iter()
.collect::<Vec<_>>();
- let return_field = udaf.return_field(&new_fields)?;
-
- Ok((return_field.data_type().clone(),
return_field.is_nullable()))
+ udaf.return_field(&new_fields)
}
WindowFunctionDefinition::WindowUDF(udwf) => {
let data_types = fields
@@ -769,7 +761,6 @@ impl Expr {
let field_args = WindowUDFFieldArgs::new(&new_fields,
&function_name);
udwf.field(field_args)
- .map(|field| (field.data_type().clone(),
field.is_nullable()))
}
}
}
diff --git a/datafusion/sqllogictest/test_files/metadata.slt
b/datafusion/sqllogictest/test_files/metadata.slt
index 41a511b5fa..6ed461debb 100644
--- a/datafusion/sqllogictest/test_files/metadata.slt
+++ b/datafusion/sqllogictest/test_files/metadata.slt
@@ -24,6 +24,22 @@
## in the test harness as there is no way to define schema
## with metadata in SQL.
+query ITTPT
+select * from table_with_metadata;
+----
+1 NULL NULL 2020-09-08T13:42:29.190855123 no_foo
+NULL bar l_bar 2020-09-08T13:42:29.190855123 no_bar
+3 baz l_baz 2020-09-08T13:42:29.190855123 no_baz
+
+query TTT
+describe table_with_metadata;
+----
+id Int32 YES
+name Utf8 YES
+l_name Utf8 YES
+ts Timestamp(ns) NO
+nonnull_name Utf8 NO
+
query IT
select id, name from table_with_metadata;
----
@@ -235,6 +251,28 @@ order by 1 asc nulls last;
3 1
NULL 1
+# Reproducer for https://github.com/apache/datafusion/issues/18337
+# this query should not get an internal error
+query TI
+SELECT
+ 'foo' AS name,
+ COUNT(
+ CASE
+ WHEN prev_value = 'no_bar' AND value = 'no_baz' THEN 1
+ ELSE NULL
+ END
+ ) AS count_rises
+FROM
+ (
+ SELECT
+ nonnull_name as value,
+ LAG(nonnull_name) OVER (ORDER BY ts) AS prev_value
+ FROM
+ table_with_metadata
+);
+----
+foo 1
+
# Regression test: first_value should preserve metadata
query IT
select first_value(id order by id asc nulls last),
arrow_metadata(first_value(id order by id asc nulls last), 'metadata_key')
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]