Copilot commented on code in PR #19419:
URL: https://github.com/apache/datafusion/pull/19419#discussion_r2639011084
##########
datafusion/physical-expr/src/projection.rs:
##########
@@ -587,6 +587,78 @@ impl ProjectionExprs {
let expr = &proj_expr.expr;
let col_stats = if let Some(col) =
expr.as_any().downcast_ref::<Column>() {
std::mem::take(&mut stats.column_statistics[col.index()])
+ } else if let Some(literal) =
expr.as_any().downcast_ref::<Literal>() {
+ let data_type = expr.data_type(output_schema)?;
+
+ if literal.value().is_null() {
+ // For NULL literals (constant NULL columns), output
proper statistics
+ // This enables optimizations like constant column
detection and sort elimination
+ // For constant NULL columns:
+ // - null_count = num_rows (all rows are NULL)
+ // - distinct_count = 1 (all NULLs are considered the same)
+ // - min_value/max_value = Absent (NULLs don't have
min/max)
+ // - byte_size = Absent (NULLs don't take space in most
representations)
+ let null_count = match stats.num_rows {
+ Precision::Exact(num_rows) =>
Precision::Exact(num_rows),
+ _ => Precision::Absent, // Can't determine null_count
without exact row count
+ };
+
+ ColumnStatistics {
+ min_value: Precision::Absent, // NULLs don't have
min/max
+ max_value: Precision::Absent,
+ distinct_count: Precision::Exact(1), // All NULLs are
considered the same
+ null_count,
+ sum_value: Precision::Absent, // Sum doesn't make
sense for NULLs
+ byte_size: Precision::Absent, // NULLs don't take space
+ }
+ } else {
+ // For constant columns (non-null literals), output proper
statistics
+ let value = literal.value();
+
+ // For constant columns:
+ // - min_value = max_value = the literal value
+ // - distinct_count = 1
+ // - null_count = 0
+ // - byte_size = calculated from data type and num_rows
+ let distinct_count = Precision::Exact(1);
+ let null_count = Precision::Exact(0);
+
+ // Calculate byte_size: for primitive types, use width *
num_rows
+ let byte_size = if let Some(byte_width) =
data_type.primitive_width()
+ {
+ stats.num_rows.multiply(&Precision::Exact(byte_width))
+ } else {
+ // For complex types (Utf8, List, etc.), the byte_size
when materialized
+ // as an array depends on the array encoding and
representation (e.g.,
+ // dictionary encoding, string view arrays), so we
conservatively set it to Absent
+ Precision::Absent
+ };
+
+ // Calculate sum_value: for numeric types, sum = value *
num_rows
+ // This is useful for optimizations (e.g., cross joins
multiply sum_value by row count)
+ let sum_value = if !value.is_null() {
Review Comment:
The null check on line 639 is redundant: this code is inside the `else` branch
(line 614) that handles non-null literals, so the outer condition at line 593
has already established that `literal.value().is_null()` is false here. The
check can be removed, leaving the `sum_value` computation unconditional within
this branch and simplifying the logic.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]