Copilot commented on code in PR #19419:
URL: https://github.com/apache/datafusion/pull/19419#discussion_r2639011084


##########
datafusion/physical-expr/src/projection.rs:
##########
@@ -587,6 +587,78 @@ impl ProjectionExprs {
             let expr = &proj_expr.expr;
             let col_stats = if let Some(col) = 
expr.as_any().downcast_ref::<Column>() {
                 std::mem::take(&mut stats.column_statistics[col.index()])
+            } else if let Some(literal) = 
expr.as_any().downcast_ref::<Literal>() {
+                let data_type = expr.data_type(output_schema)?;
+
+                if literal.value().is_null() {
+                    // For NULL literals (constant NULL columns), output 
proper statistics
+                    // This enables optimizations like constant column 
detection and sort elimination
+                    // For constant NULL columns:
+                    // - null_count = num_rows (all rows are NULL)
+                    // - distinct_count = 1 (all NULLs are considered the same)
+                    // - min_value/max_value = Absent (NULLs don't have 
min/max)
+                    // - byte_size = Absent (NULLs don't take space in most 
representations)
+                    let null_count = match stats.num_rows {
+                        Precision::Exact(num_rows) => 
Precision::Exact(num_rows),
+                        _ => Precision::Absent, // Can't determine null_count 
without exact row count
+                    };
+
+                    ColumnStatistics {
+                        min_value: Precision::Absent, // NULLs don't have 
min/max
+                        max_value: Precision::Absent,
+                        distinct_count: Precision::Exact(1), // All NULLs are 
considered the same
+                        null_count,
+                        sum_value: Precision::Absent, // Sum doesn't make 
sense for NULLs
+                        byte_size: Precision::Absent, // NULLs don't take space
+                    }
+                } else {
+                    // For constant columns (non-null literals), output proper 
statistics
+                    let value = literal.value();
+
+                    // For constant columns:
+                    // - min_value = max_value = the literal value
+                    // - distinct_count = 1
+                    // - null_count = 0
+                    // - byte_size = calculated from data type and num_rows
+                    let distinct_count = Precision::Exact(1);
+                    let null_count = Precision::Exact(0);
+
+                    // Calculate byte_size: for primitive types, use width * 
num_rows
+                    let byte_size = if let Some(byte_width) = 
data_type.primitive_width()
+                    {
+                        stats.num_rows.multiply(&Precision::Exact(byte_width))
+                    } else {
+                        // For complex types (Utf8, List, etc.), the byte_size 
when materialized
+                        // as an array depends on the array encoding and 
representation (e.g.,
+                        // dictionary encoding, string view arrays), so we 
conservatively set it to Absent
+                        Precision::Absent
+                    };
+
+                    // Calculate sum_value: for numeric types, sum = value * 
num_rows
+                    // This is useful for optimizations (e.g., cross joins 
multiply sum_value by row count)
+                    let sum_value = if !value.is_null() {

Review Comment:
   The null check on line 639 is redundant: this code is already inside the 
`else` branch (line 614) that handles non-null literals, because the outer 
condition at line 593 has already established that `literal.value().is_null()` 
is false. The check can therefore never fail; removing it and keeping only the 
body of its true branch would simplify the code.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to