Jefffrey commented on code in PR #19611:
URL: https://github.com/apache/datafusion/pull/19611#discussion_r2657844779


##########
datafusion/functions-aggregate/src/percentile_cont.rs:
##########
@@ -234,53 +176,26 @@ impl AggregateUDFImpl for PercentileCont {
     }
 
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
-        if !arg_types[0].is_numeric() {
-            return plan_err!("percentile_cont requires numeric input types");
-        }
-        // PERCENTILE_CONT performs linear interpolation and should return a 
float type
-        // For integer inputs, return Float64 (matching PostgreSQL/DuckDB 
behavior)
-        // For float inputs, preserve the float type
         match &arg_types[0] {
-            DataType::Float16 | DataType::Float32 | DataType::Float64 => {
-                Ok(arg_types[0].clone())
-            }
-            DataType::Decimal32(_, _)
-            | DataType::Decimal64(_, _)
-            | DataType::Decimal128(_, _)
-            | DataType::Decimal256(_, _) => Ok(arg_types[0].clone()),
-            DataType::UInt8
-            | DataType::UInt16
-            | DataType::UInt32
-            | DataType::UInt64
-            | DataType::Int8
-            | DataType::Int16
-            | DataType::Int32
-            | DataType::Int64 => Ok(DataType::Float64),
-            // Shouldn't happen due to signature check, but just in case
-            dt => plan_err!(
-                "percentile_cont does not support input type {}, must be 
numeric",
-                dt
-            ),
+            DataType::Null => Ok(DataType::Float64),

Review Comment:
   Null types are a little annoying to handle, see 
https://github.com/apache/datafusion/issues/19458



##########
datafusion/functions-aggregate/src/percentile_cont.rs:
##########
@@ -297,76 +212,71 @@ impl AggregateUDFImpl for PercentileCont {
         ])
     }
 
-    fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn 
Accumulator>> {
-        self.create_accumulator(&acc_args)
+    fn accumulator(&self, args: AccumulatorArgs) -> Result<Box<dyn 
Accumulator>> {
+        let percentile = get_percentile(&args)?;
+
+        let input_dt = args.expr_fields[0].data_type();
+        if input_dt.is_null() {
+            return 
Ok(Box::new(NoopAccumulator::new(ScalarValue::Float64(None))));
+        }
+
+        if args.is_distinct {
+            match input_dt {
+                DataType::Float16 => 
Ok(Box::new(DistinctPercentileContAccumulator::<
+                    Float16Type,
+                >::new(percentile))),
+                DataType::Float32 => 
Ok(Box::new(DistinctPercentileContAccumulator::<
+                    Float32Type,
+                >::new(percentile))),
+                DataType::Float64 => 
Ok(Box::new(DistinctPercentileContAccumulator::<
+                    Float64Type,

Review Comment:
   Much more clear what the internal types we operate on are now



##########
datafusion/functions-aggregate/src/percentile_cont.rs:
##########
@@ -511,31 +423,22 @@ impl<T: ArrowNumericType> Accumulator for 
PercentileContAccumulator<T> {
     }
 
     fn update_batch(&mut self, values: &[ArrayRef]) -> Result<()> {
-        // Cast to target type if needed (e.g., integer to Float64)
-        let values = if values[0].data_type() != &self.data_type {

Review Comment:
   These are the internal casts we're removing; now input arguments should 
already be of the right types via type coercion



##########
datafusion/functions-aggregate/src/percentile_cont.rs:
##########
@@ -454,37 +375,29 @@ fn simplify_percentile_cont_aggregate(
     Ok(rewritten)
 }
 
-fn extract_percentile_literal(expr: &Expr) -> Option<f64> {
-    match expr {
-        Expr::Literal(ScalarValue::Float64(Some(value)), _) => Some(*value),
-        _ => None,
-    }
-}
-
 /// The percentile_cont accumulator accumulates the raw input values
 /// as native types.
 ///
 /// The intermediate state is represented as a List of scalar values updated by
 /// `merge_batch` and a `Vec` of native values that are converted to scalar 
values
 /// in the final evaluation step so that we avoid expensive conversions and
 /// allocations during `update_batch`.
-struct PercentileContAccumulator<T: ArrowNumericType> {
-    data_type: DataType,

Review Comment:
   Removing `data_type` from all accumulators; this was only needed if `T` was 
ever a decimal type, to maintain precision/scale. However there was no valid 
way to have decimals come through to the accumulator anyway, and we want to 
cast to float anyway, so we can refactor this away



##########
datafusion/functions-aggregate/src/percentile_cont.rs:
##########
@@ -496,12 +409,11 @@ impl<T: ArrowNumericType> Accumulator for 
PercentileContAccumulator<T> {
         let values_array = PrimitiveArray::<T>::new(
             ScalarBuffer::from(std::mem::take(&mut self.all_values)),
             None,
-        )
-        .with_data_type(self.data_type.clone());
+        );
 
         // Build the result list array
         let list_array = ListArray::new(
-            Arc::new(Field::new_list_field(self.data_type.clone(), true)),
+            Arc::new(Field::new_list_field(T::DATA_TYPE, true)),

Review Comment:
   Since we don't need to consider decimals we can just replace usages with 
`T::DATA_TYPE`



##########
datafusion/functions-aggregate/src/percentile_cont.rs:
##########
@@ -144,76 +135,27 @@ impl Default for PercentileCont {
 
 impl PercentileCont {
     pub fn new() -> Self {
-        let mut variants = Vec::with_capacity(NUMERICS.len());
-        // Accept any numeric value paired with a float64 percentile
-        for num in NUMERICS {
-            variants.push(TypeSignature::Exact(vec![num.clone(), 
DataType::Float64]));
-        }
         Self {
-            signature: Signature::one_of(variants, Volatility::Immutable)
-                .with_parameter_names(vec!["expr".to_string(), 
"percentile".to_string()])
-                .expect("valid parameter names for percentile_cont"),
+            signature: Signature::coercible(
+                vec![
+                    Coercion::new_implicit(
+                        TypeSignatureClass::Float,
+                        vec![TypeSignatureClass::Numeric],
+                        NativeType::Float64,
+                    ),
+                    Coercion::new_implicit(
+                        TypeSignatureClass::Native(logical_float64()),
+                        vec![TypeSignatureClass::Numeric],
+                        NativeType::Float64,
+                    ),
+                ],
+                Volatility::Immutable,

Review Comment:
   Here we are much clearer that we expect all expressions to be float64's, 
letting type coercion handle the casting for use so we can remove internal casts



##########
datafusion/functions-aggregate/src/percentile_cont.rs:
##########
@@ -144,76 +135,27 @@ impl Default for PercentileCont {
 
 impl PercentileCont {
     pub fn new() -> Self {
-        let mut variants = Vec::with_capacity(NUMERICS.len());
-        // Accept any numeric value paired with a float64 percentile
-        for num in NUMERICS {
-            variants.push(TypeSignature::Exact(vec![num.clone(), 
DataType::Float64]));
-        }
         Self {
-            signature: Signature::one_of(variants, Volatility::Immutable)
-                .with_parameter_names(vec!["expr".to_string(), 
"percentile".to_string()])
-                .expect("valid parameter names for percentile_cont"),
+            signature: Signature::coercible(
+                vec![
+                    Coercion::new_implicit(
+                        TypeSignatureClass::Float,
+                        vec![TypeSignatureClass::Numeric],
+                        NativeType::Float64,
+                    ),
+                    Coercion::new_implicit(
+                        TypeSignatureClass::Native(logical_float64()),
+                        vec![TypeSignatureClass::Numeric],
+                        NativeType::Float64,
+                    ),
+                ],
+                Volatility::Immutable,
+            )
+            .with_parameter_names(vec!["expr", "percentile"])
+            .unwrap(),
             aliases: vec![String::from("quantile_cont")],
         }
     }
-
-    fn create_accumulator(&self, args: &AccumulatorArgs) -> Result<Box<dyn 
Accumulator>> {

Review Comment:
   Inlining this



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to