jayzhan211 commented on code in PR #10644:
URL: https://github.com/apache/datafusion/pull/10644#discussion_r1613540288


##########
datafusion/functions-aggregate/src/median.rs:
##########
@@ -15,71 +15,105 @@
 // specific language governing permissions and limitations
 // under the License.
 
-//! # Median
-
-use crate::aggregate::utils::{down_cast_any_ref, Hashable};
-use crate::expressions::format_state_name;
-use crate::{AggregateExpr, PhysicalExpr};
-use arrow::array::{Array, ArrayRef};
-use arrow::datatypes::{DataType, Field};
-use arrow_array::cast::AsArray;
-use arrow_array::{downcast_integer, ArrowNativeTypeOp, ArrowNumericType};
-use arrow_buffer::ArrowNativeType;
-use datafusion_common::{DataFusionError, Result, ScalarValue};
-use datafusion_expr::Accumulator;
-use std::any::Any;
 use std::collections::HashSet;
 use std::fmt::Formatter;
-use std::sync::Arc;
+use std::{fmt::Debug, sync::Arc};
+
+use arrow::array::{downcast_integer, ArrowNumericType};
+use arrow::{
+    array::{ArrayRef, AsArray},
+    datatypes::{
+        DataType, Decimal128Type, Decimal256Type, Field, Float16Type, 
Float32Type,
+        Float64Type,
+    },
+};
+
+use arrow::array::Array;
+use arrow::array::ArrowNativeTypeOp;
+use arrow::datatypes::ArrowNativeType;
+
+use datafusion_common::{DataFusionError, Result, ScalarValue};
+use datafusion_expr::function::StateFieldsArgs;
+use datafusion_expr::{
+    function::AccumulatorArgs, utils::format_state_name, Accumulator, 
AggregateUDFImpl,
+    Signature, Volatility,
+};
+use datafusion_physical_expr_common::aggregate::utils::Hashable;
+
+make_udaf_expr_and_func!(
+    Median,
+    median,
+    expression,
+    "Computes the median of a set of numbers",
+    median_udaf
+);
 
-/// MEDIAN aggregate expression. If using the non-distinct variation, then 
this uses a
-/// lot of memory because all values need to be stored in memory before a 
result can be
-/// computed. If an approximation is sufficient then APPROX_MEDIAN provides a 
much more
-/// efficient solution.
-///
-/// If using the distinct variation, the memory usage will be similarly high 
if the
-/// cardinality is high as it stores all distinct values in memory before 
computing the
-/// result, but if cardinality is low then memory usage will also be lower.
-#[derive(Debug)]
 pub struct Median {
-    name: String,
-    expr: Arc<dyn PhysicalExpr>,
-    data_type: DataType,
-    distinct: bool,
+    signature: Signature,
+    aliases: Vec<String>,
+}
+
+impl Debug for Median {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        f.debug_struct("Median")
+            .field("name", &self.name())
+            .field("signature", &self.signature)
+            .finish()
+    }
+}
+
+impl Default for Median {
+    fn default() -> Self {
+        Self::new()
+    }
 }
 
 impl Median {
-    /// Create a new MEDIAN aggregate function
-    pub fn new(
-        expr: Arc<dyn PhysicalExpr>,
-        name: impl Into<String>,
-        data_type: DataType,
-        distinct: bool,
-    ) -> Self {
+    pub fn new() -> Self {
         Self {
-            name: name.into(),
-            expr,
-            data_type,
-            distinct,
+            aliases: vec!["median".to_string()],
+            signature: Signature::numeric(1, Volatility::Immutable),

Review Comment:
   I introduce this signature that computes the final coerce types instead of 
generating tons of valid types with the possible same type. The numeric 
signature considers decimal types too, which is needed but not included in the 
Numeric types array.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to