alamb commented on a change in pull request #9359:
URL: https://github.com/apache/arrow/pull/9359#discussion_r567411585



##########
File path: rust/datafusion/src/physical_plan/expressions/extract.rs
##########
@@ -0,0 +1,115 @@
+use crate::error::Result;
+use core::fmt;
+use std::{any::Any, sync::Arc};
+
+use arrow::{
+    array::{
+        Date32Array, Date64Array, TimestampMicrosecondArray, 
TimestampMillisecondArray,
+        TimestampNanosecondArray, TimestampSecondArray,
+    },
+    compute::hour,
+    datatypes::{DataType, Schema, TimeUnit},
+    record_batch::RecordBatch,
+};
+
+use crate::{
+    error::DataFusionError,
+    logical_plan::DatePart,
+    physical_plan::{ColumnarValue, PhysicalExpr},
+};
+
+impl fmt::Display for Extract {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "EXTRACT({} AS {:?})", self.date_part, self.expr)
+    }
+}
+
+/// Extract
+#[derive(Debug)]
+pub struct Extract {
+    date_part: DatePart,
+    expr: Arc<dyn PhysicalExpr>,
+}
+
+impl Extract {
+    /// Create new Extract expression
+    pub fn new(date_part: DatePart, expr: Arc<dyn PhysicalExpr>) -> Self {
+        Self { date_part, expr }
+    }
+}
+
+impl PhysicalExpr for Extract {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn data_type(&self, _input_schema: &Schema) -> Result<DataType> {
+        Ok(DataType::Int32)
+    }
+
+    fn nullable(&self, input_schema: &Schema) -> Result<bool> {
+        self.expr.nullable(input_schema)
+    }
+
+    fn evaluate(&self, batch: &RecordBatch) -> Result<ColumnarValue> {
+        let value = self.expr.evaluate(batch)?;
+        let data_type = value.data_type();
+        let array = match value {
+            ColumnarValue::Array(array) => array,
+            ColumnarValue::Scalar(scalar) => scalar.to_array(),
+        };
+
+        match data_type {
+            DataType::Date32 => {
+                let array = 
array.as_any().downcast_ref::<Date32Array>().unwrap();
+                Ok(ColumnarValue::Array(Arc::new(hour(array)?)))

Review comment:
       I find it confusing that `date_part` is passed all the way down in the 
Exprs / trees only to be ignored in the actual implementation which directly 
calls `hour`. I can see that the expr seems to always be made with 
`DatePart::Hour` but I am not 100% sure. 
   
   I am fine with not supporting all the various date parts initially, but I 
would recommend the following as a way of documenting through code / safe guard 
against future bugs:
   
   1. Add a test for `EXTRACT DAY from timestamp` and show that it generates a 
useful error
   2. Add a check in this function for `date_part != DataPart::Hour` and throw 
an error




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to