viirya commented on code in PR #1332:
URL: https://github.com/apache/datafusion-comet/pull/1332#discussion_r1932805481


##########
native/core/src/execution/operators/filter.rs:
##########
@@ -332,28 +427,41 @@ fn collect_new_statistics(
 /// The FilterExec streams wraps the input iterator and applies the predicate 
expression to
 /// determine which rows to include in its output batches
 struct FilterExecStream {
-    /// Output schema, which is the same as the input schema for this operator
+    /// Output schema after the projection
     schema: SchemaRef,
     /// The expression to filter on. This expression must evaluate to a 
boolean value.
     predicate: Arc<dyn PhysicalExpr>,
     /// The input partition to filter.
     input: SendableRecordBatchStream,
-    /// runtime metrics recording
+    /// Runtime metrics recording
     baseline_metrics: BaselineMetrics,
+    /// The projection indices of the columns in the input schema
+    projection: Option<Vec<usize>>,
 }
 
-pub(crate) fn batch_filter(
+fn filter_and_project(
     batch: &RecordBatch,
     predicate: &Arc<dyn PhysicalExpr>,
+    projection: Option<&Vec<usize>>,
+    output_schema: &SchemaRef,
 ) -> Result<RecordBatch> {
     predicate
         .evaluate(batch)
         .and_then(|v| v.into_array(batch.num_rows()))
         .and_then(|array| {
-            Ok(match as_boolean_array(&array) {
-                // apply filter array to record batch
-                Ok(filter_array) => comet_filter_record_batch(batch, 
filter_array)?,
-                Err(_) => {
+            Ok(match (as_boolean_array(&array), projection) {
+                // Apply filter array to record batch
+                (Ok(filter_array), None) => comet_filter_record_batch(batch, 
filter_array)?,
+                (Ok(filter_array), Some(projection)) => {
+                    let projected_columns = projection
+                        .iter()
+                        .map(|i| Arc::clone(batch.column(*i)))
+                        .collect();
+                    let projected_batch =
+                        RecordBatch::try_new(Arc::clone(output_schema), 
projected_columns)?;

Review Comment:
   Normally the projection should be applied after the predicate, no?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to