viirya commented on code in PR #1332: URL: https://github.com/apache/datafusion-comet/pull/1332#discussion_r1932805481
##########
native/core/src/execution/operators/filter.rs:
##########
@@ -332,28 +427,41 @@ fn collect_new_statistics(
 /// The FilterExec streams wraps the input iterator and applies the predicate expression to
 /// determine which rows to include in its output batches
 struct FilterExecStream {
-    /// Output schema, which is the same as the input schema for this operator
+    /// Output schema after the projection
     schema: SchemaRef,
     /// The expression to filter on. This expression must evaluate to a boolean value.
     predicate: Arc<dyn PhysicalExpr>,
     /// The input partition to filter.
     input: SendableRecordBatchStream,
-    /// runtime metrics recording
+    /// Runtime metrics recording
     baseline_metrics: BaselineMetrics,
+    /// The projection indices of the columns in the input schema
+    projection: Option<Vec<usize>>,
 }
 
-pub(crate) fn batch_filter(
+fn filter_and_project(
     batch: &RecordBatch,
     predicate: &Arc<dyn PhysicalExpr>,
+    projection: Option<&Vec<usize>>,
+    output_schema: &SchemaRef,
 ) -> Result<RecordBatch> {
     predicate
         .evaluate(batch)
         .and_then(|v| v.into_array(batch.num_rows()))
         .and_then(|array| {
-            Ok(match as_boolean_array(&array) {
-                // apply filter array to record batch
-                Ok(filter_array) => comet_filter_record_batch(batch, filter_array)?,
-                Err(_) => {
+            Ok(match (as_boolean_array(&array), projection) {
+                // Apply filter array to record batch
+                (Ok(filter_array), None) => comet_filter_record_batch(batch, filter_array)?,
+                (Ok(filter_array), Some(projection)) => {
+                    let projected_columns = projection
+                        .iter()
+                        .map(|i| Arc::clone(batch.column(*i)))
+                        .collect();
+                    let projected_batch =
+                        RecordBatch::try_new(Arc::clone(output_schema), projected_columns)?;

Review Comment:
Normally projection should come after predicate, no?

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org