mbutrovich commented on code in PR #2521:
URL: https://github.com/apache/iceberg-rust/pull/2521#discussion_r3326724242
##########
crates/integrations/datafusion/src/physical_plan/scan.rs:
##########
@@ -247,3 +283,58 @@ fn get_column_names(
.collect::<Vec<String>>()
})
}
+
+#[cfg(test)]
+mod tests {
+ use std::sync::Arc;
+
+ use datafusion::arrow::array::Int64Array;
+ use datafusion::arrow::datatypes::{
+ DataType, Field, Schema as ArrowSchema, SchemaRef as ArrowSchemaRef,
+ };
+ use datafusion::arrow::record_batch::RecordBatch;
+ use datafusion::physical_plan::metrics::{BaselineMetrics, ExecutionPlanMetricsSet};
+ use futures::StreamExt;
+
+ use super::stream_with_baseline_metrics;
+
+ #[test]
+ fn stream_with_baseline_metrics_records_rows_and_compute() {
+ let metrics = ExecutionPlanMetricsSet::new();
+ let baseline_metrics = BaselineMetrics::new(&metrics, 0);
+ let batch = make_batch();
+ let stream = Box::pin(futures::stream::iter([Ok(batch)]));
+ let mut stream = stream_with_baseline_metrics(stream, baseline_metrics);
+
+ futures::executor::block_on(async {
+ let batch = stream
+ .next()
+ .await
+ .expect("stream should return one item")
+ .expect("stream item should be valid");
+ assert_eq!(batch.num_rows(), 3);
+ assert!(stream.next().await.is_none());
+ });
+
+ let metrics = metrics.clone_inner();
+ assert_eq!(metrics.output_rows(), Some(3));
+ assert!(
+ metrics.elapsed_compute().is_some_and(|elapsed| elapsed > 0),
Review Comment:
`stream_with_baseline_metrics_records_rows_and_compute` asserts
`output_rows` and `elapsed_compute`. The PR description also lists
`output_batches`, `output_bytes`, and completion timestamps as exposed. Those
*are* in fact recorded — `BaselineMetrics::record_poll →
batch.record_output(...)` updates `output_batches` and `output_bytes` (see
`datafusion/physical-expr-common/src/metrics/baseline.rs:331`) — so adding
`assert_eq!(metrics.output_batches(), Some(1))` and
`assert!(metrics.output_bytes().is_some_and(|b| b > 0))` is a cheap regression
guard that matches the documented contract.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]