alamb commented on code in PR #5898:
URL: https://github.com/apache/arrow-datafusion/pull/5898#discussion_r1161792996
##########
datafusion/core/src/physical_plan/file_format/file_stream.rs:
##########
@@ -140,14 +141,31 @@ impl StartableTime {
}
}
+/// Metrics for [`FileStream`]
+///
+/// Note that all of these metrics are in terms of wall clock time
+/// (not cpu time) so they include time spent waiting on I/O as well
+/// as other operators.
struct FileStreamMetrics {
- /// Time elapsed for file opening
+ /// Wall clock time elapsed for file opening.
+ ///
+ /// Time between when [`FileReader::open`] is called and when the
+ /// [`FileStream`] receives a stream for reading.
pub time_opening: StartableTime,
- /// Time elapsed for file scanning + first record batch of decompression + decoding
+ /// Wall clock time elapsed for file scanning + first record batch of decompression + decoding
+ ///
+ /// Time between when the [`FileStream`] requests data from the
+ /// stream and when the first [`RecordBatch`] is produced.
pub time_scanning_until_data: StartableTime,
- /// Total elapsed time for for scanning + record batch decompression / decoding
+ /// Total elapsed wall clock time for scanning + record batch decompression / decoding
+ ///
+ /// Sum of time between when the [`FileStream`] requests data from
+ /// the stream and when a [`RecordBatch`] is produced for all
+ /// record batches in the stream.
pub time_scanning_total: StartableTime,
Review Comment:
Updated in 94ecfc49fa
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]