westonpace commented on code in PR #13796:
URL: https://github.com/apache/arrow/pull/13796#discussion_r949549509
##########
cpp/src/arrow/filesystem/localfs.h:
##########
@@ -34,10 +34,29 @@ namespace fs {
/// Options for the LocalFileSystem implementation.
struct ARROW_EXPORT LocalFileSystemOptions {
+ static constexpr int32_t kDefaultDirectoryReadahead = 16;
+ static constexpr int32_t kDefaultFileInfoBatchSize = 1000;
+
/// Whether OpenInputStream and OpenInputFile return a mmap'ed file,
/// or a regular one.
bool use_mmap = false;
+ /// Options related to the `GetFileInfoGenerator` interface.
+
+ /// EXPERIMENTAL: The maximum number of directories processed in parallel
+ /// by `GetFileInfoGenerator`.
+ int32_t directory_readahead = kDefaultDirectoryReadahead;
+
+ /// EXPERIMENTAL: The maximum number of entries aggregated into each
+ /// FileInfoVector chunk by `GetFileInfoGenerator`.
+ ///
+ /// Since each FileInfo entry needs a separate `stat` system call, a
+ /// directory with a very large number of files may take a lot of time to
+ /// process entirely. By generating a FileInfoVector after this chunk
+ /// size is reached, we ensure FileInfo entries can start being consumed
+ /// from the FileInfoGenerator with less initial latency.
+ int32_t file_info_batch_size = kDefaultFileInfoBatchSize;
Review Comment:
That helps a lot, thank you.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]