alamb commented on code in PR #4180:
URL: https://github.com/apache/arrow-datafusion/pull/4180#discussion_r1020756782


##########
datafusion-examples/examples/parquet_sql_multiple_files.rs:
##########
@@ -33,13 +33,11 @@ async fn main() -> Result<()> {
 
     // Configure listing options
     let file_format = ParquetFormat::default().with_enable_pruning(true);
-    let listing_options = ListingOptions {
-        file_extension: FileType::PARQUET.get_ext(),
-        format: Arc::new(file_format),
-        table_partition_cols: vec![],
-        collect_stat: true,
-        target_partitions: 1,
-    };
+    let listing_options = ListingOptions::new(Arc::new(file_format))
+        .with_file_extension(FileType::PARQUET.get_ext())
+        .with_table_partition_cols(vec![])
+        .with_collect_stat(true)
+        .with_target_partitions(1);

Review Comment:
   We could further simplify this code and avoid explicitly specifying the default values (the last three lines).
   
   But that can also be done as a follow-on PR.
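   
   For illustration, a sketch of that simplification (assuming the defaults set by `ListingOptions::new` already cover those last three values, as the diff above suggests):
   
       // Hypothetical simplified version relying on the ListingOptions::new defaults
       let listing_options = ListingOptions::new(Arc::new(file_format))
           .with_file_extension(FileType::PARQUET.get_ext());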



##########
datafusion/core/src/datasource/listing/table.rs:
##########
@@ -238,6 +238,73 @@ impl ListingOptions {
             target_partitions: 1,
         }
     }
+    /// Set file extension on [`ListingOptions`] and returns self.
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    /// use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat};
+    ///
+    /// let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default()))
+    ///     .with_file_extension(".parquet");
+    ///
+    /// assert_eq!(listing_options.file_extension, ".parquet");
+    /// ```
+    pub fn with_file_extension(mut self, file_extension: impl Into<String>) -> Self {
+        self.file_extension = file_extension.into();
+        self
+    }
+
+    /// Set table partition column names on [`ListingOptions`] and returns self.
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    /// use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat};
+    ///
+    /// let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default()))
+    ///     .with_table_partition_cols(vec!["col_a".to_string(), "col_b".to_string()]);
+    ///
+    /// assert_eq!(listing_options.table_partition_cols, vec!["col_a", "col_b"]);
+    /// ```
+    pub fn with_table_partition_cols(
+        mut self,
+        table_partition_cols: Vec<String>,
+    ) -> Self {
+        self.table_partition_cols = table_partition_cols;
+        self
+    }
+
+    /// Set stat collection on [`ListingOptions`] and returns self.
+    ///
+    /// ```
+    /// use std::sync::Arc;
+    /// use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat};
+    ///
+    /// // Enable stat collection
+    /// let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default()))
+    ///     .with_collect_stat(true);
+    ///
+    /// assert_eq!(listing_options.collect_stat, true);
+    /// ```
+    pub fn with_collect_stat(mut self, collect_stat: bool) -> Self {
+        self.collect_stat = collect_stat;
+        self
+    }
+
+    /// Set number of target partitions on [`ListingOptions`] and returns self.

Review Comment:
   ❤️ 
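   
   For completeness, a small usage sketch of `with_target_partitions`, in the same style as the doc examples above (the value `8` is purely illustrative):
   
       use std::sync::Arc;
       use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat};
   
       let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default()))
           .with_target_partitions(8);
   
       assert_eq!(listing_options.target_partitions, 8);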


