alamb commented on code in PR #11403:
URL: https://github.com/apache/datafusion/pull/11403#discussion_r1677163178


##########
datafusion-cli/src/catalog.rs:
##########
@@ -178,13 +180,18 @@ impl SchemaProvider for DynamicFileSchemaProvider {
                 // to any command options so the only choice is to use an 
empty collection
                 match scheme {
                     "s3" | "oss" | "cos" => {
-                        state = 
state.add_table_options_extension(AwsOptions::default());
+                        if let Some(table_options) = builder.table_options() {

Review Comment:
   It seems like this code previously always registered the table options and 
now it only registers them if another table option was registered. Is that 
intentional? 
   
   Maybe it doesn't matter



##########
datafusion/core/src/execution/context/mod.rs:
##########
@@ -1733,9 +1743,12 @@ mod tests {
     #[tokio::test]
     async fn custom_query_planner() -> Result<()> {
         let runtime = Arc::new(RuntimeEnv::default());
-        let session_state =
-            SessionState::new_with_config_rt(SessionConfig::new(), runtime)
-                .with_query_planner(Arc::new(MyQueryPlanner {}));
+        let session_state = SessionStateBuilder::new()

Review Comment:
   this just looks so much nicer



##########
datafusion/core/tests/user_defined/user_defined_plan.rs:
##########
@@ -290,10 +291,14 @@ async fn topk_plan() -> Result<()> {
 fn make_topk_context() -> SessionContext {
     let config = SessionConfig::new().with_target_partitions(48);
     let runtime = Arc::new(RuntimeEnv::default());
-    let mut state = SessionState::new_with_config_rt(config, runtime)
+    let state = SessionStateBuilder::new()
+        .with_config(config)
+        .with_runtime_env(runtime)
+        .with_default_features()
         .with_query_planner(Arc::new(TopKQueryPlanner {}))
-        .add_optimizer_rule(Arc::new(TopKOptimizerRule {}));
-    state.add_analyzer_rule(Arc::new(MyAnalyzerRule {}));
+        .add_optimizer_rule(Arc::new(TopKOptimizerRule {}))

Review Comment:
   maybe we could make these names consistent (rather than `add_optimizer_rule` 
it could be `with_optimizer_rule` -- a follow on PR would be totally fine too



##########
datafusion/core/src/execution/session_state.rs:
##########
@@ -976,6 +872,713 @@ impl SessionState {
     }
 }
 
+/// A builder to be used for building [`SessionState`]'s. Defaults will
+/// be used for all values unless explicitly provided.
+pub struct SessionStateBuilder {
+    session_id: Option<String>,
+    analyzer: Option<Analyzer>,
+    expr_planners: Option<Vec<Arc<dyn ExprPlanner>>>,
+    optimizer: Option<Optimizer>,
+    physical_optimizers: Option<PhysicalOptimizer>,
+    query_planner: Option<Arc<dyn QueryPlanner + Send + Sync>>,
+    catalog_list: Option<Arc<dyn CatalogProviderList>>,
+    table_functions: Option<HashMap<String, Arc<TableFunction>>>,
+    scalar_functions: Option<Vec<Arc<ScalarUDF>>>,
+    aggregate_functions: Option<Vec<Arc<AggregateUDF>>>,
+    window_functions: Option<Vec<Arc<WindowUDF>>>,
+    serializer_registry: Option<Arc<dyn SerializerRegistry>>,
+    file_formats: Option<Vec<Arc<dyn FileFormatFactory>>>,
+    config: Option<SessionConfig>,
+    table_options: Option<TableOptions>,
+    execution_props: Option<ExecutionProps>,
+    table_factories: Option<HashMap<String, Arc<dyn TableProviderFactory>>>,
+    runtime_env: Option<Arc<RuntimeEnv>>,
+    function_factory: Option<Arc<dyn FunctionFactory>>,
+    // fields to support convenience functions
+    analyzer_rules: Option<Vec<Arc<dyn AnalyzerRule + Send + Sync>>>,
+    optimizer_rules: Option<Vec<Arc<dyn OptimizerRule + Send + Sync>>>,
+    physical_optimizer_rules: Option<Vec<Arc<dyn PhysicalOptimizerRule + Send 
+ Sync>>>,
+}
+
+impl SessionStateBuilder {
+    /// Returns a new [`SessionStateBuilder`] with no options set.
+    pub fn new() -> Self {
+        Self {
+            session_id: None,
+            analyzer: None,
+            expr_planners: None,
+            optimizer: None,
+            physical_optimizers: None,
+            query_planner: None,
+            catalog_list: None,
+            table_functions: None,
+            scalar_functions: None,
+            aggregate_functions: None,
+            window_functions: None,
+            serializer_registry: None,
+            file_formats: None,
+            table_options: None,
+            config: None,
+            execution_props: None,
+            table_factories: None,
+            runtime_env: None,
+            function_factory: None,
+            // fields to support convenience functions
+            analyzer_rules: None,
+            optimizer_rules: None,
+            physical_optimizer_rules: None,
+        }
+    }
+
+    /// Returns a new [SessionStateBuilder] based on an existing [SessionState]
+    /// The session id for the new builder will be unset; all other fields will
+    /// be cloned from what is set in the provided session state
+    pub fn new_from_existing(existing: SessionState) -> Self {
+        let cloned = existing.clone();
+
+        Self {
+            session_id: None,
+            analyzer: Some(cloned.analyzer),
+            expr_planners: Some(cloned.expr_planners),
+            optimizer: Some(cloned.optimizer),
+            physical_optimizers: Some(cloned.physical_optimizers),
+            query_planner: Some(cloned.query_planner),
+            catalog_list: Some(cloned.catalog_list),
+            table_functions: Some(cloned.table_functions),
+            scalar_functions: 
Some(cloned.scalar_functions.into_values().collect_vec()),
+            aggregate_functions: Some(
+                cloned.aggregate_functions.into_values().collect_vec(),
+            ),
+            window_functions: 
Some(cloned.window_functions.into_values().collect_vec()),
+            serializer_registry: Some(cloned.serializer_registry),
+            file_formats: 
Some(cloned.file_formats.into_values().collect_vec()),
+            config: Some(cloned.config),
+            table_options: Some(cloned.table_options),
+            execution_props: Some(cloned.execution_props),
+            table_factories: Some(cloned.table_factories),
+            runtime_env: Some(cloned.runtime_env),
+            function_factory: cloned.function_factory,
+
+            // fields to support convenience functions
+            analyzer_rules: None,
+            optimizer_rules: None,
+            physical_optimizer_rules: None,
+        }
+    }
+
+    /// Set defaults for table_factories, file formats, expr_planners and 
builtin

Review Comment:
   I think this is great for this PR
   
   As part of the larger catalog refactor, I think we'll ned to figure out how 
to get SessioNStateDefaults` out of this file -- but I have some ideas 
(specifically we can make the defaults a trait that could be passed to the 
builder).
   
   Not for this PR



##########
datafusion/core/src/execution/session_state.rs:
##########
@@ -976,6 +872,713 @@ impl SessionState {
     }
 }
 
+/// A builder to be used for building [`SessionState`]'s. Defaults will
+/// be used for all values unless explicitly provided.

Review Comment:
   👍 
   
   We could also add a doc comment to `SessionState` for an example. Something 
like
   
   ```suggestion
   /// be used for all values unless explicitly provided.
   ///
   /// See example on [`SessionState`]
   ```



##########
datafusion/core/src/execution/session_state.rs:
##########
@@ -89,9 +91,29 @@ use uuid::Uuid;
 /// Execution context for registering data sources and executing queries.
 /// See [`SessionContext`] for a higher level API.
 ///
+/// Use the [`SessionStateBuilder`] to build a SessionState object.

Review Comment:
   ❤️ 



##########
datafusion/core/src/execution/context/mod.rs:
##########
@@ -315,7 +320,7 @@ impl SessionContext {
     }
 
     /// Creates a new `SessionContext` using the provided [`SessionState`]
-    #[deprecated(since = "32.0.0", note = "Use SessionState::new_with_state")]
+    #[deprecated(since = "32.0.0", note = "Use 
SessionContext::new_with_state")]

Review Comment:
   As a follow on PR perhaps we can remove this API entirely 
   
   We could probably change the note to say say "SessionStateBuilder" 



##########
datafusion/core/src/execution/session_state.rs:
##########
@@ -976,6 +872,713 @@ impl SessionState {
     }
 }
 
+/// A builder to be used for building [`SessionState`]'s. Defaults will
+/// be used for all values unless explicitly provided.
+pub struct SessionStateBuilder {
+    session_id: Option<String>,
+    analyzer: Option<Analyzer>,
+    expr_planners: Option<Vec<Arc<dyn ExprPlanner>>>,
+    optimizer: Option<Optimizer>,
+    physical_optimizers: Option<PhysicalOptimizer>,
+    query_planner: Option<Arc<dyn QueryPlanner + Send + Sync>>,
+    catalog_list: Option<Arc<dyn CatalogProviderList>>,
+    table_functions: Option<HashMap<String, Arc<TableFunction>>>,
+    scalar_functions: Option<Vec<Arc<ScalarUDF>>>,
+    aggregate_functions: Option<Vec<Arc<AggregateUDF>>>,
+    window_functions: Option<Vec<Arc<WindowUDF>>>,
+    serializer_registry: Option<Arc<dyn SerializerRegistry>>,
+    file_formats: Option<Vec<Arc<dyn FileFormatFactory>>>,
+    config: Option<SessionConfig>,
+    table_options: Option<TableOptions>,
+    execution_props: Option<ExecutionProps>,
+    table_factories: Option<HashMap<String, Arc<dyn TableProviderFactory>>>,
+    runtime_env: Option<Arc<RuntimeEnv>>,
+    function_factory: Option<Arc<dyn FunctionFactory>>,
+    // fields to support convenience functions
+    analyzer_rules: Option<Vec<Arc<dyn AnalyzerRule + Send + Sync>>>,
+    optimizer_rules: Option<Vec<Arc<dyn OptimizerRule + Send + Sync>>>,
+    physical_optimizer_rules: Option<Vec<Arc<dyn PhysicalOptimizerRule + Send 
+ Sync>>>,
+}
+
+impl SessionStateBuilder {
+    /// Returns a new [`SessionStateBuilder`] with no options set.
+    pub fn new() -> Self {
+        Self {
+            session_id: None,
+            analyzer: None,
+            expr_planners: None,
+            optimizer: None,
+            physical_optimizers: None,
+            query_planner: None,
+            catalog_list: None,
+            table_functions: None,
+            scalar_functions: None,
+            aggregate_functions: None,
+            window_functions: None,
+            serializer_registry: None,
+            file_formats: None,
+            table_options: None,
+            config: None,
+            execution_props: None,
+            table_factories: None,
+            runtime_env: None,
+            function_factory: None,
+            // fields to support convenience functions
+            analyzer_rules: None,
+            optimizer_rules: None,
+            physical_optimizer_rules: None,
+        }
+    }
+
+    /// Returns a new [SessionStateBuilder] based on an existing [SessionState]
+    /// The session id for the new builder will be unset; all other fields will
+    /// be cloned from what is set in the provided session state
+    pub fn new_from_existing(existing: SessionState) -> Self {
+        let cloned = existing.clone();
+
+        Self {
+            session_id: None,
+            analyzer: Some(cloned.analyzer),
+            expr_planners: Some(cloned.expr_planners),
+            optimizer: Some(cloned.optimizer),
+            physical_optimizers: Some(cloned.physical_optimizers),
+            query_planner: Some(cloned.query_planner),
+            catalog_list: Some(cloned.catalog_list),
+            table_functions: Some(cloned.table_functions),
+            scalar_functions: 
Some(cloned.scalar_functions.into_values().collect_vec()),
+            aggregate_functions: Some(
+                cloned.aggregate_functions.into_values().collect_vec(),
+            ),
+            window_functions: 
Some(cloned.window_functions.into_values().collect_vec()),
+            serializer_registry: Some(cloned.serializer_registry),
+            file_formats: 
Some(cloned.file_formats.into_values().collect_vec()),
+            config: Some(cloned.config),
+            table_options: Some(cloned.table_options),
+            execution_props: Some(cloned.execution_props),
+            table_factories: Some(cloned.table_factories),
+            runtime_env: Some(cloned.runtime_env),
+            function_factory: cloned.function_factory,
+
+            // fields to support convenience functions
+            analyzer_rules: None,
+            optimizer_rules: None,
+            physical_optimizer_rules: None,
+        }
+    }
+
+    /// Set defaults for table_factories, file formats, expr_planners and 
builtin
+    /// scalar and aggregate functions.
+    pub fn with_default_features(mut self) -> Self {
+        self.table_factories = 
Some(SessionStateDefaults::default_table_factories());
+        self.file_formats = Some(SessionStateDefaults::default_file_formats());
+        self.expr_planners = 
Some(SessionStateDefaults::default_expr_planners());
+        self.scalar_functions = 
Some(SessionStateDefaults::default_scalar_functions());
+        self.aggregate_functions =
+            Some(SessionStateDefaults::default_aggregate_functions());
+        self
+    }
+
+    /// Set the session id.
+    pub fn with_session_id(mut self, session_id: String) -> Self {
+        self.session_id = Some(session_id);
+        self
+    }
+
+    /// Set the [`AnalyzerRule`]s optimizer plan rules.
+    pub fn with_analyzer_rules(
+        mut self,
+        rules: Vec<Arc<dyn AnalyzerRule + Send + Sync>>,
+    ) -> Self {
+        self.analyzer = Some(Analyzer::with_rules(rules));
+        self
+    }
+
+    /// Add `analyzer_rule` to the end of the list of
+    /// [`AnalyzerRule`]s used to rewrite queries.
+    pub fn add_analyzer_rule(
+        mut self,
+        analyzer_rule: Arc<dyn AnalyzerRule + Send + Sync>,
+    ) -> Self {
+        let mut rules = self.analyzer_rules.unwrap_or_default();
+        rules.push(analyzer_rule);
+        self.analyzer_rules = Some(rules);
+        self
+    }
+
+    /// Set the [`OptimizerRule`]s used to optimize plans.
+    pub fn with_optimizer_rules(
+        mut self,
+        rules: Vec<Arc<dyn OptimizerRule + Send + Sync>>,
+    ) -> Self {
+        self.optimizer = Some(Optimizer::with_rules(rules));
+        self
+    }
+
+    /// Add `optimizer_rule` to the end of the list of
+    /// [`OptimizerRule`]s used to rewrite queries.
+    pub fn add_optimizer_rule(
+        mut self,
+        optimizer_rule: Arc<dyn OptimizerRule + Send + Sync>,
+    ) -> Self {
+        let mut rules = self.optimizer_rules.unwrap_or_default();
+        rules.push(optimizer_rule);
+        self.optimizer_rules = Some(rules);
+        self
+    }
+
+    /// Set the [`ExprPlanner`]s used to customize the behavior of the SQL 
planner.
+    pub fn with_expr_planners(
+        mut self,
+        expr_planners: Vec<Arc<dyn ExprPlanner>>,
+    ) -> Self {
+        self.expr_planners = Some(expr_planners);
+        self
+    }
+
+    /// Set tje [`PhysicalOptimizerRule`]s used to optimize plans.
+    pub fn with_physical_optimizer_rules(
+        mut self,
+        physical_optimizers: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>>,
+    ) -> Self {
+        self.physical_optimizers =
+            Some(PhysicalOptimizer::with_rules(physical_optimizers));
+        self
+    }
+
+    /// Add `physical_optimizer_rule` to the end of the list of
+    /// [`PhysicalOptimizerRule`]s used to rewrite queries.
+    pub fn add_physical_optimizer_rule(

Review Comment:
   I know this is just following the existing (inconsistent) API naming, but 
since we are making a new set of APIs, perhaps we could make these builder 
style APIs consistently start with `with_` 



##########
datafusion/core/src/execution/session_state.rs:
##########
@@ -976,6 +872,713 @@ impl SessionState {
     }
 }
 
+/// A builder to be used for building [`SessionState`]'s. Defaults will
+/// be used for all values unless explicitly provided.
+pub struct SessionStateBuilder {
+    session_id: Option<String>,
+    analyzer: Option<Analyzer>,
+    expr_planners: Option<Vec<Arc<dyn ExprPlanner>>>,
+    optimizer: Option<Optimizer>,
+    physical_optimizers: Option<PhysicalOptimizer>,
+    query_planner: Option<Arc<dyn QueryPlanner + Send + Sync>>,
+    catalog_list: Option<Arc<dyn CatalogProviderList>>,
+    table_functions: Option<HashMap<String, Arc<TableFunction>>>,
+    scalar_functions: Option<Vec<Arc<ScalarUDF>>>,
+    aggregate_functions: Option<Vec<Arc<AggregateUDF>>>,
+    window_functions: Option<Vec<Arc<WindowUDF>>>,
+    serializer_registry: Option<Arc<dyn SerializerRegistry>>,
+    file_formats: Option<Vec<Arc<dyn FileFormatFactory>>>,
+    config: Option<SessionConfig>,
+    table_options: Option<TableOptions>,
+    execution_props: Option<ExecutionProps>,
+    table_factories: Option<HashMap<String, Arc<dyn TableProviderFactory>>>,
+    runtime_env: Option<Arc<RuntimeEnv>>,
+    function_factory: Option<Arc<dyn FunctionFactory>>,
+    // fields to support convenience functions
+    analyzer_rules: Option<Vec<Arc<dyn AnalyzerRule + Send + Sync>>>,
+    optimizer_rules: Option<Vec<Arc<dyn OptimizerRule + Send + Sync>>>,
+    physical_optimizer_rules: Option<Vec<Arc<dyn PhysicalOptimizerRule + Send 
+ Sync>>>,
+}
+
+impl SessionStateBuilder {
+    /// Returns a new [`SessionStateBuilder`] with no options set.
+    pub fn new() -> Self {
+        Self {
+            session_id: None,
+            analyzer: None,
+            expr_planners: None,
+            optimizer: None,
+            physical_optimizers: None,
+            query_planner: None,
+            catalog_list: None,
+            table_functions: None,
+            scalar_functions: None,
+            aggregate_functions: None,
+            window_functions: None,
+            serializer_registry: None,
+            file_formats: None,
+            table_options: None,
+            config: None,
+            execution_props: None,
+            table_factories: None,
+            runtime_env: None,
+            function_factory: None,
+            // fields to support convenience functions
+            analyzer_rules: None,
+            optimizer_rules: None,
+            physical_optimizer_rules: None,
+        }
+    }
+
+    /// Returns a new [SessionStateBuilder] based on an existing [SessionState]
+    /// The session id for the new builder will be unset; all other fields will
+    /// be cloned from what is set in the provided session state
+    pub fn new_from_existing(existing: SessionState) -> Self {
+        let cloned = existing.clone();
+
+        Self {
+            session_id: None,
+            analyzer: Some(cloned.analyzer),
+            expr_planners: Some(cloned.expr_planners),
+            optimizer: Some(cloned.optimizer),
+            physical_optimizers: Some(cloned.physical_optimizers),
+            query_planner: Some(cloned.query_planner),
+            catalog_list: Some(cloned.catalog_list),
+            table_functions: Some(cloned.table_functions),
+            scalar_functions: 
Some(cloned.scalar_functions.into_values().collect_vec()),
+            aggregate_functions: Some(
+                cloned.aggregate_functions.into_values().collect_vec(),
+            ),
+            window_functions: 
Some(cloned.window_functions.into_values().collect_vec()),
+            serializer_registry: Some(cloned.serializer_registry),
+            file_formats: 
Some(cloned.file_formats.into_values().collect_vec()),
+            config: Some(cloned.config),
+            table_options: Some(cloned.table_options),
+            execution_props: Some(cloned.execution_props),
+            table_factories: Some(cloned.table_factories),
+            runtime_env: Some(cloned.runtime_env),
+            function_factory: cloned.function_factory,
+
+            // fields to support convenience functions
+            analyzer_rules: None,
+            optimizer_rules: None,
+            physical_optimizer_rules: None,
+        }
+    }
+
+    /// Set defaults for table_factories, file formats, expr_planners and 
builtin
+    /// scalar and aggregate functions.
+    pub fn with_default_features(mut self) -> Self {
+        self.table_factories = 
Some(SessionStateDefaults::default_table_factories());
+        self.file_formats = Some(SessionStateDefaults::default_file_formats());
+        self.expr_planners = 
Some(SessionStateDefaults::default_expr_planners());
+        self.scalar_functions = 
Some(SessionStateDefaults::default_scalar_functions());
+        self.aggregate_functions =
+            Some(SessionStateDefaults::default_aggregate_functions());
+        self
+    }
+
+    /// Set the session id.
+    pub fn with_session_id(mut self, session_id: String) -> Self {
+        self.session_id = Some(session_id);
+        self
+    }
+
+    /// Set the [`AnalyzerRule`]s optimizer plan rules.
+    pub fn with_analyzer_rules(
+        mut self,
+        rules: Vec<Arc<dyn AnalyzerRule + Send + Sync>>,
+    ) -> Self {
+        self.analyzer = Some(Analyzer::with_rules(rules));
+        self
+    }
+
+    /// Add `analyzer_rule` to the end of the list of
+    /// [`AnalyzerRule`]s used to rewrite queries.
+    pub fn add_analyzer_rule(
+        mut self,
+        analyzer_rule: Arc<dyn AnalyzerRule + Send + Sync>,
+    ) -> Self {
+        let mut rules = self.analyzer_rules.unwrap_or_default();
+        rules.push(analyzer_rule);
+        self.analyzer_rules = Some(rules);
+        self
+    }
+
+    /// Set the [`OptimizerRule`]s used to optimize plans.
+    pub fn with_optimizer_rules(
+        mut self,
+        rules: Vec<Arc<dyn OptimizerRule + Send + Sync>>,
+    ) -> Self {
+        self.optimizer = Some(Optimizer::with_rules(rules));
+        self
+    }
+
+    /// Add `optimizer_rule` to the end of the list of
+    /// [`OptimizerRule`]s used to rewrite queries.
+    pub fn add_optimizer_rule(
+        mut self,
+        optimizer_rule: Arc<dyn OptimizerRule + Send + Sync>,
+    ) -> Self {
+        let mut rules = self.optimizer_rules.unwrap_or_default();
+        rules.push(optimizer_rule);
+        self.optimizer_rules = Some(rules);
+        self
+    }
+
+    /// Set the [`ExprPlanner`]s used to customize the behavior of the SQL 
planner.
+    pub fn with_expr_planners(
+        mut self,
+        expr_planners: Vec<Arc<dyn ExprPlanner>>,
+    ) -> Self {
+        self.expr_planners = Some(expr_planners);
+        self
+    }
+
+    /// Set tje [`PhysicalOptimizerRule`]s used to optimize plans.
+    pub fn with_physical_optimizer_rules(
+        mut self,
+        physical_optimizers: Vec<Arc<dyn PhysicalOptimizerRule + Send + Sync>>,
+    ) -> Self {
+        self.physical_optimizers =
+            Some(PhysicalOptimizer::with_rules(physical_optimizers));
+        self
+    }
+
+    /// Add `physical_optimizer_rule` to the end of the list of
+    /// [`PhysicalOptimizerRule`]s used to rewrite queries.
+    pub fn add_physical_optimizer_rule(
+        mut self,
+        physical_optimizer_rule: Arc<dyn PhysicalOptimizerRule + Send + Sync>,
+    ) -> Self {
+        let mut rules = self.physical_optimizer_rules.unwrap_or_default();
+        rules.push(physical_optimizer_rule);
+        self.physical_optimizer_rules = Some(rules);
+        self
+    }
+
+    /// Set the [`QueryPlanner`]
+    pub fn with_query_planner(
+        mut self,
+        query_planner: Arc<dyn QueryPlanner + Send + Sync>,
+    ) -> Self {
+        self.query_planner = Some(query_planner);
+        self
+    }
+
+    /// Set the [`CatalogProviderList`]
+    pub fn with_catalog_list(
+        mut self,
+        catalog_list: Arc<dyn CatalogProviderList>,
+    ) -> Self {
+        self.catalog_list = Some(catalog_list);
+        self
+    }
+
+    /// Set the map of [`TableFunction`]s
+    pub fn with_table_functions(
+        mut self,
+        table_functions: HashMap<String, Arc<TableFunction>>,
+    ) -> Self {
+        self.table_functions = Some(table_functions);
+        self
+    }
+
+    /// Set the map of [`ScalarUDF`]s
+    pub fn with_scalar_functions(
+        mut self,
+        scalar_functions: Vec<Arc<ScalarUDF>>,
+    ) -> Self {
+        self.scalar_functions = Some(scalar_functions);
+        self
+    }
+
+    /// Set the map of [`AggregateUDF`]s
+    pub fn with_aggregate_functions(
+        mut self,
+        aggregate_functions: Vec<Arc<AggregateUDF>>,
+    ) -> Self {
+        self.aggregate_functions = Some(aggregate_functions);
+        self
+    }
+
+    /// Set the map of [`WindowUDF`]s
+    pub fn with_window_functions(
+        mut self,
+        window_functions: Vec<Arc<WindowUDF>>,
+    ) -> Self {
+        self.window_functions = Some(window_functions);
+        self
+    }
+
+    /// Set the [`SerializerRegistry`]
+    pub fn with_serializer_registry(
+        mut self,
+        serializer_registry: Arc<dyn SerializerRegistry>,
+    ) -> Self {
+        self.serializer_registry = Some(serializer_registry);
+        self
+    }
+
+    /// Set the map of [`FileFormatFactory`]s
+    pub fn with_file_formats(
+        mut self,
+        file_formats: Vec<Arc<dyn FileFormatFactory>>,
+    ) -> Self {
+        self.file_formats = Some(file_formats);
+        self
+    }
+
+    /// Set the [`SessionConfig`]
+    pub fn with_config(mut self, config: SessionConfig) -> Self {
+        self.config = Some(config);
+        self
+    }
+
+    /// Set the [`TableOptions`]
+    pub fn with_table_options(mut self, table_options: TableOptions) -> Self {
+        self.table_options = Some(table_options);
+        self
+    }
+
+    /// Set the [`ExecutionProps`]
+    pub fn with_execution_props(mut self, execution_props: ExecutionProps) -> 
Self {
+        self.execution_props = Some(execution_props);
+        self
+    }
+
+    /// Set the map of [`TableProviderFactory`]s
+    pub fn with_table_factories(
+        mut self,
+        table_factories: HashMap<String, Arc<dyn TableProviderFactory>>,
+    ) -> Self {
+        self.table_factories = Some(table_factories);
+        self
+    }
+
+    /// Set the [`RuntimeEnv`]
+    pub fn with_runtime_env(mut self, runtime_env: Arc<RuntimeEnv>) -> Self {
+        self.runtime_env = Some(runtime_env);
+        self
+    }
+
+    /// Set a [`FunctionFactory`] to handle `CREATE FUNCTION` statements
+    pub fn with_function_factory(
+        mut self,
+        function_factory: Option<Arc<dyn FunctionFactory>>,
+    ) -> Self {
+        self.function_factory = function_factory;
+        self
+    }
+
+    /// Builds a [`SessionState`] with the current configuration.
+    ///
+    /// Note that there is an explicit option for enabling catalog and schema 
defaults
+    /// in [SessionConfig::create_default_catalog_and_schema] which if enabled
+    /// will be built here.
+    pub fn build(self) -> SessionState {
+        let config = self.config.unwrap_or_default();
+        let runtime_env = 
self.runtime_env.unwrap_or(Arc::new(RuntimeEnv::default()));
+
+        let mut state = SessionState {

Review Comment:
   Stylistically, another way to write this code that might be more boiler 
plate but would let the compiler ensure all fields are used would be:
   
   ```rust
   let Self {
    session_id, 
    analyzer, 
   ...
    // by explicitly listing all these fields, the compiler will complain if 
any fields are missed
   } = self;
   
   let session_id = session_id.unwrap_or_else(|| Uuid::new_v4().to_string());
   let analyzer = analyzer.unwrap_or_default(),
   



##########
datafusion/core/src/execution/session_state.rs:
##########
@@ -976,6 +872,713 @@ impl SessionState {
     }
 }
 
+/// A builder to be used for building [`SessionState`]'s. Defaults will
+/// be used for all values unless explicitly provided.
+pub struct SessionStateBuilder {
+    session_id: Option<String>,
+    analyzer: Option<Analyzer>,
+    expr_planners: Option<Vec<Arc<dyn ExprPlanner>>>,
+    optimizer: Option<Optimizer>,
+    physical_optimizers: Option<PhysicalOptimizer>,
+    query_planner: Option<Arc<dyn QueryPlanner + Send + Sync>>,
+    catalog_list: Option<Arc<dyn CatalogProviderList>>,
+    table_functions: Option<HashMap<String, Arc<TableFunction>>>,
+    scalar_functions: Option<Vec<Arc<ScalarUDF>>>,
+    aggregate_functions: Option<Vec<Arc<AggregateUDF>>>,
+    window_functions: Option<Vec<Arc<WindowUDF>>>,
+    serializer_registry: Option<Arc<dyn SerializerRegistry>>,
+    file_formats: Option<Vec<Arc<dyn FileFormatFactory>>>,
+    config: Option<SessionConfig>,
+    table_options: Option<TableOptions>,
+    execution_props: Option<ExecutionProps>,
+    table_factories: Option<HashMap<String, Arc<dyn TableProviderFactory>>>,
+    runtime_env: Option<Arc<RuntimeEnv>>,
+    function_factory: Option<Arc<dyn FunctionFactory>>,
+    // fields to support convenience functions
+    analyzer_rules: Option<Vec<Arc<dyn AnalyzerRule + Send + Sync>>>,
+    optimizer_rules: Option<Vec<Arc<dyn OptimizerRule + Send + Sync>>>,
+    physical_optimizer_rules: Option<Vec<Arc<dyn PhysicalOptimizerRule + Send 
+ Sync>>>,
+}
+
+impl SessionStateBuilder {
+    /// Returns a new [`SessionStateBuilder`] with no options set.
+    pub fn new() -> Self {
+        Self {
+            session_id: None,
+            analyzer: None,
+            expr_planners: None,
+            optimizer: None,
+            physical_optimizers: None,
+            query_planner: None,
+            catalog_list: None,
+            table_functions: None,
+            scalar_functions: None,
+            aggregate_functions: None,
+            window_functions: None,
+            serializer_registry: None,
+            file_formats: None,
+            table_options: None,
+            config: None,
+            execution_props: None,
+            table_factories: None,
+            runtime_env: None,
+            function_factory: None,
+            // fields to support convenience functions
+            analyzer_rules: None,
+            optimizer_rules: None,
+            physical_optimizer_rules: None,
+        }
+    }
+
+    /// Returns a new [SessionStateBuilder] based on an existing [SessionState]
+    /// The session id for the new builder will be unset; all other fields will
+    /// be cloned from what is set in the provided session state
+    pub fn new_from_existing(existing: SessionState) -> Self {
+        let cloned = existing.clone();

Review Comment:
   Since `existing` is owned, why do we need `cloned` here? 
   
   I tried removing this clone locally and it seemed to work 
   
   



##########
datafusion/core/src/execution/session_state.rs:
##########
@@ -976,6 +872,713 @@ impl SessionState {
     }
 }
 
+/// A builder to be used for building [`SessionState`]'s. Defaults will
+/// be used for all values unless explicitly provided.
+pub struct SessionStateBuilder {
+    session_id: Option<String>,
+    analyzer: Option<Analyzer>,
+    expr_planners: Option<Vec<Arc<dyn ExprPlanner>>>,
+    optimizer: Option<Optimizer>,
+    physical_optimizers: Option<PhysicalOptimizer>,
+    query_planner: Option<Arc<dyn QueryPlanner + Send + Sync>>,
+    catalog_list: Option<Arc<dyn CatalogProviderList>>,
+    table_functions: Option<HashMap<String, Arc<TableFunction>>>,
+    scalar_functions: Option<Vec<Arc<ScalarUDF>>>,
+    aggregate_functions: Option<Vec<Arc<AggregateUDF>>>,
+    window_functions: Option<Vec<Arc<WindowUDF>>>,
+    serializer_registry: Option<Arc<dyn SerializerRegistry>>,
+    file_formats: Option<Vec<Arc<dyn FileFormatFactory>>>,
+    config: Option<SessionConfig>,
+    table_options: Option<TableOptions>,
+    execution_props: Option<ExecutionProps>,
+    table_factories: Option<HashMap<String, Arc<dyn TableProviderFactory>>>,
+    runtime_env: Option<Arc<RuntimeEnv>>,
+    function_factory: Option<Arc<dyn FunctionFactory>>,
+    // fields to support convenience functions
+    analyzer_rules: Option<Vec<Arc<dyn AnalyzerRule + Send + Sync>>>,
+    optimizer_rules: Option<Vec<Arc<dyn OptimizerRule + Send + Sync>>>,
+    physical_optimizer_rules: Option<Vec<Arc<dyn PhysicalOptimizerRule + Send 
+ Sync>>>,
+}
+
+impl SessionStateBuilder {
+    /// Returns a new [`SessionStateBuilder`] with no options set.
+    pub fn new() -> Self {
+        Self {
+            session_id: None,
+            analyzer: None,
+            expr_planners: None,
+            optimizer: None,
+            physical_optimizers: None,
+            query_planner: None,
+            catalog_list: None,
+            table_functions: None,
+            scalar_functions: None,
+            aggregate_functions: None,
+            window_functions: None,
+            serializer_registry: None,
+            file_formats: None,
+            table_options: None,
+            config: None,
+            execution_props: None,
+            table_factories: None,
+            runtime_env: None,
+            function_factory: None,
+            // fields to support convenience functions
+            analyzer_rules: None,
+            optimizer_rules: None,
+            physical_optimizer_rules: None,
+        }
+    }
+
+    /// Returns a new [SessionStateBuilder] based on an existing [SessionState]
+    /// The session id for the new builder will be unset; all other fields will
+    /// be cloned from what is set in the provided session state
+    pub fn new_from_existing(existing: SessionState) -> Self {
+        let cloned = existing.clone();
+
+        Self {
+            session_id: None,
+            analyzer: Some(cloned.analyzer),
+            expr_planners: Some(cloned.expr_planners),
+            optimizer: Some(cloned.optimizer),
+            physical_optimizers: Some(cloned.physical_optimizers),
+            query_planner: Some(cloned.query_planner),
+            catalog_list: Some(cloned.catalog_list),
+            table_functions: Some(cloned.table_functions),
+            scalar_functions: 
Some(cloned.scalar_functions.into_values().collect_vec()),
+            aggregate_functions: Some(
+                cloned.aggregate_functions.into_values().collect_vec(),
+            ),
+            window_functions: 
Some(cloned.window_functions.into_values().collect_vec()),
+            serializer_registry: Some(cloned.serializer_registry),
+            file_formats: 
Some(cloned.file_formats.into_values().collect_vec()),
+            config: Some(cloned.config),
+            table_options: Some(cloned.table_options),
+            execution_props: Some(cloned.execution_props),
+            table_factories: Some(cloned.table_factories),
+            runtime_env: Some(cloned.runtime_env),
+            function_factory: cloned.function_factory,
+
+            // fields to support convenience functions
+            analyzer_rules: None,
+            optimizer_rules: None,
+            physical_optimizer_rules: None,
+        }
+    }
+
+    /// Set defaults for table_factories, file formats, expr_planners and 
builtin
+    /// scalar and aggregate functions.
+    pub fn with_default_features(mut self) -> Self {
+        self.table_factories = 
Some(SessionStateDefaults::default_table_factories());
+        self.file_formats = Some(SessionStateDefaults::default_file_formats());
+        self.expr_planners = 
Some(SessionStateDefaults::default_expr_planners());
+        self.scalar_functions = 
Some(SessionStateDefaults::default_scalar_functions());
+        self.aggregate_functions =
+            Some(SessionStateDefaults::default_aggregate_functions());
+        self
+    }
+
+    /// Set the session id.
+    pub fn with_session_id(mut self, session_id: String) -> Self {
+        self.session_id = Some(session_id);
+        self
+    }
+
+    /// Set the [`AnalyzerRule`]s optimizer plan rules.
+    pub fn with_analyzer_rules(
+        mut self,
+        rules: Vec<Arc<dyn AnalyzerRule + Send + Sync>>,
+    ) -> Self {
+        self.analyzer = Some(Analyzer::with_rules(rules));
+        self
+    }
+
+    /// Add `analyzer_rule` to the end of the list of
+    /// [`AnalyzerRule`]s used to rewrite queries.
+    pub fn add_analyzer_rule(
+        mut self,
+        analyzer_rule: Arc<dyn AnalyzerRule + Send + Sync>,
+    ) -> Self {
+        let mut rules = self.analyzer_rules.unwrap_or_default();
+        rules.push(analyzer_rule);
+        self.analyzer_rules = Some(rules);
+        self
+    }
+
+    /// Set the [`OptimizerRule`]s used to optimize plans.
+    pub fn with_optimizer_rules(
+        mut self,
+        rules: Vec<Arc<dyn OptimizerRule + Send + Sync>>,
+    ) -> Self {
+        self.optimizer = Some(Optimizer::with_rules(rules));
+        self
+    }
+
+    /// Add `optimizer_rule` to the end of the list of
+    /// [`OptimizerRule`]s used to rewrite queries.
+    pub fn add_optimizer_rule(

Review Comment:
   likewise it would be great to call this `with_optimizer_rule`



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]


Reply via email to