This is an automated email from the ASF dual-hosted git repository. xudong963 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new 4c36226bc4 Docs: Add Examples to Config Options page (#17039) 4c36226bc4 is described below commit 4c36226bc4a4a82c42a24a07c2ba068891f981e4 Author: Andrew Lamb <and...@nerdnetworks.org> AuthorDate: Tue Aug 5 07:15:36 2025 -0400 Docs: Add Examples to Config Options page (#17039) --- datafusion/common/src/config.rs | 20 +++++++++++++---- datafusion/execution/src/config.rs | 3 +++ dev/update_config_docs.sh | 45 +++++++++++++++++++++++++++++-------- docs/source/user-guide/configs.md | 46 ++++++++++++++++++++++++++++++-------- 4 files changed, 92 insertions(+), 22 deletions(-) diff --git a/datafusion/common/src/config.rs b/datafusion/common/src/config.rs index a2c5d08d52..e5ddb8459b 100644 --- a/datafusion/common/src/config.rs +++ b/datafusion/common/src/config.rs @@ -1008,11 +1008,23 @@ impl ConfigOptions { e.0.set(key, value) } - /// Create new ConfigOptions struct, taking values from - /// environment variables where possible. + /// Create new [`ConfigOptions`], taking values from environment variables + /// where possible. /// - /// For example, setting `DATAFUSION_EXECUTION_BATCH_SIZE` will - /// control `datafusion.execution.batch_size`. + /// For example, to configure `datafusion.execution.batch_size` + /// ([`ExecutionOptions::batch_size`]) you would set the + /// `DATAFUSION_EXECUTION_BATCH_SIZE` environment variable. + /// + /// The name of the environment variable is the option's key, transformed to + /// uppercase and with periods replaced with underscores. + /// + /// Values are parsed according to the [same rules used in casts from + /// Utf8](https://docs.rs/arrow/latest/arrow/compute/kernels/cast/fn.cast.html). + /// + /// If the value in the environment variable cannot be cast to the type of + /// the configuration option, the default value will be used instead and a + /// warning emitted. Environment variables are read when this method is + /// called, and are not re-read later. pub fn from_env() -> Result<Self> { struct Visitor(Vec<String>); diff --git a/datafusion/execution/src/config.rs b/datafusion/execution/src/config.rs index c1ee2820c0..ccda6dc4d3 100644 --- a/datafusion/execution/src/config.rs +++ b/datafusion/execution/src/config.rs @@ -117,6 +117,9 @@ impl SessionConfig { } /// Create an execution config with config options read from the environment + /// + /// See [`ConfigOptions::from_env`] for details on how environment variables + /// are mapped to config options. pub fn from_env() -> Result<Self> { Ok(ConfigOptions::from_env()?.into()) } diff --git a/dev/update_config_docs.sh b/dev/update_config_docs.sh index 10f82ce945..7baee8ee00 100755 --- a/dev/update_config_docs.sh +++ b/dev/update_config_docs.sh @@ -50,22 +50,49 @@ cat <<'EOF' > "$TARGET_FILE" --> <!--- -This file was generated by the dev/update_config_docs.sh script. +NOTE: This file was generated by the dev/update_config_docs.sh script. Do not edit it manually as changes will be overwritten. Instead, edit dev/update_config_docs.sh or the docstrings in datafusion/core/src/config.rs. --> # Configuration Settings -The following configuration options can be passed to `SessionConfig` to control various aspects of query execution. +DataFusion configurations control various aspects of DataFusion planning and execution -For applications which do not expose `SessionConfig`, like `datafusion-cli`, these options may also be set via environment variables. -To construct a session with options from the environment, use `SessionConfig::from_env`. -The name of the environment variable is the option's key, transformed to uppercase and with periods replaced with underscores. -For example, to configure `datafusion.execution.batch_size` you would set the `DATAFUSION_EXECUTION_BATCH_SIZE` environment variable. -Values are parsed according to the [same rules used in casts from Utf8](https://docs.rs/arrow/latest/arrow/compute/kernels/cast/fn.cast.html). -If the value in the environment variable cannot be cast to the type of the configuration option, the default value will be used instead and a warning emitted. -Environment variables are read during `SessionConfig` initialisation so they must be set beforehand and will not affect running sessions. +## Setting Configuration Options + +### Programmatically +You can set the options programmatically via the [`ConfigOptions`] object. For +example, to configure the `datafusion.execution.target_partitions` using the API: + +```rust +use datafusion::common::config::ConfigOptions; +let mut config = ConfigOptions::new(); +config.execution.target_partitions = 1; +``` + +### Via Environment Variables + +You can also set configuration options via environment variables using +[`ConfigOptions::from_env`], for example + +```shell +DATAFUSION_EXECUTION_TARGET_PARTITIONS=1 ./your_program +``` + +### Via SQL + +You can also set configuration options via SQL using the `SET` command. For +example, to configure `datafusion.execution.target_partitions`: + +```sql +SET datafusion.execution.target_partitions = '1'; +``` + +[`ConfigOptions`]: https://docs.rs/datafusion/latest/datafusion/common/config/struct.ConfigOptions.html +[`ConfigOptions::from_env`]: https://docs.rs/datafusion/latest/datafusion/common/config/struct.ConfigOptions.html#method.from_env + +The following configuration settings are available: EOF diff --git a/docs/source/user-guide/configs.md b/docs/source/user-guide/configs.md index 7c760e7de4..c817daad2c 100644 --- a/docs/source/user-guide/configs.md +++ b/docs/source/user-guide/configs.md @@ -18,22 +18,50 @@ --> <!--- -This file was generated by the dev/update_config_docs.sh script. +NOTE: This file was generated by the dev/update_config_docs.sh script. Do not edit it manually as changes will be overwritten. Instead, edit dev/update_config_docs.sh or the docstrings in datafusion/core/src/config.rs. --> # Configuration Settings -The following configuration options can be passed to `SessionConfig` to control various aspects of query execution. +DataFusion configurations control various aspects of DataFusion planning and execution -For applications which do not expose `SessionConfig`, like `datafusion-cli`, these options may also be set via environment variables. -To construct a session with options from the environment, use `SessionConfig::from_env`. -The name of the environment variable is the option's key, transformed to uppercase and with periods replaced with underscores. -For example, to configure `datafusion.execution.batch_size` you would set the `DATAFUSION_EXECUTION_BATCH_SIZE` environment variable. -Values are parsed according to the [same rules used in casts from Utf8](https://docs.rs/arrow/latest/arrow/compute/kernels/cast/fn.cast.html). -If the value in the environment variable cannot be cast to the type of the configuration option, the default value will be used instead and a warning emitted. -Environment variables are read during `SessionConfig` initialisation so they must be set beforehand and will not affect running sessions. +## Setting Configuration Options + +### Programmatically + +You can set the options programmatically via the [`ConfigOptions`] object. For +example, to configure the `datafusion.execution.target_partitions` using the API: + +```rust +use datafusion::common::config::ConfigOptions; +let mut config = ConfigOptions::new(); +config.execution.target_partitions = 1; +``` + +### Via Environment Variables + +You can also set configuration options via environment variables using +[`ConfigOptions::from_env`], for example + +```shell +DATAFUSION_EXECUTION_TARGET_PARTITIONS=1 ./your_program +``` + +### Via SQL + +You can also set configuration options via SQL using the `SET` command. For +example, to configure `datafusion.execution.target_partitions`: + +```sql +SET datafusion.execution.target_partitions = '1'; +``` + +[`configoptions`]: https://docs.rs/datafusion/latest/datafusion/common/config/struct.ConfigOptions.html +[`configoptions::from_env`]: https://docs.rs/datafusion/latest/datafusion/common/config/struct.ConfigOptions.html#method.from_env + +The following configuration settings are available: | key | default | description [...] | ----------------------------------------------------------------------- | ------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- [...] --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org