This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
     new 5c396bc  [SPARK-31498][SQL][DOCS] Dump public static sql configurations through doc generation
5c396bc is described below

commit 5c396bc22575a805df7e97f587207af7c214df42
Author:     Kent Yao <yaooq...@hotmail.com>
AuthorDate: Wed Apr 22 10:16:39 2020 +0000

    [SPARK-31498][SQL][DOCS] Dump public static sql configurations through doc generation

    ### What changes were proposed in this pull request?

    Currently, only the non-static public SQL configurations are dumped to the public doc;
    we should also include the public static ones, just as the `SET -v` command does.
    This PR forces a reference to `StaticSQLConf` so that its static configurations are
    built and registered before being listed.

    ### Why are the changes needed?

    Fix missing SQL configurations in the doc.

    ### Does this PR introduce any user-facing change?

    No.

    ### How was this patch tested?

    Added a unit test and verified locally that the public static SQL configurations
    appear in `docs/sql-config.html`.

    Closes #28274 from yaooqinn/SPARK-31498.

    Authored-by: Kent Yao <yaooq...@hotmail.com>
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
    (cherry picked from commit 2c2062ea7c77780ea017cf94e4258a09fe34c6ac)
    Signed-off-by: Wenchen Fan <wenc...@databricks.com>
---
 docs/configuration.md                              | 27 ++++++++-
 .../spark/sql/api/python/PythonSQLUtils.scala      | 13 ++++-
 .../spark/sql/api/python/PythonSQLUtilsSuite.scala | 64 ++++++++++++++++++++++
 sql/create-docs.sh                                 |  7 ++-
 sql/gen-sql-config-docs.py                         | 21 +++++--
 5 files changed, 119 insertions(+), 13 deletions(-)
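For context on the "forces a reference" wording above: Scala initializes an object lazily,
on its first access, so merely naming `StaticSQLConf` runs the side-effecting registrations
in its object body. The following minimal, self-contained sketch illustrates that behavior;
`Registry`, `StaticConfs`, and `Demo` are made-up names for illustration, not Spark code.

    import scala.collection.mutable

    object Registry {
      val keys = mutable.Set.empty[String]
    }

    object StaticConfs {
      // Side-effecting registrations in the object body, analogous to the
      // buildStaticConf calls inside StaticSQLConf.
      Registry.keys += "spark.sql.extensions"
      Registry.keys += "spark.sql.warehouse.dir"
    }

    object Demo extends App {
      println(Registry.keys.size) // 0: StaticConfs has not been initialized yet
      StaticConfs                 // a bare reference forces initialization
      println(Registry.keys.size) // 2: the static keys are now registered
    }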
diff --git a/docs/configuration.md b/docs/configuration.md
index 829d062..9e91d08 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -2611,11 +2611,32 @@ Please refer to the [Security](security.html) page for available options on how
 Spark subsystems.
 
-{% for static_file in site.static_files %}
-  {% if static_file.name == 'generated-sql-configuration-table.html' %}
 ### Spark SQL
 
-{% include_relative generated-sql-configuration-table.html %}
+#### Runtime SQL Configuration
+
+Runtime SQL configurations are per-session, mutable Spark SQL configurations. They can be set with initial values by the config file
+and command-line options prefixed with `--conf/-c`, or by setting the `SparkConf` that is used to create the `SparkSession`.
+Also, they can be set and queried by SET commands and reset to their initial values by the RESET command,
+or by `SparkSession.conf`'s setter and getter methods at runtime.
+
+{% for static_file in site.static_files %}
+  {% if static_file.name == 'generated-runtime-sql-config-table.html' %}
+    {% include_relative generated-runtime-sql-config-table.html %}
+    {% break %}
+  {% endif %}
+{% endfor %}
+
+
+#### Static SQL Configuration
+
+Static SQL configurations are cross-session, immutable Spark SQL configurations. They can be set with final values by the config file
+and command-line options prefixed with `--conf/-c`, or by setting the `SparkConf` that is used to create the `SparkSession`.
+External users can query the static SQL config values via `SparkSession.conf` or via the SET command, e.g. `SET spark.sql.extensions;`, but cannot set or unset them.
+
+{% for static_file in site.static_files %}
+  {% if static_file.name == 'generated-static-sql-config-table.html' %}
+    {% include_relative generated-static-sql-config-table.html %}
     {% break %}
   {% endif %}
 {% endfor %}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
index 03f5a60..2e5f59e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
@@ -23,13 +23,13 @@ import java.nio.channels.Channels
 
 import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.api.python.PythonRDDServer
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{DataFrame, Dataset, SQLContext}
+import org.apache.spark.sql.{DataFrame, SQLContext}
 import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
 import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
 import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
 import org.apache.spark.sql.execution.{ExplainMode, QueryExecution}
 import org.apache.spark.sql.execution.arrow.ArrowConverters
-import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
 import org.apache.spark.sql.types.DataType
 
 private[sql] object PythonSQLUtils {
@@ -43,7 +43,14 @@ private[sql] object PythonSQLUtils {
   def listSQLConfigs(): Array[(String, String, String, String)] = {
     val conf = new SQLConf()
     // Py4J doesn't seem to translate Seq well, so we convert to an Array.
-    conf.getAllDefinedConfs.toArray
+    conf.getAllDefinedConfs.filterNot(p => SQLConf.staticConfKeys.contains(p._1)).toArray
+  }
+
+  def listStaticSQLConfigs(): Array[(String, String, String, String)] = {
+    val conf = new SQLConf()
+    // Reference StaticSQLConf to force its static SQL configurations to be built and registered.
+    StaticSQLConf
+    conf.getAllDefinedConfs.filter(p => SQLConf.staticConfKeys.contains(p._1)).toArray
   }
 
   /**
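A note on the Py4J boundary for the generator script further down: each element of the
returned `Array[(String, String, String, String)]` is a Scala Tuple4 of (key, default value,
documentation, version added), which is why the Python code reads the fields through the
`_1()` to `_4()` accessors. A hypothetical entry, with made-up values, as one might write
it in the Scala REPL:

    val entry: (String, String, String, String) = (
      "spark.sql.someConf.enabled",       // _1: configuration key
      "true",                             // _2: default value, rendered as a string
      "Whether some feature is enabled.", // _3: documentation text
      "3.0.0"                             // _4: Spark version that introduced the conf
    )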
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala
new file mode 100644
index 0000000..0d18d12
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.api.python
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
+
+class PythonSQLUtilsSuite extends SparkFunSuite {
+
+  test("listing sql configurations contains runtime ones only") {
+    val configs = PythonSQLUtils.listSQLConfigs()
+
+    // static sql configurations
+    assert(!configs.exists(entry => entry._1 == StaticSQLConf.SPARK_SESSION_EXTENSIONS.key),
+      "listSQLConfigs should not contain public static sql configuration")
+    assert(!configs.exists(entry => entry._1 == StaticSQLConf.DEBUG_MODE.key),
+      "listSQLConfigs should not contain internal static sql configuration")
+
+    // dynamic sql configurations
+    assert(configs.exists(entry => entry._1 == SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key),
+      "listSQLConfigs should contain public dynamic sql configuration")
+    assert(!configs.exists(entry => entry._1 == SQLConf.ANALYZER_MAX_ITERATIONS.key),
+      "listSQLConfigs should not contain internal dynamic sql configuration")
+
+    // spark core configurations
+    assert(!configs.exists(entry => entry._1 == "spark.master"),
+      "listSQLConfigs should not contain core configuration")
+  }
+
+  test("listing static sql configurations contains public static ones only") {
+    val configs = PythonSQLUtils.listStaticSQLConfigs()
+
+    // static sql configurations
+    assert(configs.exists(entry => entry._1 == StaticSQLConf.SPARK_SESSION_EXTENSIONS.key),
+      "listStaticSQLConfigs should contain public static sql configuration")
+    assert(!configs.exists(entry => entry._1 == StaticSQLConf.DEBUG_MODE.key),
+      "listStaticSQLConfigs should not contain internal static sql configuration")
+
+    // dynamic sql configurations
+    assert(!configs.exists(entry => entry._1 == SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key),
+      "listStaticSQLConfigs should not contain dynamic sql configuration")
+    assert(!configs.exists(entry => entry._1 == SQLConf.ANALYZER_MAX_ITERATIONS.key),
+      "listStaticSQLConfigs should not contain internal dynamic sql configuration")
+
+    // spark core configurations
+    assert(!configs.exists(entry => entry._1 == "spark.master"),
+      "listStaticSQLConfigs should not contain core configuration")
+  }
+}
diff --git a/sql/create-docs.sh b/sql/create-docs.sh
index 6614c71..336afc4 100755
--- a/sql/create-docs.sh
+++ b/sql/create-docs.sh
@@ -45,8 +45,11 @@ mkdir docs
 echo "Generating SQL API Markdown files."
 "$SPARK_HOME/bin/spark-submit" gen-sql-api-docs.py
 
-echo "Generating SQL configuration table HTML file."
-"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py
+echo "Generating runtime SQL configuration table HTML file."
+"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py runtime
+
+echo "Generating static SQL configuration table HTML file."
+"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py static
 
 echo "Generating HTML files for SQL function table and examples."
"$SPARK_HOME/bin/spark-submit" gen-sql-functions-docs.py diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py index 0043c41..848d2f2 100644 --- a/sql/gen-sql-config-docs.py +++ b/sql/gen-sql-config-docs.py @@ -17,6 +17,7 @@ import os import re +import sys from collections import namedtuple from textwrap import dedent @@ -30,7 +31,11 @@ SQLConfEntry = namedtuple( "SQLConfEntry", ["name", "default", "description", "version"]) -def get_public_sql_configs(jvm): +def get_public_sql_configs(jvm, group): + if group == "static": + config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listStaticSQLConfigs() + else: + config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs() sql_configs = [ SQLConfEntry( name=_sql_config._1(), @@ -38,7 +43,7 @@ def get_public_sql_configs(jvm): description=_sql_config._3(), version=_sql_config._4() ) - for _sql_config in jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs() + for _sql_config in config_set ] return sql_configs @@ -114,11 +119,17 @@ def generate_sql_configs_table_html(sql_configs, path): if __name__ == "__main__": + if len(sys.argv) != 2: + print("Usage: ./bin/spark-submit sql/gen-sql-config-docs.py <static|runtime>") + sys.exit(-1) + else: + group = sys.argv[1] + jvm = launch_gateway().jvm - sql_configs = get_public_sql_configs(jvm) + sql_configs = get_public_sql_configs(jvm, group) spark_root_dir = os.path.dirname(os.path.dirname(__file__)) - sql_configs_table_path = os.path.join( - spark_root_dir, "docs/generated-sql-configuration-table.html") + sql_configs_table_path = os.path\ + .join(spark_root_dir, "docs", "generated-" + group + "-sql-config-table.html") generate_sql_configs_table_html(sql_configs, path=sql_configs_table_path) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org