This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 2c2062e [SPARK-31498][SQL][DOCS] Dump public static sql configurations through doc generation
2c2062e is described below
commit 2c2062ea7c77780ea017cf94e4258a09fe34c6ac
Author: Kent Yao <[email protected]>
AuthorDate: Wed Apr 22 10:16:39 2020 +0000
[SPARK-31498][SQL][DOCS] Dump public static sql configurations through doc generation
### What changes were proposed in this pull request?
Currently, only the non-static public SQL configurations are dumped to the public doc; we should also include the public static ones, as the `SET -v` command does.
This PR forces a reference to StaticSQLConf so that the configurations it defines via buildStaticConf are registered before being listed.
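
For context, a minimal PySpark sketch (assuming a local session) of the runtime vs. static distinction that the two generated tables document:

```python
# Minimal sketch: runtime SQL configs are mutable per session, static ones are not.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()

# Runtime SQL configuration: can be set and read at any time.
spark.conf.set("spark.sql.shuffle.partitions", "10")
print(spark.conf.get("spark.sql.shuffle.partitions"))   # 10

# Static SQL configuration: readable, but setting it raises AnalysisException
# ("Cannot modify the value of a static config").
print(spark.conf.get("spark.sql.warehouse.dir"))

spark.stop()
```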
### Why are the changes needed?
Fix missing SQL configurations in doc
### Does this PR introduce any user-facing change?
NO
### How was this patch tested?
Added a unit test and verified locally that the public static SQL configurations appear in `docs/sql-config.html`.
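
For reference, a rough sketch of exercising the new helpers from Python over Py4J, mirroring what `gen-sql-config-docs.py` does (assumes a Spark build and is run via `spark-submit`):

```python
# Rough sketch: mirrors how gen-sql-config-docs.py reaches the JVM-side helpers.
from pyspark.java_gateway import launch_gateway

jvm = launch_gateway().jvm
utils = jvm.org.apache.spark.sql.api.python.PythonSQLUtils

# Each entry is a Scala tuple of (name, default value, description, version added).
for conf in utils.listStaticSQLConfigs():
    print(conf._1(), "->", conf._2())
```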
Closes #28274 from yaooqinn/SPARK-31498.
Authored-by: Kent Yao <[email protected]>
Signed-off-by: Wenchen Fan <[email protected]>
---
docs/configuration.md | 27 ++++++++-
.../spark/sql/api/python/PythonSQLUtils.scala | 13 ++++-
.../spark/sql/api/python/PythonSQLUtilsSuite.scala | 64 ++++++++++++++++++++++
sql/create-docs.sh | 7 ++-
sql/gen-sql-config-docs.py | 21 +++++--
5 files changed, 119 insertions(+), 13 deletions(-)
diff --git a/docs/configuration.md b/docs/configuration.md
index e322247..6faa5e7 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -2622,11 +2622,32 @@ Please refer to the [Security](security.html) page for available options on how
Spark subsystems.
-{% for static_file in site.static_files %}
- {% if static_file.name == 'generated-sql-configuration-table.html' %}
### Spark SQL
-{% include_relative generated-sql-configuration-table.html %}
+#### Runtime SQL Configuration
+
+Runtime SQL configurations are per-session, mutable Spark SQL configurations. They can be set with initial values by the config file
+and command-line options prefixed with `--conf/-c`, or by setting the `SparkConf` that is used to create the `SparkSession`.
+Also, they can be set and queried by SET commands and reset to their initial values by the RESET command,
+or by `SparkSession.conf`'s setter and getter methods at runtime.
+
+{% for static_file in site.static_files %}
+ {% if static_file.name == 'generated-runtime-sql-config-table.html' %}
+ {% include_relative generated-runtime-sql-config-table.html %}
+ {% break %}
+ {% endif %}
+{% endfor %}
+
+
+#### Static SQL Configuration
+
+Static SQL configurations are cross-session, immutable Spark SQL configurations. They can be set with final values by the config file
+and command-line options prefixed with `--conf/-c`, or by setting the `SparkConf` that is used to create the `SparkSession`.
+External users can query the static SQL config values via `SparkSession.conf` or via the SET command, e.g. `SET spark.sql.extensions;`, but cannot set/unset them.
+
+{% for static_file in site.static_files %}
+ {% if static_file.name == 'generated-static-sql-config-table.html' %}
+ {% include_relative generated-static-sql-config-table.html %}
{% break %}
{% endif %}
{% endfor %}
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
index 03f5a60..2e5f59e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/api/python/PythonSQLUtils.scala
@@ -23,13 +23,13 @@ import java.nio.channels.Channels
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.python.PythonRDDServer
import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{DataFrame, Dataset, SQLContext}
+import org.apache.spark.sql.{DataFrame, SQLContext}
import org.apache.spark.sql.catalyst.analysis.FunctionRegistry
import org.apache.spark.sql.catalyst.expressions.ExpressionInfo
import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
import org.apache.spark.sql.execution.{ExplainMode, QueryExecution}
import org.apache.spark.sql.execution.arrow.ArrowConverters
-import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
import org.apache.spark.sql.types.DataType
private[sql] object PythonSQLUtils {
@@ -43,7 +43,14 @@ private[sql] object PythonSQLUtils {
def listSQLConfigs(): Array[(String, String, String, String)] = {
val conf = new SQLConf()
// Py4J doesn't seem to translate Seq well, so we convert to an Array.
- conf.getAllDefinedConfs.toArray
+ conf.getAllDefinedConfs.filterNot(p => SQLConf.staticConfKeys.contains(p._1)).toArray
+ }
+
+ def listStaticSQLConfigs(): Array[(String, String, String, String)] = {
+ val conf = new SQLConf()
+ // Force to build static SQL configurations
+ StaticSQLConf
+ conf.getAllDefinedConfs.filter(p => SQLConf.staticConfKeys.contains(p._1)).toArray
}
/**
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala
new file mode 100644
index 0000000..0d18d12
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/api/python/PythonSQLUtilsSuite.scala
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.api.python
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.internal.{SQLConf, StaticSQLConf}
+
+class PythonSQLUtilsSuite extends SparkFunSuite {
+
+ test("listing sql configurations contains runtime ones only") {
+ val configs = PythonSQLUtils.listSQLConfigs()
+
+ // static sql configurations
+ assert(!configs.exists(entry => entry._1 == StaticSQLConf.SPARK_SESSION_EXTENSIONS.key),
+ "listSQLConfigs should not contain public static sql configuration")
+ assert(!configs.exists(entry => entry._1 == StaticSQLConf.DEBUG_MODE.key),
+ "listSQLConfigs should not contain internal static sql configuration")
+
+ // dynamic sql configurations
+ assert(configs.exists(entry => entry._1 == SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key),
+ "listSQLConfigs should contain public dynamic sql configuration")
+ assert(!configs.exists(entry => entry._1 == SQLConf.ANALYZER_MAX_ITERATIONS.key),
+ "listSQLConfigs should not contain internal dynamic sql configuration")
+
+ // spark core configurations
+ assert(!configs.exists(entry => entry._1 == "spark.master"),
+ "listSQLConfigs should not contain core configuration")
+ }
+
+ test("listing static sql configurations contains public static ones only") {
+ val configs = PythonSQLUtils.listStaticSQLConfigs()
+
+ // static sql configurations
+ assert(configs.exists(entry => entry._1 == StaticSQLConf.SPARK_SESSION_EXTENSIONS.key),
+ "listStaticSQLConfigs should contain public static sql configuration")
+ assert(!configs.exists(entry => entry._1 == StaticSQLConf.DEBUG_MODE.key),
+ "listStaticSQLConfigs should not contain internal static sql configuration")
+
+ // dynamic sql configurations
+ assert(!configs.exists(entry => entry._1 == SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key),
+ "listStaticSQLConfigs should not contain dynamic sql configuration")
+ assert(!configs.exists(entry => entry._1 == SQLConf.ANALYZER_MAX_ITERATIONS.key),
+ "listStaticSQLConfigs should not contain internal dynamic sql configuration")
+
+ // spark core configurations
+ assert(!configs.exists(entry => entry._1 == "spark.master"),
+ "listStaticSQLConfigs should not contain core configuration")
+ }
+}
diff --git a/sql/create-docs.sh b/sql/create-docs.sh
index 6614c71..336afc4 100755
--- a/sql/create-docs.sh
+++ b/sql/create-docs.sh
@@ -45,8 +45,11 @@ mkdir docs
echo "Generating SQL API Markdown files."
"$SPARK_HOME/bin/spark-submit" gen-sql-api-docs.py
-echo "Generating SQL configuration table HTML file."
-"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py
+echo "Generating runtime SQL runtime configuration table HTML file."
+"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py runtime
+
+echo "Generating static SQL configuration table HTML file."
+"$SPARK_HOME/bin/spark-submit" gen-sql-config-docs.py static
echo "Generating HTML files for SQL function table and examples."
"$SPARK_HOME/bin/spark-submit" gen-sql-functions-docs.py
diff --git a/sql/gen-sql-config-docs.py b/sql/gen-sql-config-docs.py
index 0043c41..848d2f2 100644
--- a/sql/gen-sql-config-docs.py
+++ b/sql/gen-sql-config-docs.py
@@ -17,6 +17,7 @@
import os
import re
+import sys
from collections import namedtuple
from textwrap import dedent
@@ -30,7 +31,11 @@ SQLConfEntry = namedtuple(
"SQLConfEntry", ["name", "default", "description", "version"])
-def get_public_sql_configs(jvm):
+def get_public_sql_configs(jvm, group):
+ if group == "static":
+ config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listStaticSQLConfigs()
+ else:
+ config_set = jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs()
sql_configs = [
SQLConfEntry(
name=_sql_config._1(),
@@ -38,7 +43,7 @@ def get_public_sql_configs(jvm):
description=_sql_config._3(),
version=_sql_config._4()
)
- for _sql_config in jvm.org.apache.spark.sql.api.python.PythonSQLUtils.listSQLConfigs()
+ for _sql_config in config_set
]
return sql_configs
@@ -114,11 +119,17 @@ def generate_sql_configs_table_html(sql_configs, path):
if __name__ == "__main__":
+ if len(sys.argv) != 2:
+ print("Usage: ./bin/spark-submit sql/gen-sql-config-docs.py
<static|runtime>")
+ sys.exit(-1)
+ else:
+ group = sys.argv[1]
+
jvm = launch_gateway().jvm
- sql_configs = get_public_sql_configs(jvm)
+ sql_configs = get_public_sql_configs(jvm, group)
spark_root_dir = os.path.dirname(os.path.dirname(__file__))
- sql_configs_table_path = os.path.join(
- spark_root_dir, "docs/generated-sql-configuration-table.html")
+ sql_configs_table_path = os.path\
+ .join(spark_root_dir, "docs", "generated-" + group + "-sql-config-table.html")
generate_sql_configs_table_html(sql_configs, path=sql_configs_table_path)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]