This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 03fc5e26b866 [SPARK-46600][SQL] Move shared code between SqlConf and SqlApiConf to SqlApiConfHelper 03fc5e26b866 is described below commit 03fc5e26b866491b52f89f4d24beade7d1669a37 Author: Rui Wang <rui.w...@databricks.com> AuthorDate: Mon Jan 8 22:22:06 2024 -0400 [SPARK-46600][SQL] Move shared code between SqlConf and SqlApiConf to SqlApiConfHelper ### What changes were proposed in this pull request? This PR proposes to introduce a new object named `SqlApiConfHelper` to contain shared code between `SqlApiConf` and `SqlConf`. ### Why are the changes needed? As of now, SqlConf accesses some of the variables of SqlApiConf, while SqlApiConf also tries to initialize SqlConf upon initialization. This PR is to avoid a potential circular dependency between SqlConf and SqlApiConf. The shared variables, and access to the shared variables, are moved to the new `SqlApiConfHelper`. So when either SqlApiConf or SqlConf wants to initialize the other side, they will only initialize the same third object. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? Existing UT ### Was this patch authored or co-authored using generative AI tooling? NO Closes #44602 from amaliujia/refactor_sql_api. 
Authored-by: Rui Wang <rui.w...@databricks.com> Signed-off-by: Herman van Hovell <her...@databricks.com> --- .../org/apache/spark/sql/internal/SqlApiConf.scala | 26 ++++-------- .../spark/sql/internal/SqlApiConfHelper.scala | 48 ++++++++++++++++++++++ .../org/apache/spark/sql/internal/SQLConf.scala | 12 +++--- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala index d746e9037ec4..5ec72b83837e 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.internal import java.util.TimeZone -import java.util.concurrent.atomic.AtomicReference import scala.util.Try @@ -48,25 +47,14 @@ private[sql] trait SqlApiConf { private[sql] object SqlApiConf { // Shared keys. - val ANSI_ENABLED_KEY: String = "spark.sql.ansi.enabled" - val LEGACY_TIME_PARSER_POLICY_KEY: String = "spark.sql.legacy.timeParserPolicy" - val CASE_SENSITIVE_KEY: String = "spark.sql.caseSensitive" - val SESSION_LOCAL_TIMEZONE_KEY: String = "spark.sql.session.timeZone" - val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = "spark.sql.session.localRelationCacheThreshold" + val ANSI_ENABLED_KEY: String = SqlApiConfHelper.ANSI_ENABLED_KEY + val LEGACY_TIME_PARSER_POLICY_KEY: String = SqlApiConfHelper.LEGACY_TIME_PARSER_POLICY_KEY + val CASE_SENSITIVE_KEY: String = SqlApiConfHelper.CASE_SENSITIVE_KEY + val SESSION_LOCAL_TIMEZONE_KEY: String = SqlApiConfHelper.SESSION_LOCAL_TIMEZONE_KEY + val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = + SqlApiConfHelper.LOCAL_RELATION_CACHE_THRESHOLD_KEY - /** - * Defines a getter that returns the [[SqlApiConf]] within scope. - */ - private val confGetter = new AtomicReference[() => SqlApiConf](() => DefaultSqlApiConf) - - /** - * Sets the active config getter. 
- */ - private[sql] def setConfGetter(getter: () => SqlApiConf): Unit = { - confGetter.set(getter) - } - - def get: SqlApiConf = confGetter.get()() + def get: SqlApiConf = SqlApiConfHelper.getConfGetter.get()() // Force load SQLConf. This will trigger the installation of a confGetter that points to SQLConf. Try(SparkClassUtils.classForName("org.apache.spark.sql.internal.SQLConf$")) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala new file mode 100644 index 000000000000..79b6cb9231c5 --- /dev/null +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.internal + +import java.util.concurrent.atomic.AtomicReference + +/** + * SqlApiConfHelper is created to avoid a deadlock during a concurrent access to SQLConf and + * SqlApiConf, which is because SQLConf and SqlApiConf tries to load each other upon + * initializations. SqlApiConfHelper is private to sql package and is not supposed to be + * accessed by end users. 
Variables and methods within SqlApiConfHelper are defined to + * be used by SQLConf and SqlApiConf only. + */ +private[sql] object SqlApiConfHelper { + // Shared keys. + val ANSI_ENABLED_KEY: String = "spark.sql.ansi.enabled" + val LEGACY_TIME_PARSER_POLICY_KEY: String = "spark.sql.legacy.timeParserPolicy" + val CASE_SENSITIVE_KEY: String = "spark.sql.caseSensitive" + val SESSION_LOCAL_TIMEZONE_KEY: String = "spark.sql.session.timeZone" + val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = "spark.sql.session.localRelationCacheThreshold" + + val confGetter: AtomicReference[() => SqlApiConf] = { + new AtomicReference[() => SqlApiConf](() => DefaultSqlApiConf) + } + + def getConfGetter: AtomicReference[() => SqlApiConf] = confGetter + + /** + * Sets the active config getter. + */ + def setConfGetter(getter: () => SqlApiConf): Unit = { + confGetter.set(getter) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d54cb3756638..fbceea4e5f8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -182,7 +182,7 @@ object SQLConf { // Make sure SqlApiConf is always in sync with SQLConf. SqlApiConf will always try to // load SqlConf to make sure both classes are in sync from the get go. - SqlApiConf.setConfGetter(() => SQLConf.get) + SqlApiConfHelper.setConfGetter(() => SQLConf.get) /** * Returns the active config object within the current scope. If there is an active SparkSession, @@ -915,7 +915,7 @@ object SQLConf { .booleanConf .createWithDefault(false) - val CASE_SENSITIVE = buildConf(SqlApiConf.CASE_SENSITIVE_KEY) + val CASE_SENSITIVE = buildConf(SqlApiConfHelper.CASE_SENSITIVE_KEY) .internal() .doc("Whether the query analyzer should be case sensitive or not. " + "Default to case insensitive. 
It is highly discouraged to turn on case sensitive mode.") @@ -2757,7 +2757,7 @@ object SQLConf { Try { DateTimeUtils.getZoneId(zone) }.isSuccess } - val SESSION_LOCAL_TIMEZONE = buildConf(SqlApiConf.SESSION_LOCAL_TIMEZONE_KEY) + val SESSION_LOCAL_TIMEZONE = buildConf(SqlApiConfHelper.SESSION_LOCAL_TIMEZONE_KEY) .doc("The ID of session local timezone in the format of either region-based zone IDs or " + "zone offsets. Region IDs must have the form 'area/city', such as 'America/Los_Angeles'. " + "Zone offsets must be in the format '(+|-)HH', '(+|-)HH:mm' or '(+|-)HH:mm:ss', e.g '-08', " + @@ -3281,7 +3281,7 @@ object SQLConf { .checkValues(StoreAssignmentPolicy.values.map(_.toString)) .createWithDefault(StoreAssignmentPolicy.ANSI.toString) - val ANSI_ENABLED = buildConf(SqlApiConf.ANSI_ENABLED_KEY) + val ANSI_ENABLED = buildConf(SqlApiConfHelper.ANSI_ENABLED_KEY) .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " + "For example, Spark will throw an exception at runtime instead of returning null results " + "when the inputs to a SQL operator/function are invalid." + @@ -3914,7 +3914,7 @@ object SQLConf { .booleanConf .createWithDefault(false) - val LEGACY_TIME_PARSER_POLICY = buildConf(SqlApiConf.LEGACY_TIME_PARSER_POLICY_KEY) + val LEGACY_TIME_PARSER_POLICY = buildConf(SqlApiConfHelper.LEGACY_TIME_PARSER_POLICY_KEY) .internal() .doc("When LEGACY, java.text.SimpleDateFormat is used for formatting and parsing " + "dates/timestamps in a locale-sensitive manner, which is the approach before Spark 3.0. 
" + @@ -4476,7 +4476,7 @@ object SQLConf { .createWithDefault(false) val LOCAL_RELATION_CACHE_THRESHOLD = - buildConf(SqlApiConf.LOCAL_RELATION_CACHE_THRESHOLD_KEY) + buildConf(SqlApiConfHelper.LOCAL_RELATION_CACHE_THRESHOLD_KEY) .doc("The threshold for the size in bytes of local relations to be cached at " + "the driver side after serialization.") .version("3.5.0") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org