This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 03fc5e26b866 [SPARK-46600][SQL] Move shared code between SqlConf and SqlApiConf to SqlApiConfHelper 03fc5e26b866 is described below commit 03fc5e26b866491b52f89f4d24beade7d1669a37 Author: Rui Wang <rui.w...@databricks.com> AuthorDate: Mon Jan 8 22:22:06 2024 -0400 [SPARK-46600][SQL] Move shared code between SqlConf and SqlApiConf to SqlApiConfHelper ### What changes were proposed in this pull request? This PR proposes to introduce a new object named `SqlApiConfHelper` to contain shared code between `SqlApiConf` and `SqlConf`. ### Why are the changes needed? As of now, SqlConf accesses some of the variables of SqlApiConf, while SqlApiConf also tries to initialize SqlConf upon initialization. This PR is to avoid a potential circular dependency between SqlConf and SqlApiConf. The shared variables, and access to the shared variables, are moved to the new `SqlApiConfHelper`. So when either SqlApiConf or SqlConf wants to initialize the other side, they will only initialize the same third object. ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? Existing UT ### Was this patch authored or co-authored using generative AI tooling? NO Closes #44602 from amaliujia/refactor_sql_api. 
Authored-by: Rui Wang <rui.w...@databricks.com> Signed-off-by: Herman van Hovell <her...@databricks.com> --- .../org/apache/spark/sql/internal/SqlApiConf.scala | 26 ++++-------- .../spark/sql/internal/SqlApiConfHelper.scala | 48 ++++++++++++++++++++++ .../org/apache/spark/sql/internal/SQLConf.scala | 12 +++--- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala index d746e9037ec4..5ec72b83837e 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConf.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.internal import java.util.TimeZone -import java.util.concurrent.atomic.AtomicReference import scala.util.Try @@ -48,25 +47,14 @@ private[sql] trait SqlApiConf { private[sql] object SqlApiConf { // Shared keys. - val ANSI_ENABLED_KEY: String = "spark.sql.ansi.enabled" - val LEGACY_TIME_PARSER_POLICY_KEY: String = "spark.sql.legacy.timeParserPolicy" - val CASE_SENSITIVE_KEY: String = "spark.sql.caseSensitive" - val SESSION_LOCAL_TIMEZONE_KEY: String = "spark.sql.session.timeZone" - val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = "spark.sql.session.localRelationCacheThreshold" + val ANSI_ENABLED_KEY: String = SqlApiConfHelper.ANSI_ENABLED_KEY + val LEGACY_TIME_PARSER_POLICY_KEY: String = SqlApiConfHelper.LEGACY_TIME_PARSER_POLICY_KEY + val CASE_SENSITIVE_KEY: String = SqlApiConfHelper.CASE_SENSITIVE_KEY + val SESSION_LOCAL_TIMEZONE_KEY: String = SqlApiConfHelper.SESSION_LOCAL_TIMEZONE_KEY + val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = + SqlApiConfHelper.LOCAL_RELATION_CACHE_THRESHOLD_KEY - /** - * Defines a getter that returns the [[SqlApiConf]] within scope. - */ - private val confGetter = new AtomicReference[() => SqlApiConf](() => DefaultSqlApiConf) - - /** - * Sets the active config getter. 
- */ - private[sql] def setConfGetter(getter: () => SqlApiConf): Unit = { - confGetter.set(getter) - } - - def get: SqlApiConf = confGetter.get()() + def get: SqlApiConf = SqlApiConfHelper.getConfGetter.get()() // Force load SQLConf. This will trigger the installation of a confGetter that points to SQLConf. Try(SparkClassUtils.classForName("org.apache.spark.sql.internal.SQLConf$")) diff --git a/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala new file mode 100644 index 000000000000..79b6cb9231c5 --- /dev/null +++ b/sql/api/src/main/scala/org/apache/spark/sql/internal/SqlApiConfHelper.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.internal + +import java.util.concurrent.atomic.AtomicReference + +/** + * SqlApiConfHelper is created to avoid a deadlock during a concurrent access to SQLConf and + * SqlApiConf, which is because SQLConf and SqlApiConf tries to load each other upon + * initializations. SqlApiConfHelper is private to sql package and is not supposed to be + * accessed by end users. 
Variables and methods within SqlApiConfHelper are defined to + * be used by SQLConf and SqlApiConf only. + */ +private[sql] object SqlApiConfHelper { + // Shared keys. + val ANSI_ENABLED_KEY: String = "spark.sql.ansi.enabled" + val LEGACY_TIME_PARSER_POLICY_KEY: String = "spark.sql.legacy.timeParserPolicy" + val CASE_SENSITIVE_KEY: String = "spark.sql.caseSensitive" + val SESSION_LOCAL_TIMEZONE_KEY: String = "spark.sql.session.timeZone" + val LOCAL_RELATION_CACHE_THRESHOLD_KEY: String = "spark.sql.session.localRelationCacheThreshold" + + val confGetter: AtomicReference[() => SqlApiConf] = { + new AtomicReference[() => SqlApiConf](() => DefaultSqlApiConf) + } + + def getConfGetter: AtomicReference[() => SqlApiConf] = confGetter + + /** + * Sets the active config getter. + */ + def setConfGetter(getter: () => SqlApiConf): Unit = { + confGetter.set(getter) + } +} diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index d54cb3756638..fbceea4e5f8e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -182,7 +182,7 @@ object SQLConf { // Make sure SqlApiConf is always in sync with SQLConf. SqlApiConf will always try to // load SqlConf to make sure both classes are in sync from the get go. - SqlApiConf.setConfGetter(() => SQLConf.get) + SqlApiConfHelper.setConfGetter(() => SQLConf.get) /** * Returns the active config object within the current scope. If there is an active SparkSession, @@ -915,7 +915,7 @@ object SQLConf { .booleanConf .createWithDefault(false) - val CASE_SENSITIVE = buildConf(SqlApiConf.CASE_SENSITIVE_KEY) + val CASE_SENSITIVE = buildConf(SqlApiConfHelper.CASE_SENSITIVE_KEY) .internal() .doc("Whether the query analyzer should be case sensitive or not. " + "Default to case insensitive. 
It is highly discouraged to turn on case sensitive mode.") @@ -2757,7 +2757,7 @@ object SQLConf { Try { DateTimeUtils.getZoneId(zone) }.isSuccess } - val SESSION_LOCAL_TIMEZONE = buildConf(SqlApiConf.SESSION_LOCAL_TIMEZONE_KEY) + val SESSION_LOCAL_TIMEZONE = buildConf(SqlApiConfHelper.SESSION_LOCAL_TIMEZONE_KEY) .doc("The ID of session local timezone in the format of either region-based zone IDs or " + "zone offsets. Region IDs must have the form 'area/city', such as 'America/Los_Angeles'. " + "Zone offsets must be in the format '(+|-)HH', '(+|-)HH:mm' or '(+|-)HH:mm:ss', e.g '-08', " + @@ -3281,7 +3281,7 @@ object SQLConf { .checkValues(StoreAssignmentPolicy.values.map(_.toString)) .createWithDefault(StoreAssignmentPolicy.ANSI.toString) - val ANSI_ENABLED = buildConf(SqlApiConf.ANSI_ENABLED_KEY) + val ANSI_ENABLED = buildConf(SqlApiConfHelper.ANSI_ENABLED_KEY) .doc("When true, Spark SQL uses an ANSI compliant dialect instead of being Hive compliant. " + "For example, Spark will throw an exception at runtime instead of returning null results " + "when the inputs to a SQL operator/function are invalid." + @@ -3914,7 +3914,7 @@ object SQLConf { .booleanConf .createWithDefault(false) - val LEGACY_TIME_PARSER_POLICY = buildConf(SqlApiConf.LEGACY_TIME_PARSER_POLICY_KEY) + val LEGACY_TIME_PARSER_POLICY = buildConf(SqlApiConfHelper.LEGACY_TIME_PARSER_POLICY_KEY) .internal() .doc("When LEGACY, java.text.SimpleDateFormat is used for formatting and parsing " + "dates/timestamps in a locale-sensitive manner, which is the approach before Spark 3.0. 
" + @@ -4476,7 +4476,7 @@ object SQLConf { .createWithDefault(false) val LOCAL_RELATION_CACHE_THRESHOLD = - buildConf(SqlApiConf.LOCAL_RELATION_CACHE_THRESHOLD_KEY) + buildConf(SqlApiConfHelper.LOCAL_RELATION_CACHE_THRESHOLD_KEY) .doc("The threshold for the size in bytes of local relations to be cached at " + "the driver side after serialization.") .version("3.5.0") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org