This is an automated email from the ASF dual-hosted git repository.
yangjie01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 7bd149b60da1 [SPARK-45646][SQL] Remove hardcoded time variables prior
to Hive 2.0
7bd149b60da1 is described below
commit 7bd149b60da1557c4c6c35256908d97a4a03481a
Author: Cheng Pan <[email protected]>
AuthorDate: Tue Oct 24 22:56:10 2023 +0800
[SPARK-45646][SQL] Remove hardcoded time variables prior to Hive 2.0
### What changes were proposed in this pull request?
Remove the following hardcoded time variables, which only existed prior to Hive 2.0:
```
hive.stats.jdbc.timeout
hive.stats.retries.wait
```
### Why are the changes needed?
It's kind of a cleanup since Spark 4.0 only supports Hive 2.0 and above.
The removal also reduces the warning message on `spark-sql` bootstrap.
### Does this PR introduce _any_ user-facing change?
Yes, it reduces the warning message on `spark-sql` bootstrap.
```patch
➜ $ build/sbt clean package -Phive-thriftserver
➜ $ SPARK_PREPEND_CLASSES=true bin/spark-sql
NOTE: SPARK_PREPEND_CLASSES is set, placing locally compiled Spark
classes ahead of assembly.
23/10/24 15:42:22 WARN Utils: Your hostname, pop-os resolves to a
loopback address: 127.0.1.1; using 10.221.99.150 instead (on interface wlp61s0)
23/10/24 15:42:22 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to
another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use
setLogLevel(newLevel).
23/10/24 15:42:23 WARN NativeCodeLoader: Unable to load native-hadoop
library for your platform... using builtin-java classes where applicable
- 23/10/24 15:42:25 WARN HiveConf: HiveConf of name hive.stats.jdbc.timeout
does not exist
- 23/10/24 15:42:25 WARN HiveConf: HiveConf of name hive.stats.retries.wait
does not exist
23/10/24 15:42:28 WARN ObjectStore: Version information not found in
metastore. hive.metastore.schema.verification is not enabled so recording the
schema version 2.3.0
23/10/24 15:42:28 WARN ObjectStore: setMetaStoreSchemaVersion called but
recording version is disabled: version = 2.3.0, comment = Set by MetaStore
chengpan@127.0.1.1
23/10/24 15:42:28 WARN ObjectStore: Failed to get database default,
returning NoSuchObjectException
Spark Web UI available at http://10.221.99.150:4040
Spark master: local[*], Application Id: local-1698133344448
spark-sql (default)>
```
### How was this patch tested?
Pass GA and manually test.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #43506 from pan3793/SPARK-45646.
Authored-by: Cheng Pan <[email protected]>
Signed-off-by: yangjie01 <[email protected]>
---
.../main/scala/org/apache/spark/sql/hive/HiveUtils.scala | 15 ++-------------
.../scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala | 13 -------------
2 files changed, 2 insertions(+), 26 deletions(-)
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
index ab54b15af630..5ce7977ab168 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveUtils.scala
@@ -269,7 +269,7 @@ private[spark] object HiveUtils extends Logging {
//
// Here we enumerate all time `ConfVar`s and convert their values to
numeric strings according
// to their output time units.
- val commonTimeVars = Seq(
+ Seq(
ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY -> TimeUnit.SECONDS,
ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT -> TimeUnit.SECONDS,
ConfVars.METASTORE_CLIENT_SOCKET_LIFETIME -> TimeUnit.SECONDS,
@@ -309,18 +309,7 @@ private[spark] object HiveUtils extends Logging {
ConfVars.SPARK_RPC_CLIENT_HANDSHAKE_TIMEOUT -> TimeUnit.MILLISECONDS
).map { case (confVar, unit) =>
confVar.varname -> HiveConf.getTimeVar(hadoopConf, confVar,
unit).toString
- }
-
- // The following configurations were removed by HIVE-12164(Hive 2.0)
- val hardcodingTimeVars = Seq(
- ("hive.stats.jdbc.timeout", "30s") -> TimeUnit.SECONDS,
- ("hive.stats.retries.wait", "3000ms") -> TimeUnit.MILLISECONDS
- ).map { case ((key, defaultValue), unit) =>
- val value = hadoopConf.get(key, defaultValue)
- key -> HiveConf.toTime(value, unit, unit).toString
- }
-
- (commonTimeVars ++ hardcodingTimeVars).toMap
+ }.toMap
}
/**
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala
index 823ac8ed957e..10dbbc80c9ec 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveUtilsSuite.scala
@@ -106,17 +106,4 @@ class HiveUtilsSuite extends QueryTest with SQLTestUtils
with TestHiveSingleton
}
}
}
-
- test("SPARK-27349: Dealing with TimeVars removed in Hive 2.x") {
- // Test default value
- val defaultConf = new Configuration
-
assert(HiveUtils.formatTimeVarsForHiveClient(defaultConf)("hive.stats.jdbc.timeout")
=== "30")
-
assert(HiveUtils.formatTimeVarsForHiveClient(defaultConf)("hive.stats.retries.wait")
=== "3000")
-
- testFormatTimeVarsForHiveClient("hive.stats.jdbc.timeout", "40s", 40)
- testFormatTimeVarsForHiveClient("hive.stats.jdbc.timeout", "1d", 1 * 24 *
60 * 60)
-
- testFormatTimeVarsForHiveClient("hive.stats.retries.wait", "4000ms", 4000)
- testFormatTimeVarsForHiveClient("hive.stats.retries.wait", "1d", 1 * 24 *
60 * 60 * 1000)
- }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]