[spark] branch master updated: [SPARK-37791][EXAMPLES] Use log4j2 in examples
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new fe73039 [SPARK-37791][EXAMPLES] Use log4j2 in examples fe73039 is described below commit fe73039f991ce2c44bc5bb2dc845c735e6959c14 Author: William Hyun AuthorDate: Thu Dec 30 18:32:01 2021 -0800 [SPARK-37791][EXAMPLES] Use log4j2 in examples ### What changes were proposed in this pull request? This PR aims to use log4j2 in examples. ### Why are the changes needed? Since Spark is migrating to log4j2, we best use this in our examples. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually review. Closes #35074 from williamhyun/log4j2. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- .../org/apache/spark/examples/mllib/BinaryClassification.scala| 5 +++-- .../main/scala/org/apache/spark/examples/mllib/DenseKMeans.scala | 5 +++-- .../main/scala/org/apache/spark/examples/mllib/LDAExample.scala | 5 +++-- .../main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala | 5 +++-- .../spark/examples/mllib/PowerIterationClusteringExample.scala| 5 +++-- .../scala/org/apache/spark/examples/mllib/SparseNaiveBayes.scala | 5 +++-- .../org/apache/spark/examples/streaming/StreamingExamples.scala | 8 +--- .../org/apache/spark/examples/streaming/KinesisWordCountASL.scala | 8 +--- 8 files changed, 28 insertions(+), 18 deletions(-) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala index 6fc3501..6748ffb 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/BinaryClassification.scala @@ -18,7 +18,8 @@ // scalastyle:off println package org.apache.spark.examples.mllib 
-import org.apache.log4j.{Level, Logger} +import org.apache.logging.log4j.Level +import org.apache.logging.log4j.core.config.Configurator import scopt.OptionParser import org.apache.spark.{SparkConf, SparkContext} @@ -105,7 +106,7 @@ object BinaryClassification { val conf = new SparkConf().setAppName(s"BinaryClassification with $params") val sc = new SparkContext(conf) -Logger.getRootLogger.setLevel(Level.WARN) +Configurator.setRootLevel(Level.WARN) val examples = MLUtils.loadLibSVMFile(sc, params.input).cache() diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/DenseKMeans.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/DenseKMeans.scala index 0259df2..0aa30a6 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/DenseKMeans.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/DenseKMeans.scala @@ -18,7 +18,8 @@ // scalastyle:off println package org.apache.spark.examples.mllib -import org.apache.log4j.{Level, Logger} +import org.apache.logging.log4j.Level +import org.apache.logging.log4j.core.config.Configurator import scopt.OptionParser import org.apache.spark.{SparkConf, SparkContext} @@ -79,7 +80,7 @@ object DenseKMeans { val conf = new SparkConf().setAppName(s"DenseKMeans with $params") val sc = new SparkContext(conf) -Logger.getRootLogger.setLevel(Level.WARN) +Configurator.setRootLevel(Level.WARN) val examples = sc.textFile(params.input).map { line => Vectors.dense(line.split(' ').map(_.toDouble)) diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala index 605ca68..a3006a1 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala +++ b/examples/src/main/scala/org/apache/spark/examples/mllib/LDAExample.scala @@ -20,7 +20,8 @@ package org.apache.spark.examples.mllib import java.util.Locale -import org.apache.log4j.{Level, Logger} +import 
org.apache.logging.log4j.Level +import org.apache.logging.log4j.core.config.Configurator import scopt.OptionParser import org.apache.spark.{SparkConf, SparkContext} @@ -111,7 +112,7 @@ object LDAExample { val conf = new SparkConf().setAppName(s"LDAExample with $params") val sc = new SparkContext(conf) -Logger.getRootLogger.setLevel(Level.WARN) +Configurator.setRootLevel(Level.WARN) // Load documents, and prepare them for LDA. val preprocessStart = System.nanoTime() diff --git a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala b/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala index 92c85c9..23523d7 100644 --- a/examples/src/main/scala/org/apache/spark/examples/mllib/MovieLensALS.scala +++ b/examples/src/main/scal
[spark] branch master updated (8b2e426 -> c4a9772)
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 8b2e426 [SPARK-37790][BUILD] Upgrade SLF4J to 1.7.32 add c4a9772 [SPARK-37713][K8S] Assign namespace to executor configmap No new revisions were added by this update. Summary of changes: .../deploy/k8s/submit/KubernetesClientUtils.scala | 5 +++- .../k8s/KubernetesClusterSchedulerBackend.scala| 4 +++- .../k8s/submit/KubernetesClientUtilsSuite.scala| 27 ++ 3 files changed, 34 insertions(+), 2 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-37790][BUILD] Upgrade SLF4J to 1.7.32
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 8b2e426 [SPARK-37790][BUILD] Upgrade SLF4J to 1.7.32 8b2e426 is described below commit 8b2e4260701dbaa7ca0e52a4cb682fd0a115d9b8 Author: William Hyun AuthorDate: Thu Dec 30 17:42:36 2021 -0800 [SPARK-37790][BUILD] Upgrade SLF4J to 1.7.32 ### What changes were proposed in this pull request? This PR aims to upgrade SLF4J version to 1.7.32. ### Why are the changes needed? This release has the following fixes. - https://jira.qos.ch/browse/SLF4J-514 - https://jira.qos.ch/browse/SLF4J-515 ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass the CIs. Closes #35072 from williamhyun/slf4j. Authored-by: William Hyun Signed-off-by: Dongjoon Hyun --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 6 +++--- dev/deps/spark-deps-hadoop-3-hive-2.3 | 6 +++--- pom.xml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 88e2583..868c7b3 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -135,7 +135,7 @@ javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar +jcl-over-slf4j/1.7.32//jcl-over-slf4j-1.7.32.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.34//jersey-client-2.34.jar jersey-common/2.34//jersey-common-2.34.jar @@ -159,7 +159,7 @@ json4s-scalap_2.12/3.7.0-M11//json4s-scalap_2.12-3.7.0-M11.jar jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar -jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar +jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar 
kubernetes-client/5.10.1//kubernetes-client-5.10.1.jar kubernetes-model-admissionregistration/5.10.1//kubernetes-model-admissionregistration-5.10.1.jar @@ -244,7 +244,7 @@ scala-reflect/2.12.15//scala-reflect-2.12.15.jar scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar shims/0.9.23//shims-0.9.23.jar -slf4j-api/1.7.30//slf4j-api-1.7.30.jar +slf4j-api/1.7.32//slf4j-api-1.7.32.jar snakeyaml/1.28//snakeyaml-1.28.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 57fca2e..f1183cd 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -123,7 +123,7 @@ javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcl-over-slf4j/1.7.30//jcl-over-slf4j-1.7.30.jar +jcl-over-slf4j/1.7.32//jcl-over-slf4j-1.7.32.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jdom/1.1//jdom-1.1.jar jersey-client/2.34//jersey-client-2.34.jar @@ -146,7 +146,7 @@ json4s-jackson_2.12/3.7.0-M11//json4s-jackson_2.12-3.7.0-M11.jar json4s-scalap_2.12/3.7.0-M11//json4s-scalap_2.12-3.7.0-M11.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar -jul-to-slf4j/1.7.30//jul-to-slf4j-1.7.30.jar +jul-to-slf4j/1.7.32//jul-to-slf4j-1.7.32.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client/5.10.1//kubernetes-client-5.10.1.jar kubernetes-model-admissionregistration/5.10.1//kubernetes-model-admissionregistration-5.10.1.jar @@ -231,7 +231,7 @@ scala-reflect/2.12.15//scala-reflect-2.12.15.jar scala-xml_2.12/1.2.0//scala-xml_2.12-1.2.0.jar shapeless_2.12/2.3.3//shapeless_2.12-2.3.3.jar shims/0.9.23//shims-0.9.23.jar -slf4j-api/1.7.30//slf4j-api-1.7.30.jar +slf4j-api/1.7.32//slf4j-api-1.7.32.jar snakeyaml/1.28//snakeyaml-1.28.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar 
spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar diff --git a/pom.xml b/pom.xml index 235387c..fdda77c 100644 --- a/pom.xml +++ b/pom.xml @@ -118,7 +118,7 @@ 3.8.4 1.6.0 spark -1.7.30 +1.7.32 2.17.1 3.3.1 2.5.0 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (4caface -> 88c7b6a)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 4caface [MINOR] Remove unused imports in Scala 2.13 Repl2Suite add 88c7b6a [SPARK-37719][BUILD] Remove the `-add-exports` compilation option introduced by SPARK-37070 No new revisions were added by this update. Summary of changes: dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- mllib-local/pom.xml | 5 + mllib/pom.xml | 5 + pom.xml | 20 ++-- project/SparkBuild.scala | 6 +- 6 files changed, 23 insertions(+), 17 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (068d53b -> 4caface)
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 068d53b [SPARK-37735][K8S] Add appId interface to KubernetesConf add 4caface [MINOR] Remove unused imports in Scala 2.13 Repl2Suite No new revisions were added by this update. Summary of changes: repl/src/test/scala-2.13/org/apache/spark/repl/Repl2Suite.scala | 5 - 1 file changed, 5 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-37735][K8S] Add appId interface to KubernetesConf
This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 068d53b [SPARK-37735][K8S] Add appId interface to KubernetesConf 068d53b is described below commit 068d53bd5d89c96bf0cdb05d3ec7f2f023cf3875 Author: Yikun Jiang AuthorDate: Thu Dec 30 13:48:58 2021 -0800 [SPARK-37735][K8S] Add appId interface to KubernetesConf ### What changes were proposed in this pull request? Add `appId` interface to KubernetesConf ### Why are the changes needed? The `appId` can currently only be accessed in `KubernetesDriverConf` and `KubernetesExecutorConf`, but can't be accessed in `KubernetesConf`. Some user feature steps are using `KubernetesConf` as an init constructor parameter in order to share the feature step between driver and executor. One such case is a customized feature step (such as volcano, yunikorn) using appId as job identification. So, we'd better add appId to KubernetesConf to help such feature steps access appId. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? UT Closes #35015 from Yikun/SPARK-36057. 
Authored-by: Yikun Jiang Signed-off-by: Dongjoon Hyun --- .../main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala | 1 + .../scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala | 8 2 files changed, 9 insertions(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala index 8130402..46086fa 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/k8s/KubernetesConf.scala @@ -43,6 +43,7 @@ private[spark] abstract class KubernetesConf(val sparkConf: SparkConf) { def secretNamesToMountPaths: Map[String, String] def volumes: Seq[KubernetesVolumeSpec] def schedulerName: String + def appId: String def appName: String = get("spark.app.name", "spark") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala index 119bcb0..1b3aaa5 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/k8s/KubernetesConfSuite.scala @@ -218,6 +218,14 @@ class KubernetesConfSuite extends SparkFunSuite { assert(driverConf.schedulerName === "driverScheduler") } + test("SPARK-37735: access appId in KubernetesConf") { +val sparkConf = new SparkConf(false) +val driverConf = KubernetesTestConf.createDriverConf(sparkConf) +val execConf = KubernetesTestConf.createExecutorConf(sparkConf) +assert(driverConf.asInstanceOf[KubernetesConf].appId === KubernetesTestConf.APP_ID) +assert(execConf.asInstanceOf[KubernetesConf].appId === KubernetesTestConf.APP_ID) + } + test("SPARK-36566: get app name label") { 
assert(KubernetesConf.getAppNameLabel(" Job+Spark-Pi 2021") === "job-spark-pi-2021") assert(KubernetesConf.getAppNameLabel("a" * 63) === "a" * 63) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-37738][PYTHON] Support column type inputs for second arg of date manipulation functions
This is an automated email from the ASF dual-hosted git repository. zero323 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 899dec2 [SPARK-37738][PYTHON] Support column type inputs for second arg of date manipulation functions 899dec2 is described below commit 899dec2ea36bcd934f1512b26e7fc8903e9f5ba8 Author: Daniel Davies AuthorDate: Thu Dec 30 16:07:39 2021 +0100 [SPARK-37738][PYTHON] Support column type inputs for second arg of date manipulation functions ### What changes were proposed in this pull request? See https://issues.apache.org/jira/browse/SPARK-37738 There seems to be a skew in the Scala Spark API & PySpark API; namely, date_add/ date_sub/ add_months take an 'int' type for the 'days' parameter in PySpark, but can accept a column or an integer in Scala. This PR makes both types available to the 'days' parameter in PySpark. ### Why are the changes needed? Users should see a consistent API across Python & Scala side processing. ### Does this PR introduce _any_ user-facing change? Yes- additive only. >>> df = spark.createDataFrame([('2015-04-08', 2,)], ['dt', 'add']) >>> df.select(date_add(df.dt, df.add).alias('next_date')).collect() [Row(next_date=datetime.date(2015, 4, 10))] ### How was this patch tested? 3 new unit tests Closes #35032 from Daniel-Davies/master. 
Lead-authored-by: Daniel Davies Co-authored-by: Daniel-Davies <33356828+daniel-dav...@users.noreply.github.com> Signed-off-by: zero323 --- python/pyspark/sql/functions.py| 33 - python/pyspark/sql/tests/test_functions.py | 57 ++ 2 files changed, 81 insertions(+), 9 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index a0f2bbf..4791d3c 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2200,7 +2200,7 @@ def make_date(year: "ColumnOrName", month: "ColumnOrName", day: "ColumnOrName") return Column(jc) -def date_add(start: "ColumnOrName", days: int) -> Column: +def date_add(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: """ Returns the date that is `days` days after `start` @@ -2208,16 +2208,21 @@ def date_add(start: "ColumnOrName", days: int) -> Column: Examples ->>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) +>>> df = spark.createDataFrame([('2015-04-08', 2,)], ['dt', 'add']) >>> df.select(date_add(df.dt, 1).alias('next_date')).collect() [Row(next_date=datetime.date(2015, 4, 9))] +>>> df.select(date_add(df.dt, df.add.cast('integer')).alias('next_date')).collect() +[Row(next_date=datetime.date(2015, 4, 10))] """ sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None -return Column(sc._jvm.functions.date_add(_to_java_column(start), days)) + +days = lit(days) if isinstance(days, int) else days + +return Column(sc._jvm.functions.date_add(_to_java_column(start), _to_java_column(days))) -def date_sub(start: "ColumnOrName", days: int) -> Column: +def date_sub(start: "ColumnOrName", days: Union["ColumnOrName", int]) -> Column: """ Returns the date that is `days` days before `start` @@ -2225,13 +2230,18 @@ def date_sub(start: "ColumnOrName", days: int) -> Column: Examples ->>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) +>>> df = spark.createDataFrame([('2015-04-08', 2,)], ['dt', 'sub']) >>> df.select(date_sub(df.dt, 
1).alias('prev_date')).collect() [Row(prev_date=datetime.date(2015, 4, 7))] +>>> df.select(date_sub(df.dt, df.sub.cast('integer')).alias('prev_date')).collect() +[Row(prev_date=datetime.date(2015, 4, 6))] """ sc = SparkContext._active_spark_context assert sc is not None and sc._jvm is not None -return Column(sc._jvm.functions.date_sub(_to_java_column(start), days)) + +days = lit(days) if isinstance(days, int) else days + +return Column(sc._jvm.functions.date_sub(_to_java_column(start), _to_java_column(days))) def datediff(end: "ColumnOrName", start: "ColumnOrName") -> Column: @@ -2251,7 +2261,7 @@ def datediff(end: "ColumnOrName", start: "ColumnOrName") -> Column: return Column(sc._jvm.functions.datediff(_to_java_column(end), _to_java_column(start))) -def add_months(start: "ColumnOrName", months: int) -> Column: +def add_months(start: "ColumnOrName", months: Union["ColumnOrName", int]) -> Column: """ Returns the date that is `months` months after `start` @@ -2259,13 +2269,18 @@ def add_months(start: "ColumnOrName", months: int) -> Column: Examples ->>> df = spark.createDataFrame([('2015-04-08',)], ['dt']) +>>> df = spark.createDat
[spark] branch master updated: [SPARK-37785][SQL][CORE] Add Utils.isInRunningSparkTask
This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d96ee1f [SPARK-37785][SQL][CORE] Add Utils.isInRunningSparkTask d96ee1f is described below commit d96ee1f3b3d136971b1893741f4b022a9f15ae20 Author: Hyukjin Kwon AuthorDate: Thu Dec 30 23:09:45 2021 +0900 [SPARK-37785][SQL][CORE] Add Utils.isInRunningSparkTask ### What changes were proposed in this pull request? This PR proposes to add `Utils.isInRunningSparkTask` to see if the codes are running on tasks e.g., on executors. ### Why are the changes needed? There is currently no single call to see if we're in a running Spark task (e.g., in executors). `TaskContext.get == null` is being used for that way. We should better explicitly factor out to `Utils`. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? Existing unittests should cover this case. Closes #35065 from HyukjinKwon/mindor-util-at-executor. 
Authored-by: Hyukjin Kwon Signed-off-by: Hyukjin Kwon --- core/src/main/scala/org/apache/spark/SparkContext.scala | 2 +- core/src/main/scala/org/apache/spark/util/Utils.scala| 5 + .../spark/sql/catalyst/expressions/EquivalentExpressions.scala | 4 ++-- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 2 +- sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala | 4 ++-- .../src/main/scala/org/apache/spark/sql/execution/Columnar.scala | 3 ++- 6 files changed, 13 insertions(+), 7 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index a14efa5..0c5fb0a 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -2680,7 +2680,7 @@ object SparkContext extends Logging { * Throws an exception if a SparkContext is about to be created in executors. */ private def assertOnDriver(): Unit = { -if (TaskContext.get != null) { +if (Utils.isInRunningSparkTask) { // we're accessing it during task execution, fail. throw new IllegalStateException( "SparkContext should only be created and accessed on the driver.") diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala index 6597750..4410fe7 100644 --- a/core/src/main/scala/org/apache/spark/util/Utils.scala +++ b/core/src/main/scala/org/apache/spark/util/Utils.scala @@ -875,6 +875,11 @@ private[spark] object Utils extends Logging { } /** + * Returns if the current codes are running in a Spark task, e.g., in executors. + */ + def isInRunningSparkTask: Boolean = TaskContext.get() != null + + /** * Gets or creates the directories listed in spark.local.dir or SPARK_LOCAL_DIRS, * and returns only the directories that exist / could be created. 
* diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala index 269ab31..59e2be4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/EquivalentExpressions.scala @@ -21,9 +21,9 @@ import java.util.Objects import scala.collection.mutable -import org.apache.spark.TaskContext import org.apache.spark.sql.catalyst.expressions.codegen.CodegenFallback import org.apache.spark.sql.catalyst.expressions.objects.LambdaVariable +import org.apache.spark.util.Utils /** * This class is used to compute equality of (sub)expression trees. Expressions can be added @@ -197,7 +197,7 @@ class EquivalentExpressions { expr.find(_.isInstanceOf[LambdaVariable]).isDefined || // `PlanExpression` wraps query plan. To compare query plans of `PlanExpression` on executor, // can cause error like NPE. - (expr.isInstanceOf[PlanExpression[_]] && TaskContext.get != null) + (expr.isInstanceOf[PlanExpression[_]] && Utils.isInRunningSparkTask) if (!skip && !updateExprInMap(expr, map, useCount)) { val uc = useCount.signum diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 2ca68c6..105a1c4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -198,7 +198,7 @@ object SQLConf { * run unit tests (that does not involve SparkSession) in serial
[spark] branch master updated (77e8683 -> 4c58f12)
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 77e8683 [SPARK-37369][SQL][FOLLOWUP] Override supportsRowBased in UnionExec add 4c58f12 [SPARK-3][SQL] Update the SQL syntax of SHOW FUNCTIONS No new revisions were added by this update. Summary of changes: docs/sql-ref-syntax-aux-show-functions.md | 13 +++- .../apache/spark/sql/catalyst/parser/SqlBase.g4| 4 +-- .../spark/sql/catalyst/analysis/Analyzer.scala | 2 ++ .../spark/sql/catalyst/parser/AstBuilder.scala | 20 + .../sql/catalyst/plans/logical/v2Commands.scala| 11 --- .../spark/sql/errors/QueryCompilationErrors.scala | 7 ++--- .../spark/sql/errors/QueryParsingErrors.scala | 5 .../spark/sql/catalyst/parser/DDLParserSuite.scala | 35 +- .../catalyst/analysis/ResolveSessionCatalog.scala | 22 +++--- .../spark/sql/execution/command/functions.scala| 5 ++-- .../spark/sql/connector/DataSourceV2SQLSuite.scala | 5 ++-- .../spark/sql/hive/execution/HiveUDFSuite.scala| 8 + 12 files changed, 87 insertions(+), 50 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (22585b6 -> 77e8683)
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 22585b6 [SPARK-37478][SQL][TESTS][FOLLOWUP] Unify v1 and v2 DROP NAMESPACE error add 77e8683 [SPARK-37369][SQL][FOLLOWUP] Override supportsRowBased in UnionExec No new revisions were added by this update. Summary of changes: .../sql/execution/basicPhysicalOperators.scala | 2 ++ .../columnar/CachedBatchSerializerSuite.scala | 12 +++-- .../spark/sql/execution/debug/DebuggingSuite.scala | 31 -- 3 files changed, 29 insertions(+), 16 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-37478][SQL][TESTS][FOLLOWUP] Unify v1 and v2 DROP NAMESPACE error
This is an automated email from the ASF dual-hosted git repository. wenchen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 22585b6 [SPARK-37478][SQL][TESTS][FOLLOWUP] Unify v1 and v2 DROP NAMESPACE error 22585b6 is described below commit 22585b62d83b1594ad237651354e354949f137b7 Author: dch nguyen AuthorDate: Thu Dec 30 16:59:54 2021 +0800 [SPARK-37478][SQL][TESTS][FOLLOWUP] Unify v1 and v2 DROP NAMESPACE error ### What changes were proposed in this pull request? According to [#cmt](https://github.com/apache/spark/pull/34819#discussion_r763629569), unify the error of v1, v2 and hive external catalog in DROP NAMESPACE tests. ### Why are the changes needed? Currently, v1 and hive external catalog command throw `AnalysisException`, while v2 command throws `SparkException`. The error messages of v1 and hive catalog are also completely different from v2. So this PR is for unifying those errors to `AnalysisException`. The error message will have one difference between v1/hive and v2: `Cannot drop a non-empty database: ...` vs `Cannot drop a non-empty namespace: ...` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? v1/v2 and Hive v1 DropNamespaceSuite: ```$ build/sbt -Phive-2.3 -Phive-thriftserver "test:testOnly *DropNamespaceSuite"``` Closes #35007 from dchvn/unify_dropnamespace_error. 
Authored-by: dch nguyen Signed-off-by: Wenchen Fan --- .../apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala | 7 ++- .../org/apache/spark/sql/errors/QueryCompilationErrors.scala | 10 -- .../org/apache/spark/sql/errors/QueryExecutionErrors.scala| 6 -- .../sql/execution/datasources/v2/DropNamespaceExec.scala | 6 +++--- .../spark/sql/execution/command/DropNamespaceSuiteBase.scala | 7 +-- .../spark/sql/execution/command/v1/DropNamespaceSuite.scala | 7 +-- .../spark/sql/execution/command/v2/DropNamespaceSuite.scala | 11 +-- .../scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala | 11 ++- .../org/apache/spark/sql/hive/execution/HiveDDLSuite.scala| 2 +- 9 files changed, 31 insertions(+), 36 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala index c10e0bb..e3896c5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/InMemoryCatalog.scala @@ -139,11 +139,8 @@ class InMemoryCatalog( if (catalog.contains(db)) { if (!cascade) { // If cascade is false, make sure the database is empty. -if (catalog(db).tables.nonEmpty) { - throw QueryCompilationErrors.databaseNotEmptyError(db, "tables") -} -if (catalog(db).functions.nonEmpty) { - throw QueryCompilationErrors.databaseNotEmptyError(db, "functions") +if (catalog(db).tables.nonEmpty || catalog(db).functions.nonEmpty) { + throw QueryCompilationErrors.cannotDropNonemptyDatabaseError(db) } } // Remove the database. 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala index 300ba03..7284d06 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala @@ -540,8 +540,14 @@ object QueryCompilationErrors { s"rename temporary view from '$oldName' to '$newName': destination view already exists") } - def databaseNotEmptyError(db: String, details: String): Throwable = { -new AnalysisException(s"Database $db is not empty. One or more $details exist.") + def cannotDropNonemptyDatabaseError(db: String): Throwable = { +new AnalysisException(s"Cannot drop a non-empty database: $db. " + + "Use CASCADE option to drop a non-empty database.") + } + + def cannotDropNonemptyNamespaceError(namespace: Seq[String]): Throwable = { +new AnalysisException(s"Cannot drop a non-empty namespace: ${namespace.quoted}. " + + "Use CASCADE option to drop a non-empty namespace.") } def invalidNameForTableOrDatabaseError(name: String): Throwable = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala index eb6e814..4ec57dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryExecutionErrors.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/