(spark) branch master updated (09739294ba1d -> c2079b9b13e4)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

  from 09739294ba1d [SPARK-47143][CONNECT][TESTS] Improve `ArtifactSuite` to use unique `MavenCoordinate`s
   add c2079b9b13e4 [SPARK-47140][SPARK-47139][INFRA][PYTHON] Upgrade Python version and codecov action in Coverage job

No new revisions were added by this update.

Summary of changes:
 .github/workflows/build_and_test.yml | 2 +-
 .github/workflows/build_coverage.yml | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)
(spark) branch master updated (d466c0beabcf -> 09739294ba1d)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

  from d466c0beabcf [SPARK-47142][K8S][TESTS] Use `spark.jars.ivy` instead of `spark.driver.extraJavaOptions` in `DepsTestsSuite`
   add 09739294ba1d [SPARK-47143][CONNECT][TESTS] Improve `ArtifactSuite` to use unique `MavenCoordinate`s

No new revisions were added by this update.

Summary of changes:
 .../org/apache/spark/sql/connect/client/ArtifactSuite.scala | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)
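The SPARK-47143 change makes each `ArtifactSuite` test resolve its own Maven coordinates so that artifacts cached by one test cannot leak into another. The sketch below illustrates the idea only; the `MavenCoordinate` stand-in, the helper name, and the use of `UUID` are assumptions for illustration, not the actual patch.

```scala
import java.util.UUID

// Hypothetical stand-in for Spark's internal MavenCoordinate case class.
case class MavenCoordinate(groupId: String, artifactId: String, version: String)

object UniqueCoordinates {
  // Generate a coordinate whose artifactId is unique per call, so two tests
  // resolving artifacts through the same Ivy/Maven cache never collide.
  def fresh(groupId: String = "org.apache.spark.test"): MavenCoordinate =
    MavenCoordinate(groupId, s"artifact-${UUID.randomUUID()}", "1.0.0")
}
```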
(spark) branch master updated (a053b40ac0e9 -> d466c0beabcf)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

  from a053b40ac0e9 [SPARK-47099][SQL][FOLLOW-UP] Uses ordinalNumber in UNEXPECTED_INPUT_TYPE
   add d466c0beabcf [SPARK-47142][K8S][TESTS] Use `spark.jars.ivy` instead of `spark.driver.extraJavaOptions` in `DepsTestsSuite`

No new revisions were added by this update.

Summary of changes:
 .../org/apache/spark/deploy/k8s/integrationtest/DepsTestsSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
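`spark.jars.ivy` is the documented configuration for the Ivy user directory, so the test can set it directly instead of smuggling a JVM option through `spark.driver.extraJavaOptions`. A minimal sketch of the direct route (the path and the exact option the old code passed are illustrative assumptions; the digest only shows the file touched):

```scala
import org.apache.spark.sql.SparkSession

object IvyPathConfigSketch {
  def main(args: Array[String]): Unit = {
    // Point Spark's dependency resolution at a dedicated Ivy directory via the
    // first-class config rather than an extra driver JVM option.
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("deps-test-sketch")
      .config("spark.jars.ivy", "/tmp/spark-ivy") // illustrative path
      .getOrCreate()

    println(spark.conf.get("spark.jars.ivy"))
    spark.stop()
  }
}
```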
(spark) branch master updated: [SPARK-47099][SQL][FOLLOW-UP] Uses ordinalNumber in UNEXPECTED_INPUT_TYPE
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new a053b40ac0e9 [SPARK-47099][SQL][FOLLOW-UP] Uses ordinalNumber in UNEXPECTED_INPUT_TYPE

a053b40ac0e9 is described below

commit a053b40ac0e95b0eace2cb4da5e6e79e7019793b
Author: Hyukjin Kwon
AuthorDate: Fri Feb 23 14:12:10 2024 +0900

[SPARK-47099][SQL][FOLLOW-UP] Uses ordinalNumber in UNEXPECTED_INPUT_TYPE

### What changes were proposed in this pull request?

This PR is a followup of https://github.com/apache/spark/pull/45177 that fixes some leftovers missed.

### Why are the changes needed?

For consistency. Also, I think this fixes the Maven build failure: https://github.com/apache/spark/actions/runs/8005710953/job/21865798408

### Does this PR introduce _any_ user-facing change?

Yes, the value of 'paramIndex' for the error class `UNEXPECTED_INPUT_TYPE` is uniformly set by `ordinalNumber`.

### How was this patch tested?

CI in this PR.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #45225 from HyukjinKwon/SPARK-47099-followup.

Authored-by: Hyukjin Kwon
Signed-off-by: Hyukjin Kwon
---
 .../scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala  | 2 +-
 sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out | 2 +-
 .../sql-tests/analyzer-results/table-valued-functions.sql.out       | 4 ++--
 sql/core/src/test/resources/sql-tests/results/mode.sql.out          | 2 +-
 .../test/resources/sql-tests/results/table-valued-functions.sql.out | 4 ++--
 sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala   | 2 +-
 6 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index e96474862b1d..3ab6c22e5fda 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1966,7 +1966,7 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
 new AnalysisException(
 errorClass = "UNEXPECTED_INPUT_TYPE",
 messageParameters = Map(
-"paramIndex" -> paramIndex.toString,
+"paramIndex" -> ordinalNumber(paramIndex - 1),
 "functionName" -> toSQLId(functionName),
 "requiredType" -> toSQLType(dataType),
 "inputSql" -> toSQLExpr(expression),

diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out
index a0a0c81ef027..2508b9b5fdd9 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/mode.sql.out
@@ -124,7 +124,7 @@ org.apache.spark.sql.AnalysisException
 "functionName" : "`mode`",
 "inputSql" : "\"true\"",
 "inputType" : "\"STRING\"",
-"paramIndex" : "2",
+"paramIndex" : "second",
 "requiredType" : "\"BOOLEAN\""
 },
 "queryContext" : [ {

diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/table-valued-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/table-valued-functions.sql.out
index 765de505d9a2..c8698f7c7cd7 100644
--- a/sql/core/src/test/resources/sql-tests/analyzer-results/table-valued-functions.sql.out
+++ b/sql/core/src/test/resources/sql-tests/analyzer-results/table-valued-functions.sql.out
@@ -81,7 +81,7 @@ org.apache.spark.sql.AnalysisException
 "functionName" : "`range`",
 "inputSql" : "\"NULL\"",
 "inputType" : "\"VOID\"",
-"paramIndex" : "2",
+"paramIndex" : "second",
 "requiredType" : "\"BIGINT\""
 },
 "queryContext" : [ {
@@ -105,7 +105,7 @@ org.apache.spark.sql.AnalysisException
 "functionName" : "`range`",
 "inputSql" : "\"array(1, 2, 3)\"",
 "inputType" : "\"ARRAY\"",
-"paramIndex" : "2",
+"paramIndex" : "second",
 "requiredType" : "\"BIGINT\""
 },
 "queryContext" : [ {

diff --git a/sql/core/src/test/resources/sql-tests/results/mode.sql.out b/sql/core/src/test/resources/sql-tests/results/mode.sql.out
index 6ae7b2d29e9d..9eac2c40e3ee 100644
--- a/sql/core/src/test/resources/sql-tests/results/mode.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/mode.sql.out
@@ -103,7 +103,7 @@ org.apache.spark.sql.AnalysisException
 "functionName" : "`mode`",
 "inputSql" : "\"true\"",
 "inputType" : "\"STRING\"",
-"paramIndex" : "2",
+"paramIndex" : "second",
 "requiredType" : "\"BOOLEAN\""
 },
 "queryContext" : [ {

diff --git a/sql/core/src/test/reso
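The diff replaces a raw 1-based parameter index with an ordinal word, so an error that used to report `"paramIndex" : "2"` now reports `"second"`. A small self-contained sketch of what such a helper can look like; Spark's actual `ordinalNumber` lives in its error utilities, and this version is only an illustrative assumption:

```scala
object OrdinalSketch {
  // Convert a 0-based index into the ordinal word used in error messages,
  // falling back to an "Nth" style for larger indexes.
  def ordinalNumber(i: Int): String = i match {
    case 0 => "first"
    case 1 => "second"
    case 2 => "third"
    case n => s"${n + 1}th"
  }

  def main(args: Array[String]): Unit = {
    val paramIndex = 2                      // 1-based index coming from the caller
    println(ordinalNumber(paramIndex - 1))  // prints "second"
  }
}
```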
(spark) branch master updated: [SPARK-47137][PYTHON][CONNECT] Add getAll to spark.conf for feature parity with Scala
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 511839b6eac9 [SPARK-47137][PYTHON][CONNECT] Add getAll to spark.conf for feature parity with Scala

511839b6eac9 is described below

commit 511839b6eac974351410a1713f5a90329e49abe9
Author: Takuya UESHIN
AuthorDate: Thu Feb 22 20:22:43 2024 -0800

[SPARK-47137][PYTHON][CONNECT] Add getAll to spark.conf for feature parity with Scala

### What changes were proposed in this pull request?

Adds `getAll` to `spark.conf` for feature parity with Scala.

```py
>>> spark.conf.getAll
{'spark.sql.warehouse.dir': ...}
```

### Why are the changes needed?

Scala API provides `spark.conf.getAll`; whereas Python doesn't.

```scala
scala> spark.conf.getAll
val res0: Map[String,String] = HashMap(spark.sql.warehouse.dir -> ...
```

### Does this PR introduce _any_ user-facing change?

Yes, `spark.conf.getAll` will be available in PySpark.

### How was this patch tested?

Added the related tests.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #45222 from ueshin/issues/SPARK-47137/getAll.

Authored-by: Takuya UESHIN
Signed-off-by: Dongjoon Hyun
---
 python/pyspark/sql/conf.py                          | 16 +-
 python/pyspark/sql/connect/conf.py                  | 15 +-
 python/pyspark/sql/tests/test_conf.py               | 63 ++
 .../scala/org/apache/spark/sql/RuntimeConfig.scala  | 6 +++
 4 files changed, 75 insertions(+), 25 deletions(-)

diff --git a/python/pyspark/sql/conf.py b/python/pyspark/sql/conf.py
index e77039565dd1..dd43991b0706 100644
--- a/python/pyspark/sql/conf.py
+++ b/python/pyspark/sql/conf.py
@@ -16,7 +16,7 @@ #
 import sys
-from typing import Any, Optional, Union
+from typing import Any, Dict, Optional, Union
 from py4j.java_gateway import JavaObject
@@ -93,6 +93,20 @@ class RuntimeConfig:
 self._check_type(default, "default")
 return self._jconf.get(key, default)
+@property
+def getAll(self) -> Dict[str, str]:
+"""
+Returns all properties set in this conf.
+
+.. versionadded:: 4.0.0
+
+Returns
+---
+dict
+A dictionary containing all properties set in this conf.
+"""
+return dict(self._jconf.getAllAsJava())
+
 def unset(self, key: str) -> None:
 """
 Resets the configuration property for the given key.

diff --git a/python/pyspark/sql/connect/conf.py b/python/pyspark/sql/connect/conf.py
index 3548a31fef03..57a669aca889 100644
--- a/python/pyspark/sql/connect/conf.py
+++ b/python/pyspark/sql/connect/conf.py
@@ -19,7 +19,7 @@ from pyspark.sql.connect.utils import check_dependencies
 check_dependencies(__name__)
-from typing import Any, Optional, Union, cast
+from typing import Any, Dict, Optional, Union, cast
 import warnings
 from pyspark import _NoValue
@@ -68,6 +68,19 @@ class RuntimeConf:
 get.__doc__ = PySparkRuntimeConfig.get.__doc__
+@property
+def getAll(self) -> Dict[str, str]:
+op_get_all = proto.ConfigRequest.GetAll()
+operation = proto.ConfigRequest.Operation(get_all=op_get_all)
+result = self._client.config(operation)
+confs: Dict[str, str] = dict()
+for key, value in result.pairs:
+assert value is not None
+confs[key] = value
+return confs
+
+getAll.__doc__ = PySparkRuntimeConfig.getAll.__doc__
+
 def unset(self, key: str) -> None:
 op_unset = proto.ConfigRequest.Unset(keys=[key])
 operation = proto.ConfigRequest.Operation(unset=op_unset)

diff --git a/python/pyspark/sql/tests/test_conf.py b/python/pyspark/sql/tests/test_conf.py
index 9b939205b1d1..68b147f09746 100644
--- a/python/pyspark/sql/tests/test_conf.py
+++ b/python/pyspark/sql/tests/test_conf.py
@@ -50,32 +50,49 @@ class ConfTestsMixin:
 def test_conf_with_python_objects(self):
 spark = self.spark
-for value, expected in [(True, "true"), (False, "false")]:
-spark.conf.set("foo", value)
-self.assertEqual(spark.conf.get("foo"), expected)
-
-spark.conf.set("foo", 1)
-self.assertEqual(spark.conf.get("foo"), "1")
-
-with self.assertRaises(IllegalArgumentException):
-spark.conf.set("foo", None)
-
-with self.assertRaises(Exception):
-spark.conf.set("foo", Decimal(1))
+try:
+for value, expected in [(True, "true"), (False, "false")]:
+spark.conf.set("foo", value)
+self.assertEqual(spark.conf.get("foo"), expected)
+
+spark.conf.set("foo", 1)
+self.assertEqual
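The new PySpark property mirrors the existing Scala API, which the commit message itself shows. For reference, a minimal Scala sketch of the behavior the Python side now matches (local-mode session used purely for illustration):

```scala
import org.apache.spark.sql.SparkSession

object ConfGetAllExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("conf-getAll")
      .getOrCreate()

    // Returns every effective runtime configuration as an immutable Map[String, String].
    val all: Map[String, String] = spark.conf.getAll
    all.take(5).foreach { case (k, v) => println(s"$k -> $v") }

    spark.stop()
  }
}
```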
(spark) branch master updated (6ae0abb64289 -> b90514c37755)
This is an automated email from the ASF dual-hosted git repository.

yao pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

  from 6ae0abb64289 [SPARK-43259][SQL][FOLLOWUP] Regenerate `sql-error-conditions.md` to recover `SparkThrowableSuite`
   add b90514c37755 [SPARK-47130][CORE] Use listStatus to bypass block location info when cleaning driver logs

No new revisions were added by this update.

Summary of changes:
 .../spark/deploy/history/FsHistoryProvider.scala | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)
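The SPARK-47130 change swaps a listing call that also fetches HDFS block locations for the cheaper `listStatus`, since cleaning driver logs only needs file names and modification times. A hedged sketch of the difference using the Hadoop `FileSystem` API (the directory is an illustrative value, and this is not the actual `FsHistoryProvider` code):

```scala
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}

object DriverLogListingSketch {
  def main(args: Array[String]): Unit = {
    val fs = FileSystem.get(new Configuration())
    val dir = new Path("/tmp/driver-logs") // illustrative directory
    fs.mkdirs(dir)

    // listStatus returns plain FileStatus entries (name, size, modification time)
    // without asking the NameNode for block locations, which is all a
    // "delete logs older than N days" cleaner needs.
    fs.listStatus(dir).foreach { status =>
      println(s"${status.getPath.getName} modified at ${status.getModificationTime}")
    }

    // By contrast, listLocatedStatus also resolves block locations per file,
    // which is wasted work when the locations are never read:
    // val located = fs.listLocatedStatus(dir)
  }
}
```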
(spark) branch master updated: [SPARK-43259][SQL][FOLLOWUP] Regenerate `sql-error-conditions.md` to recover `SparkThrowableSuite`
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 6ae0abb64289 [SPARK-43259][SQL][FOLLOWUP] Regenerate `sql-error-conditions.md` to recover `SparkThrowableSuite`

6ae0abb64289 is described below

commit 6ae0abb64289c2124b2a2dd4043d010a06a14465
Author: Dongjoon Hyun
AuthorDate: Thu Feb 22 17:26:32 2024 -0800

[SPARK-43259][SQL][FOLLOWUP] Regenerate `sql-error-conditions.md` to recover `SparkThrowableSuite`

### What changes were proposed in this pull request?

This is a follow-up of #45095

### Why are the changes needed?

To recover the broken `master` branch.
- https://github.com/apache/spark/actions/runs/8008631301/job/21875499011

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass the CIs. I manually verified like the following.

```
[info] SparkThrowableSuite:
[info] - No duplicate error classes (23 milliseconds)
[info] - Error classes are correctly formatted (37 milliseconds)
[info] - SQLSTATE is mandatory (1 millisecond)
[info] - Error category and error state / SQLSTATE invariants (21 milliseconds)
[info] - Message invariants (6 milliseconds)
[info] - Message format invariants (9 milliseconds)
[info] - Error classes match with document (54 milliseconds)
[info] - Round trip (23 milliseconds)
[info] - Error class names should contain only capital letters, numbers and underscores (5 milliseconds)
[info] - Check if error class is missing (14 milliseconds)
[info] - Check if message parameters match message format (2 milliseconds)
[info] - Error message is formatted (0 milliseconds)
[info] - Error message does not do substitution on values (0 milliseconds)
[info] - Try catching legacy SparkError (1 millisecond)
[info] - Try catching SparkError with error class (1 millisecond)
[info] - Try catching internal SparkError (1 millisecond)
[info] - Get message in the specified format (3 milliseconds)
[info] - overwrite error classes (47 milliseconds)
[info] - prohibit dots in error class names (15 milliseconds)
[info] Run completed in 1 second, 90 milliseconds.
[info] Total number of tests run: 19
[info] Suites: completed 1, aborted 0
[info] Tests: succeeded 19, failed 0, canceled 0, ignored 0, pending 0
[info] All tests passed.
[success] Total time: 7 s, completed Feb 22, 2024, 5:22:24 PM
```

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #45226 from dongjoon-hyun/SPARK-43259.

Authored-by: Dongjoon Hyun
Signed-off-by: Dongjoon Hyun
---
 docs/sql-error-conditions.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index 0745de995799..bb982a77fca0 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -1148,7 +1148,7 @@ Please increase executor memory using the --executor-memory option or "`
[SQLSTATE: 42001](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)
-Found an invalid expression encoder. Expects an instance of `ExpressionEncoder` but got ``. For more information consult '``/api/java/index.html?org/apache/spark/sql/Encoder.html'.
+Found an invalid expression encoder. Expects an instance of ExpressionEncoder but got ``. For more information consult '``/api/java/index.html?org/apache/spark/sql/Encoder.html'.

### INVALID_EXTRACT_BASE_FIELD_TYPE
(spark) branch master updated (241a95def31d -> 07226e6a8086)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

  from 241a95def31d Revert "[SPARK-47115][INFRA] Use larger memory for Maven builds"
   add 07226e6a8086 [SPARK-47123][CORE] JDBCRDD does not correctly handle errors in getQueryOutputSchema

No new revisions were added by this update.

Summary of changes:
 .../spark/sql/execution/datasources/jdbc/JDBCRDD.scala | 16
 1 file changed, 4 insertions(+), 12 deletions(-)
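The SPARK-47123 fix concerns how `JDBCRDD` resolves a query's output schema; this digest only shows the file touched, so the following is a generic sketch of the underlying pattern rather than the actual patch: probe the schema with a statement that returns no rows, and make sure JDBC resources are released whether or not execution throws. The wrapper query and names are illustrative assumptions.

```scala
import java.sql.Connection

object QuerySchemaSketch {
  // Probe the output columns of a user query by executing a "no rows" wrapper,
  // releasing the statement and result set on both the success and failure paths.
  def getQueryOutputColumns(conn: Connection, query: String): Seq[String] = {
    val stmt = conn.prepareStatement(s"SELECT * FROM ($query) t WHERE 1=0")
    try {
      val rs = stmt.executeQuery()
      try {
        val meta = rs.getMetaData
        (1 to meta.getColumnCount).map(meta.getColumnLabel)
      } finally {
        rs.close()
      }
    } finally {
      stmt.close() // runs even when executeQuery fails, so errors don't leak statements
    }
  }
}
```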
(spark) branch master updated (9bc273ee0dad -> 241a95def31d)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

  from 9bc273ee0dad [SPARK-47136][CORE][TESTS] Fix `MavenUtilsSuite` to use `MavenUtils.resolveMavenCoordinates` properly
   add 241a95def31d Revert "[SPARK-47115][INFRA] Use larger memory for Maven builds"

No new revisions were added by this update.

Summary of changes:
 .github/workflows/maven_test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(spark) branch master updated: [SPARK-47136][CORE][TESTS] Fix `MavenUtilsSuite` to use `MavenUtils.resolveMavenCoordinates` properly
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 9bc273ee0dad [SPARK-47136][CORE][TESTS] Fix `MavenUtilsSuite` to use `MavenUtils.resolveMavenCoordinates` properly

9bc273ee0dad is described below

commit 9bc273ee0daddef3a0d453ba6311e996bc56830d
Author: Dongjoon Hyun
AuthorDate: Thu Feb 22 15:26:01 2024 -0800

[SPARK-47136][CORE][TESTS] Fix `MavenUtilsSuite` to use `MavenUtils.resolveMavenCoordinates` properly

### What changes were proposed in this pull request?

This PR aims to do the following.

1. Fix `MavenUtilsSuite` to use `MavenUtils.resolveMavenCoordinates` properly by using the `ivyPath` parameter of `MavenUtils.loadIvySettings` consistently.
2. Make all test cases isolated by adding `beforeEach` and `afterEach` instead of a single `beforeAll`.

### Why are the changes needed?

1. `MavenUtils` assumes to set the following together inside if it receives `ivyPath`.
https://github.com/apache/spark/blob/9debaeaa5a079a73605cddb90b1a77274c5284d3/common/utils/src/main/scala/org/apache/spark/util/MavenUtils.scala#L337-L342
3. `MavenUtilsSuite` uses `tempIvyPath` for all `MavenUtils.resolveMavenCoordinates` except one test case.
https://github.com/apache/spark/blob/9debaeaa5a079a73605cddb90b1a77274c5284d3/common/utils/src/test/scala/org/apache/spark/util/MavenUtilsSuite.scala#L175-L175
4. The following is the missed case and this PR aims to fix it.
https://github.com/apache/spark/blob/9debaeaa5a079a73605cddb90b1a77274c5284d3/common/utils/src/test/scala/org/apache/spark/util/MavenUtilsSuite.scala#L253

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass the CIs.

```
$ build/sbt "common-utils/testOnly *MavenUtilsSuite"
...
[info] MavenUtilsSuite:
[info] - incorrect maven coordinate throws error (9 milliseconds)
[info] - create repo resolvers (19 milliseconds)
[info] - create additional resolvers (7 milliseconds)
:: loading settings :: url = jar:file:/Users/dongjoon/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/apache/ivy/ivy/2.5.1/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml
[info] - add dependencies works correctly (29 milliseconds)
[info] - excludes works correctly (2 milliseconds)
[info] - ivy path works correctly (661 milliseconds)
[info] - search for artifact at local repositories (405 milliseconds)
[info] - dependency not found throws RuntimeException (198 milliseconds)
:: loading settings :: url = jar:file:/Users/dongjoon/Library/Caches/Coursier/v1/https/repo1.maven.org/maven2/org/apache/ivy/ivy/2.5.1/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml
[info] - neglects Spark and Spark's dependencies (388 milliseconds)
[info] - exclude dependencies end to end (385 milliseconds)
:: loading settings :: file = /Users/dongjoon/APACHE/spark-merge/target/tmp/ivy-9aa3863e-9dba-4002-996b-5e86b2f1281f/ivysettings.xml
[info] - load ivy settings file (103 milliseconds)
[info] - SPARK-10878: test resolution files cleaned after resolving artifact (70 milliseconds)
Spark was unable to load org/apache/spark/log4j2-defaults.properties
[info] - SPARK-34624: should ignore non-jar dependencies (247 milliseconds)
[info] Run completed in 3 seconds, 16 milliseconds.
[info] Total number of tests run: 13
[info] Suites: completed 1, aborted 0
[info] Tests: succeeded 13, failed 0, canceled 0, ignored 0, pending 0
[info] All tests passed.
[success] Total time: 3 s, completed Feb 22, 2024, 2:21:18 PM
```

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #45220 from dongjoon-hyun/SPARK-47136.

Authored-by: Dongjoon Hyun
Signed-off-by: Dongjoon Hyun
---
 .../scala/org/apache/spark/util/MavenUtilsSuite.scala | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/common/utils/src/test/scala/org/apache/spark/util/MavenUtilsSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/MavenUtilsSuite.scala
index 642eca3cf933..d30422ca8dd5 100644
--- a/common/utils/src/test/scala/org/apache/spark/util/MavenUtilsSuite.scala
+++ b/common/utils/src/test/scala/org/apache/spark/util/MavenUtilsSuite.scala
@@ -28,14 +28,14 @@ import scala.jdk.CollectionConverters._
 import org.apache.ivy.core.module.descriptor.MDArtifact
 import org.apache.ivy.core.settings.IvySettings
 import org.apache.ivy.plugins.resolver.{AbstractResolver, ChainResolver, FileSystemResolver, IBiblioResolver}
-import org.scalatest.BeforeAndAfterAll
+import org.scalatest.BeforeAndAfterEach
 import org.scalatest.funsuite.AnyFunSuite
// scalastyle
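The patch replaces one shared `beforeAll` setup with per-test `beforeEach`/`afterEach` so every case gets a fresh Ivy directory. A minimal ScalaTest sketch of that isolation pattern (suite name, directory handling, and the sample test are illustrative, not the actual `MavenUtilsSuite` code):

```scala
import java.nio.file.{Files, Path}

import scala.reflect.io.Directory

import org.scalatest.BeforeAndAfterEach
import org.scalatest.funsuite.AnyFunSuite

class IsolatedIvySuiteSketch extends AnyFunSuite with BeforeAndAfterEach {
  private var tempIvyPath: Path = _

  override def beforeEach(): Unit = {
    super.beforeEach()
    tempIvyPath = Files.createTempDirectory("ivy-") // fresh cache per test
  }

  override def afterEach(): Unit = {
    try new Directory(tempIvyPath.toFile).deleteRecursively() // no state leaks between tests
    finally super.afterEach()
  }

  test("resolution uses the per-test ivy path") {
    assert(Files.isDirectory(tempIvyPath))
  }
}
```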
(spark) branch master updated (9debaeaa5a07 -> 490467ff55bc)
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

  from 9debaeaa5a07 [SPARK-47069][PYTHON][CONNECT] Introduce `spark.profile.show/dump` for SparkSession-based profiling
   add 490467ff55bc Revert "[SPARK-47115][INFRA][FOLLOW-UP] Use larger runner for Maven build (macos-14-large)"

No new revisions were added by this update.

Summary of changes:
 .github/workflows/build_maven_java21_macos14.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(spark) branch master updated (6185e5cad7be -> 9debaeaa5a07)
This is an automated email from the ASF dual-hosted git repository.

ueshin pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

  from 6185e5cad7be [SPARK-47132][DOCS][PYTHON] Correct docstring for pyspark's dataframe.head
   add 9debaeaa5a07 [SPARK-47069][PYTHON][CONNECT] Introduce `spark.profile.show/dump` for SparkSession-based profiling

No new revisions were added by this update.

Summary of changes:
 python/pyspark/sql/connect/session.py             | 30 ++
 python/pyspark/sql/profiler.py                    | 70 ++
 python/pyspark/sql/session.py                     | 33 ++
 .../tests/connect/test_parity_memory_profiler.py  | 2 +-
 .../sql/tests/connect/test_parity_udf_profiler.py | 2 +-
 python/pyspark/sql/tests/test_session.py          | 63 ++-
 python/pyspark/sql/tests/test_udf_profiler.py     | 28 -
 python/pyspark/tests/test_memory_profiler.py      | 28 -
 8 files changed, 171 insertions(+), 85 deletions(-)
(spark) branch master updated (6de527e9ee94 -> 6185e5cad7be)
This is an automated email from the ASF dual-hosted git repository.

xinrong pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

  from 6de527e9ee94 [SPARK-43259][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_2024
   add 6185e5cad7be [SPARK-47132][DOCS][PYTHON] Correct docstring for pyspark's dataframe.head

No new revisions were added by this update.

Summary of changes:
 python/pyspark/sql/dataframe.py | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)
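The docstring correction is about what `head` returns; the same split exists on the Scala `Dataset` API that PySpark mirrors, where `head()` yields a single row while `head(n)` yields a collection of up to `n` rows. A small Scala illustration (local session used only for demonstration):

```scala
import org.apache.spark.sql.{Row, SparkSession}

object HeadSemanticsExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("head-demo").getOrCreate()
    import spark.implicits._

    val df = Seq(1, 2, 3).toDF("v")

    val first: Row = df.head()             // no argument: a single Row
    val firstTwo: Array[Row] = df.head(2)  // with n: an Array of up to n Rows

    println(first.getInt(0))                            // 1
    println(firstTwo.map(_.getInt(0)).mkString(","))    // 1,2

    spark.stop()
  }
}
```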
(spark) branch master updated: [SPARK-43259][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_2024
This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 6de527e9ee94 [SPARK-43259][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_2024

6de527e9ee94 is described below

commit 6de527e9ee941bf17aa1d2b26c2a63d36e0bb946
Author: Mihailo Milosevic
AuthorDate: Thu Feb 22 20:32:54 2024 +0300

[SPARK-43259][SQL] Assign a name to the error class _LEGACY_ERROR_TEMP_2024

### What changes were proposed in this pull request?

In the PR, I propose to assign the proper name `INVALID_EXPRESSION_ENCODER` to the legacy error class `_LEGACY_ERROR_TEMP_2024`, and add a test to the suite which uses `checkError()`. Also this PR improves the error message.

### Why are the changes needed?

Proper name improves user experience w/ Spark SQL.

### Does this PR introduce _any_ user-facing change?

Yes, the PR changes an user-facing error message.

### How was this patch tested?

By running the modified test suite:
```
./build/mvn -Dtest=none -DwildcardSuites=org.apache.spark.sql.errors.QueryExecutionErrorsSuite test
```

Closes #45095 from mihailom-db/SPARK-43259.

Authored-by: Mihailo Milosevic
Signed-off-by: Max Gekk
---
 .../src/main/resources/error/error-classes.json    | 11 ++-
 .../src/main/resources/error/error-states.json     | 6 ++
 docs/sql-error-conditions-sqlstates.md             | 9 +
 docs/sql-error-conditions.md                       | 6 ++
 .../spark/sql/catalyst/encoders/package.scala      | 2 +-
 .../spark/sql/errors/QueryExecutionErrors.scala    | 12 +++
 .../sql/errors/QueryExecutionErrorsSuite.scala     | 23 --
 7 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index d4bb4920db88..17ef8e5fe469 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -1877,6 +1877,12 @@
 ],
 "sqlState" : "F"
 },
+ "INVALID_EXPRESSION_ENCODER" : {
+"message" : [
+ "Found an invalid expression encoder. Expects an instance of ExpressionEncoder but got . For more information consult '/api/java/index.html?org/apache/spark/sql/Encoder.html'."
+],
+"sqlState" : "42001"
+ },
 "INVALID_EXTRACT_BASE_FIELD_TYPE" : {
 "message" : [
 "Can't extract a value from . Need a complex type [STRUCT, ARRAY, MAP] but got ."
@@ -5714,11 +5720,6 @@
 "Unresolved encoder expected, but was found."
 ]
 },
- "_LEGACY_ERROR_TEMP_2024" : {
-"message" : [
- "Only expression encoders are supported for now."
-]
- },
 "_LEGACY_ERROR_TEMP_2025" : {
 "message" : [
 " must override either or ."

diff --git a/common/utils/src/main/resources/error/error-states.json b/common/utils/src/main/resources/error/error-states.json
index e278c75ae4fa..f696866bb15d 100644
--- a/common/utils/src/main/resources/error/error-states.json
+++ b/common/utils/src/main/resources/error/error-states.json
@@ -2933,6 +2933,12 @@
 "standard": "Y",
 "usedBy": ["SQL/Foundation", "PostgreSQL", "Redshift", "Oracle", "SQL Server"]
 },
+"42001": {
+"description": "Invalid encoder error",
+"origin": "Spark",
+"standard": "N",
+"usedBy": ["Spark"]
+},
 "42501": {
 "description": "The authorization ID does not have the privilege to perform the specified operation on the identified object.",
 "origin": "DB2",

diff --git a/docs/sql-error-conditions-sqlstates.md b/docs/sql-error-conditions-sqlstates.md
index 85f1c5c69c33..b142c7340537 100644
--- a/docs/sql-error-conditions-sqlstates.md
+++ b/docs/sql-error-conditions-sqlstates.md
@@ -238,6 +238,15 @@ Spark SQL uses the following `SQLSTATE` classes:
 AMBIGUOUS_REFERENCE_TO_FIELDS, INVALID_COLUMN_OR_FIELD_DATA_TYPE, INVALID_EXTRACT_BASE_FIELD_TYPE, INVALID_EXTRACT_FIELD_TYPE, INVALID_FIELD_NAME
+
+
+ 42001
+ Invalid encoder error
+
+
+
+ INVALID_EXPRESSION_ENCODER
+
 42601

diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index e458cd5a337b..0745de995799 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -1144,6 +1144,12 @@ SQLSTATE: F
 Executor memory `` must be at least ``.
 Please increase executor memory using the --executor-memory option or "``" in Spark configuration.
+### INVALID_EXPRESSION_ENCODER
+
+[SQLSTATE: 42001](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)
+
+Found an invalid expression encoder. Expects an instance of `ExpressionEncoder` but got ``. For more information consult '``/api/java/index.h
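The mechanism behind this change is Spark's error framework: a named error class plus message parameters is substituted into a template defined in `error-classes.json`. The sketch below is schematic only, not Spark's real classes, and the `encoderType` placeholder name is a hypothetical stand-in because the actual parameter name was stripped from this digest.

```scala
object ErrorClassSketch {
  // Templates keyed by error class, with <param> placeholders, mirroring the
  // shape of the error-classes.json entry added above.
  private val templates = Map(
    "INVALID_EXPRESSION_ENCODER" ->
      "Found an invalid expression encoder. Expects an instance of ExpressionEncoder but got <encoderType>."
  )

  // Substitute every parameter into the template for the given error class.
  def errorMessage(errorClass: String, params: Map[String, String]): String =
    params.foldLeft(templates(errorClass)) { case (msg, (k, v)) => msg.replace(s"<$k>", v) }

  def main(args: Array[String]): Unit =
    println(errorMessage("INVALID_EXPRESSION_ENCODER", Map("encoderType" -> "MyEncoder")))
}
```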
(spark) branch master updated (78f7c30e140f -> 94d0d96ac30a)
This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

  from 78f7c30e140f [SPARK-42328][SQL] Remove _LEGACY_ERROR_TEMP_1175 from error classes
   add 94d0d96ac30a [SPARK-47127][INFRA][FOLLOWUP] Remove `3.5.1` from `SKIP_SPARK_RELEASE_VERSIONS`

No new revisions were added by this update.

Summary of changes:
 .github/workflows/build_maven.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
(spark) branch master updated: [SPARK-42328][SQL] Remove _LEGACY_ERROR_TEMP_1175 from error classes
This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 78f7c30e140f [SPARK-42328][SQL] Remove _LEGACY_ERROR_TEMP_1175 from error classes

78f7c30e140f is described below

commit 78f7c30e140fd8cf4a80b783dd7e9ee4d1b4d7e2
Author: Nikola Mandic
AuthorDate: Thu Feb 22 12:09:02 2024 +0300

[SPARK-42328][SQL] Remove _LEGACY_ERROR_TEMP_1175 from error classes

### What changes were proposed in this pull request?

The only occurrence of `_LEGACY_ERROR_TEMP_1175` appears under conversion from Spark data types to Parquet. All supported documented [Spark data types](https://spark.apache.org/docs/latest/sql-ref-datatypes.html) are covered in the [conversion function](https://github.com/apache/spark/blob/3e0808c33f185c13808ce2d547ce9ba0057d31a6/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala#L517-L745) (`VarcharType` and `CharType` are not present [...]

Convert the error class to `INTERNAL_ERROR`.

### Why are the changes needed?

Remove legacy error classes as part of the activity in [SPARK-37935](https://issues.apache.org/jira/browse/SPARK-37935).

### Does this PR introduce _any_ user-facing change?

If Spark works correctly, the user shouldn't be able to run into `INTERNAL_ERROR` by using the public API.

### How was this patch tested?

Added a test to `QueryCompilationErrorsSuite` and tested with sbt:
```
project sql
testOnly *QueryCompilationErrorsSuite
```

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #45183 from nikolamand-db/nikolamand-db/SPARK-42328.

Authored-by: Nikola Mandic
Signed-off-by: Max Gekk
---
 .../utils/src/main/resources/error/error-classes.json | 5 -
 .../spark/sql/errors/QueryCompilationErrors.scala     | 5 +++--
 .../sql/errors/QueryCompilationErrorsSuite.scala      | 19 +++
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json
index c6149ce35a43..d4bb4920db88 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -5118,11 +5118,6 @@
 "Unrecognized Parquet type: ."
 ]
 },
- "_LEGACY_ERROR_TEMP_1175" : {
-"message" : [
- "Unsupported data type ."
-]
- },
 "_LEGACY_ERROR_TEMP_1181" : {
 "message" : [
 "Stream-stream join without equality predicate is not supported."

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 53338f38ed6d..e96474862b1d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -1908,8 +1908,9 @@ private[sql] object QueryCompilationErrors extends QueryErrorsBase with Compilat
 def cannotConvertDataTypeToParquetTypeError(field: StructField): Throwable = {
 new AnalysisException(
-  errorClass = "_LEGACY_ERROR_TEMP_1175",
-  messageParameters = Map("dataType" -> field.dataType.catalogString))
+  errorClass = "INTERNAL_ERROR",
+  messageParameters = Map("message" ->
+    s"Cannot convert Spark data type ${toSQLType(field.dataType)} to any Parquet type."))
 }

 def incompatibleViewSchemaChangeError(

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
index e22399c326f6..d4e4a41155ea 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/errors/QueryCompilationErrorsSuite.scala
@@ -24,6 +24,7 @@ import org.apache.spark.sql._
 import org.apache.spark.sql.api.java.{UDF1, UDF2, UDF23Test}
 import org.apache.spark.sql.catalyst.expressions.{Coalesce, Literal, UnsafeRow}
 import org.apache.spark.sql.catalyst.parser.ParseException
+import org.apache.spark.sql.execution.datasources.parquet.SparkToParquetSchemaConverter
 import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog
 import org.apache.spark.sql.expressions.SparkUserDefinedFunction
 import org.apache.spark.sql.functions._
@@ -962,6 +963,24 @@ class QueryCompilationErrorsSuite
 "methodName" -> "update",
 "className" -> "org.apache.spark.sql.catalyst.expressions.UnsafeRow"))
 }
+
+  test("INTERNAL_ERROR: Convert unsupported data type from Spark to Parquet") {
+val convert