[spark] branch master updated: [MINOR] Fix typo `Exlude` to `Exclude` in `HealthTracker`
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 720fe2f7e60 [MINOR] Fix typo `Exlude` to `Exclude` in `HealthTracker` 720fe2f7e60 is described below commit 720fe2f7e6054ba25bd06fcc154127c74d057c5d Author: sychen AuthorDate: Tue Jan 31 08:13:44 2023 -0600 [MINOR] Fix typo `Exlude` to `Exclude` in `HealthTracker` ### What changes were proposed in this pull request? Fix typo ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? exist UT Closes #39798 from cxzl25/typo_HealthTracker. Authored-by: sychen Signed-off-by: Sean Owen --- .../scala/org/apache/spark/scheduler/HealthTracker.scala | 12 ++-- .../org/apache/spark/scheduler/HealthTrackerSuite.scala | 6 +++--- .../spark/deploy/yarn/YarnAllocatorNodeHealthTracker.scala | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala b/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala index 6bd5668651a..d7ddeade2fd 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/HealthTracker.scala @@ -62,7 +62,7 @@ private[scheduler] class HealthTracker ( HealthTracker.validateExcludeOnFailureConfs(conf) private val MAX_FAILURES_PER_EXEC = conf.get(config.MAX_FAILURES_PER_EXEC) private val MAX_FAILED_EXEC_PER_NODE = conf.get(config.MAX_FAILED_EXEC_PER_NODE) - val EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS = HealthTracker.getExludeOnFailureTimeout(conf) + val EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS = HealthTracker.getExcludeOnFailureTimeout(conf) private val EXCLUDE_FETCH_FAILURE_ENABLED = conf.get(config.EXCLUDE_ON_FAILURE_FETCH_FAILURE_ENABLED) private val EXCLUDE_ON_FAILURE_DECOMMISSION_ENABLED = @@ -93,7 +93,7 @@ 
private[scheduler] class HealthTracker ( * remove from this when executors are removed from spark, so we can track when we get multiple * successive excluded executors on one node. Nonetheless, it will not grow too large because * there cannot be many excluded executors on one node, before we stop requesting more - * executors on that node, and we clean up the list of exluded executors once an executor has + * executors on that node, and we clean up the list of excluded executors once an executor has * been excluded for EXCLUDE_ON_FAILURE_TIMEOUT_MILLIS. */ val nodeToExcludedExecs = new HashMap[String, HashSet[String]]() @@ -110,7 +110,7 @@ private[scheduler] class HealthTracker ( // Apply the timeout to excluded nodes and executors val execsToInclude = executorIdToExcludedStatus.filter(_._2.expiryTime < now).keys if (execsToInclude.nonEmpty) { -// Include any executors that have been exluded longer than the excludeOnFailure timeout. +// Include any executors that have been excluded longer than the excludeOnFailure timeout. logInfo(s"Removing executors $execsToInclude from exclude list because the " + s"the executors have reached the timed out") execsToInclude.foreach { exec => @@ -382,7 +382,7 @@ private[scheduler] class HealthTracker ( /** * Apply the timeout to individual tasks. This is to prevent one-off failures that are very * spread out in time (and likely have nothing to do with problems on the executor) from - * triggering exlusion. However, note that we do *not* remove executors and nodes from + * triggering exclusion. However, note that we do *not* remove executors and nodes from * being excluded as we expire individual task failures -- each have their own timeout. 
E.g., * suppose: * * timeout = 10, maxFailuresPerExec = 2 @@ -447,7 +447,7 @@ private[spark] object HealthTracker extends Logging { } } - def getExludeOnFailureTimeout(conf: SparkConf): Long = { + def getExcludeOnFailureTimeout(conf: SparkConf): Long = { conf.get(config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF).getOrElse { conf.get(config.EXCLUDE_ON_FAILURE_LEGACY_TIMEOUT_CONF).getOrElse { Utils.timeStringAsMs(DEFAULT_TIMEOUT) @@ -484,7 +484,7 @@ private[spark] object HealthTracker extends Logging { } } -val timeout = getExludeOnFailureTimeout(conf) +val timeout = getExcludeOnFailureTimeout(conf) if (timeout <= 0) { // first, figure out where the timeout came from, to include the right conf in the message. conf.get(config.EXCLUDE_ON_FAILURE_TIMEOUT_CONF) match { diff --git a/core/src/test/scala/org/apache/spark/scheduler/HealthTrackerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/H
[spark-website] branch asf-site updated: Change Gabor Somogyi company
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new 2f766afbc Change Gabor Somogyi company 2f766afbc is described below commit 2f766afbcb77e495f0e5e7e6a81458075e6ade96 Author: Gabor Somogyi AuthorDate: Mon Jan 23 07:12:20 2023 -0600 Change Gabor Somogyi company Author: Gabor Somogyi Closes #432 from gaborgsomogyi/company. --- committers.md| 2 +- site/committers.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/committers.md b/committers.md index a16b33d31..827073a0d 100644 --- a/committers.md +++ b/committers.md @@ -74,7 +74,7 @@ navigation: |Kousuke Saruta|NTT Data| |Saisai Shao|Tencent| |Prashant Sharma|IBM| -|Gabor Somogyi|Cloudera| +|Gabor Somogyi|Apple| |Ram Sriharsha|Databricks| |Chao Sun|Apple| |Maciej Szymkiewicz|| diff --git a/site/committers.html b/site/committers.html index 5ac86d8db..c0543233b 100644 --- a/site/committers.html +++ b/site/committers.html @@ -390,7 +390,7 @@ Gabor Somogyi - Cloudera + Apple Ram Sriharsha - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [MINOR][SHUFFLE] Include IOException in warning log of finalizeShuffleMerge
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 074894ce70e [MINOR][SHUFFLE] Include IOException in warning log of finalizeShuffleMerge 074894ce70e is described below commit 074894ce70e90717cbc81f7e6abc53d10872cda3 Author: Ted Yu AuthorDate: Sat Jan 21 15:56:40 2023 -0600 [MINOR][SHUFFLE] Include IOException in warning log of finalizeShuffleMerge ### What changes were proposed in this pull request? This PR adds `ioe` to the warning log of `finalizeShuffleMerge`. ### Why are the changes needed? With `ioe` logged, user would have more clue as to the root cause. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing test suite. Closes #39654 from tedyu/shuffle-ioe. Authored-by: Ted Yu Signed-off-by: Sean Owen --- .../org/apache/spark/network/shuffle/RemoteBlockPushResolver.java| 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java index fb3f8109a1a..a2e8219228a 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java @@ -814,8 +814,9 @@ public class RemoteBlockPushResolver implements MergedShuffleFileManager { } } catch (IOException ioe) { logger.warn("{} attempt {} shuffle {} shuffleMerge {}: exception while " + -"finalizing shuffle partition {}", msg.appId, msg.appAttemptId, msg.shuffleId, -msg.shuffleMergeId, partition.reduceId); +"finalizing shuffle partition {}. 
Exception message: {}", msg.appId, +msg.appAttemptId, msg.shuffleId, msg.shuffleMergeId, partition.reduceId, +ioe.getMessage()); } finally { partition.closeAllFilesAndDeleteIfNeeded(false); } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (68af2fd106e -> e969bb2b7bc)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 68af2fd106e [SPARK-42082][SPARK-41598][PYTHON][CONNECT] Introduce `PySparkValueError` and `PySparkTypeError` add e969bb2b7bc [SPARK-41683][CORE] Fix issue of getting incorrect property numActiveStages in jobs API No new revisions were added by this update. Summary of changes: .../org/apache/spark/status/AppStatusListener.scala | 1 - .../apache/spark/status/AppStatusListenerSuite.scala | 19 +++ 2 files changed, 19 insertions(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (d8d604bc07b -> 9549898948c)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from d8d604bc07b [SPARK-40599][SQL] Add multiTransform methods to TreeNode to generate alternatives add 9549898948c [SPARK-42092][BUILD] Upgrade RoaringBitmap to 0.9.38 No new revisions were added by this update. Summary of changes: core/benchmarks/MapStatusesConvertBenchmark-jdk11-results.txt | 10 +- core/benchmarks/MapStatusesConvertBenchmark-jdk17-results.txt | 8 core/benchmarks/MapStatusesConvertBenchmark-results.txt | 10 +- dev/deps/spark-deps-hadoop-2-hive-2.3 | 4 ++-- dev/deps/spark-deps-hadoop-3-hive-2.3 | 4 ++-- pom.xml | 2 +- 6 files changed, 19 insertions(+), 19 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41047][SQL] Improve docs for round
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 073b23ef2a9 [SPARK-41047][SQL] Improve docs for round 073b23ef2a9 is described below commit 073b23ef2a982370e0ff8836d55361bca3320e37 Author: panbingkun AuthorDate: Wed Jan 11 16:39:25 2023 -0600 [SPARK-41047][SQL] Improve docs for round ### What changes were proposed in this pull request? The pr aims to improve docs for round. ### Why are the changes needed? Reduce user misunderstanding. It is not necessary to enumerate the usage of legacy in the example. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Manual check. Closes #39511 from panbingkun/SPARK-41047. Authored-by: panbingkun Signed-off-by: Sean Owen --- .../org/apache/spark/sql/catalyst/expressions/mathExpressions.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala index 16f081a0cc2..9ffc148180a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala @@ -1644,8 +1644,6 @@ abstract class RoundBase(child: Expression, scale: Expression, Examples: > SELECT _FUNC_(2.5, 0); 3 - > SELECT _FUNC_(25, -1); - 30 """, since = "1.5.0", group = "math_funcs") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [MINOR][SQL][YARN] Fix a typo: less then -> less than
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c7ae0657f93 [MINOR][SQL][YARN] Fix a typo: less then -> less than c7ae0657f93 is described below commit c7ae0657f93639dfe1b2996d94f5cabd16adc65d Author: Yuming Wang AuthorDate: Wed Jan 11 11:20:33 2023 -0600 [MINOR][SQL][YARN] Fix a typo: less then -> less than ### What changes were proposed in this pull request? Fix a typo: less then -> less than. ### Why are the changes needed? Fix typo. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? N/A. Closes #39513 from wangyum/typo. Authored-by: Yuming Wang Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala | 2 +- .../main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 4 ++-- .../org/apache/spark/sql/execution/BaseScriptTransformationExec.scala | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index da4dd0cbb6b..313b19f919d 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -705,7 +705,7 @@ private[yarn] class YarnAllocator( containersToUse: ArrayBuffer[Container], remaining: ArrayBuffer[Container]): Unit = { // Match on the exact resource we requested so there shouldn't be a mismatch, -// we are relying on YARN to return a container with resources no less then we requested. +// we are relying on YARN to return a container with resources no less than we requested. 
// If we change this, or starting validating the container, be sure the logic covers SPARK-6050. val rpId = getResourceProfileIdFromPriority(allocatedContainer.getPriority) val resourceForRP = rpIdToYarnResource.get(rpId) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 960d7b4599b..eb85aee25ce 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -1707,9 +1707,9 @@ class AstBuilder extends SqlBaseParserBaseVisitor[AnyRef] with SQLConfHelper wit * - Null-safe Equal: '<=>' * - Not Equal: '<>' or '!=' * - Less than: '<' - * - Less then or Equal: '<=' + * - Less than or Equal: '<=' * - Greater than: '>' - * - Greater then or Equal: '>=' + * - Greater than or Equal: '>=' */ override def visitComparison(ctx: ComparisonContext): Expression = withOrigin(ctx) { val left = expression(ctx.left) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala index bfc2bc7cd11..99d59901d58 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala @@ -123,8 +123,8 @@ trait BaseScriptTransformationExec extends UnaryExecNode { .map { case (data, writer) => writer(data) }) } else { // In schema less mode, hive will choose first two output column as output. -// If output column size less then 2, it will return NULL for columns with missing values. -// Here we split row string and choose first 2 values, if values's size less then 2, +// If output column size less than 2, it will return NULL for columns with missing values. 
+// Here we split row string and choose first 2 values, if values's size less than 2, // we pad NULL value until 2 to make behavior same with hive. val kvWriter = CatalystTypeConverters.createToCatalystConverter(StringType) prevLine: String => - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41890][CORE][SQL][UI] Reduce `toSeq` in `RDDOperationGraphWrapperSerializer`/`SparkPlanGraphWrapperSerializer` for Scala 2.13
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 6f96a27c965 [SPARK-41890][CORE][SQL][UI] Reduce `toSeq` in `RDDOperationGraphWrapperSerializer`/`SparkPlanGraphWrapperSerializer` for Scala 2.13 6f96a27c965 is described below commit 6f96a27c9653c6028e39051a0288cf661ea4b971 Author: yangjie01 AuthorDate: Fri Jan 6 08:18:33 2023 -0600 [SPARK-41890][CORE][SQL][UI] Reduce `toSeq` in `RDDOperationGraphWrapperSerializer`/`SparkPlanGraphWrapperSerializer` for Scala 2.13 ### What changes were proposed in this pull request? Similar to SPARK-41709, this pr aims to avoid `toSeq` in `RDDOperationGraphWrapperSerializer` and `SparkPlanGraphWrapperSerializer` to make no performance difference between Scala 2.13 and Scala 2.12 when create ui objects from protobuf objects, the `Seq` in related `ui` class also explicitly defined as `collection.Seq` in this pr. ### Why are the changes needed? Avoid collection conversion when creating ui objects from protobuf objects for Scala 2.13. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #39399 from LuciferYang/SPARK-41709-FOLLOWUP. 
Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../protobuf/RDDOperationGraphWrapperSerializer.scala | 10 +- .../src/main/scala/org/apache/spark/status/storeTypes.scala | 10 +- .../scala/org/apache/spark/ui/scope/RDDOperationGraph.scala | 6 +++--- project/MimaExcludes.scala | 13 - .../spark/sql/execution/ui/SQLAppStatusListener.scala | 3 ++- .../apache/spark/sql/execution/ui/SQLAppStatusStore.scala | 8 .../org/apache/spark/sql/execution/ui/SparkPlanGraph.scala | 9 + .../org/apache/spark/status/api/v1/sql/SqlResource.scala| 7 --- .../main/scala/org/apache/spark/status/api/v1/sql/api.scala | 6 +++--- .../protobuf/sql/SparkPlanGraphWrapperSerializer.scala | 10 +- .../apache/spark/status/api/v1/sql/SqlResourceSuite.scala | 2 +- 11 files changed, 49 insertions(+), 35 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/status/protobuf/RDDOperationGraphWrapperSerializer.scala b/core/src/main/scala/org/apache/spark/status/protobuf/RDDOperationGraphWrapperSerializer.scala index 44622514ac9..c0d86ede198 100644 --- a/core/src/main/scala/org/apache/spark/status/protobuf/RDDOperationGraphWrapperSerializer.scala +++ b/core/src/main/scala/org/apache/spark/status/protobuf/RDDOperationGraphWrapperSerializer.scala @@ -49,9 +49,9 @@ class RDDOperationGraphWrapperSerializer extends ProtobufSerDe { val wrapper = StoreTypes.RDDOperationGraphWrapper.parseFrom(bytes) new RDDOperationGraphWrapper( stageId = wrapper.getStageId.toInt, - edges = wrapper.getEdgesList.asScala.map(deserializeRDDOperationEdge).toSeq, - outgoingEdges = wrapper.getOutgoingEdgesList.asScala.map(deserializeRDDOperationEdge).toSeq, - incomingEdges = wrapper.getIncomingEdgesList.asScala.map(deserializeRDDOperationEdge).toSeq, + edges = wrapper.getEdgesList.asScala.map(deserializeRDDOperationEdge), + outgoingEdges = wrapper.getOutgoingEdgesList.asScala.map(deserializeRDDOperationEdge), + incomingEdges = wrapper.getIncomingEdgesList.asScala.map(deserializeRDDOperationEdge), rootCluster = 
deserializeRDDOperationClusterWrapper(wrapper.getRootCluster) ) } @@ -75,9 +75,9 @@ class RDDOperationGraphWrapperSerializer extends ProtobufSerDe { new RDDOperationClusterWrapper( id = op.getId, name = op.getName, - childNodes = op.getChildNodesList.asScala.map(deserializeRDDOperationNode).toSeq, + childNodes = op.getChildNodesList.asScala.map(deserializeRDDOperationNode), childClusters = - op.getChildClustersList.asScala.map(deserializeRDDOperationClusterWrapper).toSeq + op.getChildClustersList.asScala.map(deserializeRDDOperationClusterWrapper) ) } diff --git a/core/src/main/scala/org/apache/spark/status/storeTypes.scala b/core/src/main/scala/org/apache/spark/status/storeTypes.scala index 08bc4c89b47..b53455207a0 100644 --- a/core/src/main/scala/org/apache/spark/status/storeTypes.scala +++ b/core/src/main/scala/org/apache/spark/status/storeTypes.scala @@ -490,8 +490,8 @@ private[spark] class StreamBlockData( private[spark] class RDDOperationClusterWrapper( val id: String, val name: String, -val childNodes: Seq[RDDOperationNode], -val childClusters: Seq[RDDOperationClusterWrapper]) { +val childNodes: collection.Seq[RDDOperationNode], +val childClusters: collection.Seq[RDDOperationClusterWrapper]) { def toRDDOperationCluster(): RDDOperationCluster
[spark] branch master updated: [SPARK-41800][BUILD] Upgrade commons-compress to 1.22
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 60e87b68d21 [SPARK-41800][BUILD] Upgrade commons-compress to 1.22 60e87b68d21 is described below commit 60e87b68d2106b8e62144b83bcd45281a87aa5e3 Author: panbingkun AuthorDate: Fri Jan 6 08:17:55 2023 -0600 [SPARK-41800][BUILD] Upgrade commons-compress to 1.22 ### What changes were proposed in this pull request? The pr aims to upgrade commons-compress from 1.21 to 1.22 ### Why are the changes needed? This will bring the latest improvements. https://user-images.githubusercontent.com/15246973/210133282-18ae7d50-9076-4c95-a9e3-9ac6a266cda0.png";> ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Closes #39326 from panbingkun/SPARK-41800. Authored-by: panbingkun Signed-off-by: Sean Owen --- core/pom.xml | 4 dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index fb032064eda..ba8c7824361 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -181,6 +181,10 @@ commons-codec commons-codec + + org.apache.commons + commons-compress + org.apache.commons commons-lang3 diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index a1fd06003bb..49c6742005c 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -39,7 +39,7 @@ commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-collections4/4.4//commons-collections4-4.4.jar commons-compiler/3.1.9//commons-compiler-3.1.9.jar -commons-compress/1.21//commons-compress-1.21.jar +commons-compress/1.22//commons-compress-1.22.jar commons-configuration/1.6//commons-configuration-1.6.jar 
commons-crypto/1.1.0//commons-crypto-1.1.0.jar commons-dbcp/1.4//commons-dbcp-1.4.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index adf9ec9452b..6f7b6372720 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -40,7 +40,7 @@ commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-collections4/4.4//commons-collections4-4.4.jar commons-compiler/3.1.9//commons-compiler-3.1.9.jar -commons-compress/1.21//commons-compress-1.21.jar +commons-compress/1.22//commons-compress-1.22.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar commons-dbcp/1.4//commons-dbcp-1.4.jar commons-io/2.11.0//commons-io-2.11.0.jar diff --git a/pom.xml b/pom.xml index a7a88543b96..5e28fd4edfe 100644 --- a/pom.xml +++ b/pom.xml @@ -188,7 +188,7 @@ 1.1.8.4 3.0.3 1.15 -1.21 +1.22 2.11.0 2.6 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark-website] branch asf-site updated: Update 3.4 Release Window
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new 563ab1396 Update 3.4 Release Window 563ab1396 is described below commit 563ab1396ccffcc4e070d67c7668909632ba73dc Author: Xinrong Meng AuthorDate: Fri Jan 6 07:35:36 2023 -0600 Update 3.4 Release Window Per the discussion via email, we postpone one day to cut branch-3.4 since January 15th, 2023 is a Sunday. Author: Xinrong Meng Closes #430 from xinrong-meng/3.4_cut. --- site/versioning-policy.html | 2 +- versioning-policy.md| 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/site/versioning-policy.html b/site/versioning-policy.html index 271f72a52..9923954ab 100644 --- a/site/versioning-policy.html +++ b/site/versioning-policy.html @@ -261,7 +261,7 @@ in between feature releases. Major releases do not happen according to a fixed s - January 15th 2023 + January 16th 2023 Code freeze. Release branch cut. diff --git a/versioning-policy.md b/versioning-policy.md index c1136de67..c5fbe1196 100644 --- a/versioning-policy.md +++ b/versioning-policy.md @@ -107,7 +107,7 @@ in between feature releases. Major releases do not happen according to a fixed s | Date | Event | | - | - | -| January 15th 2023 | Code freeze. Release branch cut.| +| January 16th 2023 | Code freeze. Release branch cut.| | Late January 2023 | QA period. Focus on bug fixes, tests, stability and docs. Generally, no new features merged.| | February 2023 | Release candidates (RC), voting, etc. until final release passes| - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41883][BUILD] Upgrade dropwizard metrics 4.2.15
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new eee9428ea76 [SPARK-41883][BUILD] Upgrade dropwizard metrics 4.2.15 eee9428ea76 is described below commit eee9428ea76f8f5603117d8be58028b11d75ff24 Author: yangjie01 AuthorDate: Thu Jan 5 07:32:00 2023 -0600 [SPARK-41883][BUILD] Upgrade dropwizard metrics 4.2.15 ### What changes were proposed in this pull request? This pr aims upgrade dropwizard metrics to 4.2.15. ### Why are the changes needed? The release notes as follows: - https://github.com/dropwizard/metrics/releases/tag/v4.2.14 - https://github.com/dropwizard/metrics/releases/tag/v4.2.15 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass Github Actions Closes #39391 from LuciferYang/SPARK-41883. Authored-by: yangjie01 Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 10 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 10 +- pom.xml | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index e1141fbc558..a1fd06003bb 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -194,11 +194,11 @@ log4j-slf4j2-impl/2.19.0//log4j-slf4j2-impl-2.19.0.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar -metrics-core/4.2.13//metrics-core-4.2.13.jar -metrics-graphite/4.2.13//metrics-graphite-4.2.13.jar -metrics-jmx/4.2.13//metrics-jmx-4.2.13.jar -metrics-json/4.2.13//metrics-json-4.2.13.jar -metrics-jvm/4.2.13//metrics-jvm-4.2.13.jar +metrics-core/4.2.15//metrics-core-4.2.15.jar +metrics-graphite/4.2.15//metrics-graphite-4.2.15.jar +metrics-jmx/4.2.15//metrics-jmx-4.2.15.jar 
+metrics-json/4.2.15//metrics-json-4.2.15.jar +metrics-jvm/4.2.15//metrics-jvm-4.2.15.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.86.Final//netty-all-4.1.86.Final.jar netty-buffer/4.1.86.Final//netty-buffer-4.1.86.Final.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index d4157917e43..adf9ec9452b 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -178,11 +178,11 @@ log4j-slf4j2-impl/2.19.0//log4j-slf4j2-impl-2.19.0.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar -metrics-core/4.2.13//metrics-core-4.2.13.jar -metrics-graphite/4.2.13//metrics-graphite-4.2.13.jar -metrics-jmx/4.2.13//metrics-jmx-4.2.13.jar -metrics-json/4.2.13//metrics-json-4.2.13.jar -metrics-jvm/4.2.13//metrics-jvm-4.2.13.jar +metrics-core/4.2.15//metrics-core-4.2.15.jar +metrics-graphite/4.2.15//metrics-graphite-4.2.15.jar +metrics-jmx/4.2.15//metrics-jmx-4.2.15.jar +metrics-json/4.2.15//metrics-json-4.2.15.jar +metrics-jvm/4.2.15//metrics-jvm-4.2.15.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.86.Final//netty-all-4.1.86.Final.jar netty-buffer/4.1.86.Final//netty-buffer-4.1.86.Final.jar diff --git a/pom.xml b/pom.xml index 20a334e8c4b..e2ae0631f80 100644 --- a/pom.xml +++ b/pom.xml @@ -152,7 +152,7 @@ If you changes codahale.metrics.version, you also need to change the link to metrics.dropwizard.io in docs/monitoring.md. --> -4.2.13 +4.2.15 1.11.1 1.12.0 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (23e3c9b7c2f -> d0a598922e9)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 23e3c9b7c2f [SPARK-41828][CONNECT][PYTHON] Make `createDataFrame` support empty dataframe add d0a598922e9 [MINOR] Fix a typo "from from" -> "from" No new revisions were added by this update. Summary of changes: dev/ansible-for-test-node/roles/jenkins-worker/README.md | 2 +- docs/running-on-yarn.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (3c40be2dddc -> ec594236df4)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 3c40be2dddc [SPARK-41405][SQL] Centralize the column resolution logic add ec594236df4 [SPARK-41853][CORE] Use Map in place of SortedMap for ErrorClassesJsonReader No new revisions were added by this update. Summary of changes: core/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SQL][MINOR] Use Diamond operator for constructing HashMap
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 18488158bee [SQL][MINOR] Use Diamond operator for constructing HashMap 18488158bee is described below commit 18488158beee5435f99899f99b2e90fb6e37f3d5 Author: Ted Yu AuthorDate: Thu Dec 29 08:07:50 2022 -0600 [SQL][MINOR] Use Diamond operator for constructing HashMap ### What changes were proposed in this pull request? This PR uses Diamond operator for constructing HashMap and Tuple2 for type inference. ### Why are the changes needed? The change follows Java practices for creating new HashMap. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Existing test suite. Closes #39250 from tedyu/hash-map. Authored-by: Ted Yu Signed-off-by: Sean Owen --- .../test/org/apache/spark/sql/JavaBeanDeserializationSuite.java | 6 +++--- .../java/test/org/apache/spark/sql/JavaColumnExpressionSuite.java | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java index da626b4d873..66c985bdda0 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java @@ -590,9 +590,9 @@ public class JavaBeanDeserializationSuite implements Serializable { .reduceGroups(rf); List> expectedRecords = Arrays.asList( -new Tuple2("a", new Item("a", 8)), -new Tuple2("b", new Item("b", 3)), -new Tuple2("c", new Item("c", 2))); +new Tuple2<>("a", new Item("a", 8)), +new Tuple2<>("b", new Item("b", 3)), +new Tuple2<>("c", new Item("c", 2))); List> result = finalDs.collectAsList(); diff --git 
a/sql/core/src/test/java/test/org/apache/spark/sql/JavaColumnExpressionSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaColumnExpressionSuite.java index 1836cc403c3..bee77616b7e 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaColumnExpressionSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaColumnExpressionSuite.java @@ -86,7 +86,7 @@ public class JavaColumnExpressionSuite { AnalysisException e = Assert.assertThrows(AnalysisException.class, () -> df.filter(df.col("a").isInCollection(Arrays.asList(new Column("b"); Assert.assertTrue(e.getErrorClass().equals("DATATYPE_MISMATCH.DATA_DIFF_TYPES")); -Map messageParameters = new HashMap(); +Map messageParameters = new HashMap<>(); messageParameters.put("functionName", "`in`"); messageParameters.put("dataType", "[\"INT\", \"ARRAY\"]"); messageParameters.put("sqlExpr", "\"(a IN (b))\""); - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (ba4a2abdac8 -> 8cceb3946bd)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from ba4a2abdac8 [SPARK-41750][BUILD] Upgrade `dev.ludovic.netlib` to 3.0.3 add 8cceb3946bd [SPARK-41709][CORE][SQL][UI] Explicitly define `Seq` as `collection.Seq` to avoid `toSeq` when create ui objects from protobuf objects for Scala 2.13 No new revisions were added by this update. Summary of changes: .../org/apache/spark/scheduler/SparkListener.scala | 3 ++- .../apache/spark/status/AppStatusListener.scala| 2 +- .../org/apache/spark/status/AppStatusStore.scala | 2 +- .../scala/org/apache/spark/status/LiveEntity.scala | 4 ++-- .../scala/org/apache/spark/status/api/v1/api.scala | 24 +++--- ...plicationEnvironmentInfoWrapperSerializer.scala | 12 +-- .../ApplicationInfoWrapperSerializer.scala | 2 +- .../status/protobuf/JobDataWrapperSerializer.scala | 2 +- .../protobuf/RDDStorageInfoWrapperSerializer.scala | 6 +++--- .../protobuf/TaskDataWrapperSerializer.scala | 2 +- .../scala/org/apache/spark/status/storeTypes.scala | 2 +- .../scala/org/apache/spark/ui/PagedTable.scala | 4 ++-- .../main/scala/org/apache/spark/ui/UIUtils.scala | 6 -- .../org/apache/spark/ui/storage/RDDPage.scala | 6 +++--- .../main/scala/org/apache/spark/util/Utils.scala | 13 +++- .../org/apache/spark/util/JsonProtocolSuite.scala | 12 +++ project/MimaExcludes.scala | 22 +++- .../org/apache/spark/sql/internal/SQLConf.scala| 2 +- .../sql/execution/ui/SQLAppStatusListener.scala| 2 +- .../spark/sql/execution/ui/SQLAppStatusStore.scala | 2 +- .../sql/SQLExecutionUIDataSerializer.scala | 2 +- 21 files changed, 81 insertions(+), 51 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41714][BUILD] Update maven-checkstyle-plugin from 3.1.2 to 3.2.0
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 37788b780a5 [SPARK-41714][BUILD] Update maven-checkstyle-plugin from 3.1.2 to 3.2.0 37788b780a5 is described below commit 37788b780a5b1998676672c2722a00801c3326e4 Author: panbingkun AuthorDate: Tue Dec 27 12:52:05 2022 -0600 [SPARK-41714][BUILD] Update maven-checkstyle-plugin from 3.1.2 to 3.2.0 ### What changes were proposed in this pull request? This PR aims to update maven-checkstyle-plugin from 3.1.2 to 3.2.0. ### Why are the changes needed? This will bring the latest bug fixes. v3.1.2 VS v3.2.0 https://github.com/apache/maven-checkstyle-plugin/compare/maven-checkstyle-plugin-3.1.2...maven-checkstyle-plugin-3.2.0 https://user-images.githubusercontent.com/15246973/209523129-5af8ddd5-75e6-4e18-9b57-c0462d9970af.png";> ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Closes #39218 from panbingkun/SPARK-41714. Authored-by: panbingkun Signed-off-by: Sean Owen --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 46142c4e50b..783893e71c6 100644 --- a/pom.xml +++ b/pom.xml @@ -3312,7 +3312,7 @@ org.apache.maven.plugins maven-checkstyle-plugin -3.1.2 +3.2.0 false true - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [CORE][MINOR] Correct spelling for RPC in log
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 02fdefddcfa [CORE][MINOR] Correct spelling for RPC in log 02fdefddcfa is described below commit 02fdefddcfa29aedc3b41548b61c4f8f0fd6c995 Author: Ted Yu AuthorDate: Sat Dec 24 08:38:43 2022 -0600 [CORE][MINOR] Correct spelling for RPC in log ### What changes were proposed in this pull request? This PR corrects spelling mistake for `RPC` in log for `sendRpc`. Similar error in context.R is also fixed. ### Why are the changes needed? The spelling mistake confuses users. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing test suite Closes #39200 from tedyu/trans-rpc. Authored-by: Ted Yu Signed-off-by: Sean Owen --- R/pkg/R/context.R | 2 +- .../src/main/java/org/apache/spark/network/client/TransportClient.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R index cca6c2c817d..eea83aa5ab5 100644 --- a/R/pkg/R/context.R +++ b/R/pkg/R/context.R @@ -170,7 +170,7 @@ parallelize <- function(sc, coll, numSlices = 1) { serializedSlices <- lapply(slices, serialize, connection = NULL) # The RPC backend cannot handle arguments larger than 2GB (INT_MAX) - # If serialized data is safely less than that threshold we send it over the PRC channel. + # If serialized data is safely less than that threshold we send it over the RPC channel. 
# Otherwise, we write it to a file and send the file name if (objectSize < sizeLimit) { jrdd <- callJStatic("org.apache.spark.api.r.RRDD", "createRDDFromArray", sc, serializedSlices) diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java index dd2fdb08ee5..4a0a1566998 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java @@ -274,7 +274,7 @@ public class TransportClient implements Closeable { copy.flip(); result.set(copy); } catch (Throwable t) { - logger.warn("Error in responding PRC callback", t); + logger.warn("Error in responding RPC callback", t); result.setException(t); } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41008][MLLIB] Follow-up isotonic regression features deduplica…
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f92c827acab [SPARK-41008][MLLIB] Follow-up isotonic regression features deduplica… f92c827acab is described below commit f92c827acabccf547d5a1dff4f7ec371bc370230 Author: Ahmed Mahran AuthorDate: Sun Dec 11 15:01:15 2022 -0600 [SPARK-41008][MLLIB] Follow-up isotonic regression features deduplica… ### What changes were proposed in this pull request? A follow-up on https://github.com/apache/spark/pull/38966 to update relevant documentation and remove redundant sort key. ### Why are the changes needed? For isotonic regression, another method for breaking ties of repeated features was introduced in https://github.com/apache/spark/pull/38966. This will aggregate points having the same feature value by computing the weighted average of the labels. - This only requires points to be sorted by features instead of features and labels. So, we should remove label as a secondary sorting key. - Isotonic regression documentation needs to be updated to reflect the new behavior. ### Does this PR introduce _any_ user-facing change? Isotonic regression documentation update. The documentation described the behavior of the algorithm when there are points in the input with repeated features. Since this behavior has changed, documentation needs to describe the new behavior. ### How was this patch tested? Existing tests passed. No need to add new tests since existing tests are already comprehensive. srowen Closes #38996 from ahmed-mahran/ml-isotonic-reg-dups-follow-up. 
Authored-by: Ahmed Mahran Signed-off-by: Sean Owen --- docs/mllib-isotonic-regression.md | 18 ++--- .../mllib/regression/IsotonicRegression.scala | 82 ++ .../mllib/regression/IsotonicRegressionSuite.scala | 29 +--- 3 files changed, 67 insertions(+), 62 deletions(-) diff --git a/docs/mllib-isotonic-regression.md b/docs/mllib-isotonic-regression.md index 95be32a819e..711e828bd80 100644 --- a/docs/mllib-isotonic-regression.md +++ b/docs/mllib-isotonic-regression.md @@ -43,7 +43,14 @@ best fitting the original data points. which uses an approach to [parallelizing isotonic regression](https://doi.org/10.1007/978-3-642-99789-1_10). The training input is an RDD of tuples of three double values that represent -label, feature and weight in this order. Additionally, IsotonicRegression algorithm has one +label, feature and weight in this order. In case there are multiple tuples with +the same feature then these tuples are aggregated into a single tuple as follows: + +* Aggregated label is the weighted average of all labels. +* Aggregated feature is the unique feature value. +* Aggregated weight is the sum of all weights. + +Additionally, IsotonicRegression algorithm has one optional parameter called $isotonic$ defaulting to true. This argument specifies if the isotonic regression is isotonic (monotonically increasing) or antitonic (monotonically decreasing). @@ -53,17 +60,12 @@ labels for both known and unknown features. The result of isotonic regression is treated as piecewise linear function. The rules for prediction therefore are: * If the prediction input exactly matches a training feature - then associated prediction is returned. In case there are multiple predictions with the same - feature then one of them is returned. Which one is undefined - (same as java.util.Arrays.binarySearch). + then associated prediction is returned. * If the prediction input is lower or higher than all training features then prediction with lowest or highest feature is returned respectively. 
- In case there are multiple predictions with the same feature - then the lowest or highest is returned respectively. * If the prediction input falls between two training features then prediction is treated as piecewise linear function and interpolated value is calculated from the - predictions of the two closest features. In case there are multiple values - with the same feature then the same rules as in previous point are used. + predictions of the two closest features. ### Examples diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index 0b2bf147501..fbf0dc9c357 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -23,7 +23,6 @@ import java.util.Arrays.binarySearch import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer -import org.apache.commons.math3
[spark] branch branch-3.3 updated: [SPARK-41458][BUILD][YARN][SHUFFLE] Correctly transform the SPI services for Yarn Shuffle Service
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 88d20e4c71e [SPARK-41458][BUILD][YARN][SHUFFLE] Correctly transform the SPI services for Yarn Shuffle Service 88d20e4c71e is described below commit 88d20e4c71e757103984316eaab283c4169aa38c Author: Cheng Pan AuthorDate: Fri Dec 9 08:14:17 2022 -0600 [SPARK-41458][BUILD][YARN][SHUFFLE] Correctly transform the SPI services for Yarn Shuffle Service ### What changes were proposed in this pull request? Correctly transform the SPI services for Yarn Shuffle Service by configuring `ServicesResourceTransformer`. ### Why are the changes needed? SPARK-12807 relocated the Jackson classes, but did not handle SPI services properly. It affects Spark 2.0 and above, so this PR is for 3.2/3.3/master. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked the output jar. Before: https://user-images.githubusercontent.com/26535726/206632421-acbec562-c600-4497-83a3-f9b2f6caba74.png";> After: https://user-images.githubusercontent.com/26535726/206632440-4c8ed745-dbc8-4b6e-a9e7-f285521aa8b4.png";> Closes #38989 from pan3793/SPARK-41458. Authored-by: Cheng Pan Signed-off-by: Sean Owen (cherry picked from commit be52d67fbe98110eeabf1b2a7c16741dceefdca6) Signed-off-by: Sean Owen --- common/network-yarn/pom.xml | 5 + 1 file changed, 5 insertions(+) diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 81146a36c98..cded75b2f8e 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -136,6 +136,11 @@ shade + + + + + - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41458][BUILD][YARN][SHUFFLE] Correctly transform the SPI services for Yarn Shuffle Service
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new be52d67fbe9 [SPARK-41458][BUILD][YARN][SHUFFLE] Correctly transform the SPI services for Yarn Shuffle Service be52d67fbe9 is described below commit be52d67fbe98110eeabf1b2a7c16741dceefdca6 Author: Cheng Pan AuthorDate: Fri Dec 9 08:14:17 2022 -0600 [SPARK-41458][BUILD][YARN][SHUFFLE] Correctly transform the SPI services for Yarn Shuffle Service ### What changes were proposed in this pull request? Correctly transform the SPI services for Yarn Shuffle Service by configuring `ServicesResourceTransformer`. ### Why are the changes needed? SPARK-12807 relocated the Jackson classes, but did not handle SPI services properly. It affects Spark 2.0 and above, so this PR is for 3.2/3.3/master. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked the output jar. Before: https://user-images.githubusercontent.com/26535726/206632421-acbec562-c600-4497-83a3-f9b2f6caba74.png";> After: https://user-images.githubusercontent.com/26535726/206632440-4c8ed745-dbc8-4b6e-a9e7-f285521aa8b4.png";> Closes #38989 from pan3793/SPARK-41458. Authored-by: Cheng Pan Signed-off-by: Sean Owen --- common/network-yarn/pom.xml | 5 + 1 file changed, 5 insertions(+) diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index a4073969dbf..a77732bb8b8 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -136,6 +136,11 @@ shade + + + + + - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41008][MLLIB] Dedup isotonic regression duplicate features
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 3d05c7e037e [SPARK-41008][MLLIB] Dedup isotonic regression duplicate features 3d05c7e037e is described below commit 3d05c7e037eff79de8ef9f6231aca8340bcc65ef Author: Ahmed Mahran AuthorDate: Thu Dec 8 08:28:48 2022 -0600 [SPARK-41008][MLLIB] Dedup isotonic regression duplicate features ### What changes were proposed in this pull request? Adding a pre-processing step to isotonic regression in mllib to handle duplicate features. This is to match `sklearn` implementation. Input points of duplicate feature values are aggregated into a single point using as label the weighted average of the labels of the points with duplicate feature values. All points for a unique feature values are aggregated as: - Aggregated label is the weighted average of all labels - Aggregated feature is the weighted average of all equal features. It is possible that feature values to be equal up to a resolution due to representation errors, since we cannot know which feature value to use in that case, we compute the weighted average of the features. Ideally, all feature values will be equal and the weighted average is just the value at any point. - Aggregated weight is the sum of all weights ### Why are the changes needed? As per discussion on ticket [[SPARK-41008]](https://issues.apache.org/jira/browse/SPARK-41008), it is a bug and results should match `sklearn`. ### Does this PR introduce _any_ user-facing change? There are no changes to the API, documentation or error messages. However, the user should expect results to change. ### How was this patch tested? Existing test cases for duplicate features failed. These tests were adjusted accordingly. Also, new tests are added. 
Here is a python snippet that can be used to verify the results: ```python from sklearn.isotonic import IsotonicRegression def test(x, y, x_test, isotonic=True): ir = IsotonicRegression(out_of_bounds='clip', increasing=isotonic).fit(x, y) y_test = ir.predict(x_test) def print_array(label, a): print(f"{label}: [{', '.join([str(i) for i in a])}]") print_array("boundaries", ir.X_thresholds_) print_array("predictions", ir.y_thresholds_) print_array("y_test", y_test) test( x = [0.6, 0.6, 0.333, 0.333, 0.333, 0.20, 0.20, 0.20, 0.20], y = [1, 0, 0, 1, 0, 1, 0, 0, 0], x_test = [0.6, 0.6, 0.333, 0.333, 0.333, 0.20, 0.20, 0.20, 0.20] ) ``` srowen zapletal-martin Closes #38966 from ahmed-mahran/ml-isotonic-reg-dups. Authored-by: Ahmed Mahran Signed-off-by: Sean Owen --- .../mllib/regression/IsotonicRegression.scala | 141 +--- .../mllib/regression/IsotonicRegressionSuite.scala | 180 - 2 files changed, 262 insertions(+), 59 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index 649f9816e6a..0b2bf147501 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.spark.mllib.regression import java.io.Serializable @@ -24,6 +23,7 @@ import java.util.Arrays.binarySearch import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer +import org.apache.commons.math3.util.Precision import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ @@ -307,6 +307,65 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali run(input.rdd.retag.asInstanceOf[RDD[(Double, Double, Double)]]) } + /** + * Aggregates points of duplicate feature values into a single point using as label the weighted + * average of the labels of the points with duplicate feature values. All points for a unique + * feature values are aggregated as: + * + * - Aggregated label is the weighted average of all labels + * - Aggregated feature is the weighted average of all equal features[1] + * - Aggregated weight is the sum of all weights + * + * [1] Note: It is possible that feature values to be equal up to a resolution due to + * representation errors, since we cannot know which feature value to use in that case, we + * compute the weighted average of the featur
[spark] branch master updated: [SPARK-41408][BUILD] Upgrade scala-maven-plugin to 4.8.0
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new a3a755d3613 [SPARK-41408][BUILD] Upgrade scala-maven-plugin to 4.8.0 a3a755d3613 is described below commit a3a755d36136295473a4873a6df33c295c29213e Author: yangjie01 AuthorDate: Thu Dec 8 07:41:19 2022 -0600 [SPARK-41408][BUILD] Upgrade scala-maven-plugin to 4.8.0 ### What changes were proposed in this pull request? This PR aims to upgrade scala-maven-plugin to 4.8.0 ### Why are the changes needed? This version upgrades zinc to 1.8.0 and includes some bug fixes; all changes from 4.7.2 are as follows: - https://github.com/davidB/scala-maven-plugin/compare/4.7.2...4.8.0 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #38936 from LuciferYang/sm-plugin-480. Authored-by: yangjie01 Signed-off-by: Sean Owen --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index b2e5979f467..a93954b3c5d 100644 --- a/pom.xml +++ b/pom.xml @@ -175,7 +175,7 @@ errors building different Hadoop versions. See: SPARK-36547, SPARK-38394. --> -4.7.2 +4.8.0 true true - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41376][CORE] Correct the Netty preferDirectBufs check logic on executor start
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e6be3002fe8 [SPARK-41376][CORE] Correct the Netty preferDirectBufs check logic on executor start e6be3002fe8 is described below commit e6be3002fe8fbba6b29783363c91dcb0982c4ddb Author: Cheng Pan AuthorDate: Wed Dec 7 18:15:21 2022 -0600 [SPARK-41376][CORE] Correct the Netty preferDirectBufs check logic on executor start ### What changes were proposed in this pull request? Fix the condition for judging Netty prefer direct memory on executor start, the logic should match `org.apache.spark.network.client.TransportClientFactory`. ### Why are the changes needed? The check logical was added in SPARK-27991, the original intention is to avoid potential Netty OOM issue when Netty uses direct memory to consume shuffle data, but the condition is not sufficient, this PR updates the logic to match `org.apache.spark.network.client.TransportClientFactory` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manual testing. Closes #38901 from pan3793/SPARK-41376. Authored-by: Cheng Pan Signed-off-by: Sean Owen --- .../java/org/apache/spark/network/util/NettyUtils.java | 14 ++ .../spark/executor/CoarseGrainedExecutorBackend.scala | 5 - 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java b/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java index 4f070f02a12..cc4657efe39 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java @@ -179,4 +179,18 @@ public class NettyUtils { allowCache ? 
PooledByteBufAllocator.defaultUseCacheForAllThreads() : false ); } + + /** + * ByteBuf allocator prefers to allocate direct ByteBuf iif both Spark allows to create direct + * ByteBuf and Netty enables directBufferPreferred. + */ + public static boolean preferDirectBufs(TransportConf conf) { +boolean allowDirectBufs; +if (conf.sharedByteBufAllocators()) { + allowDirectBufs = conf.preferDirectBufsForSharedByteBufAllocators(); +} else { + allowDirectBufs = conf.preferDirectBufs(); +} +return allowDirectBufs && PlatformDependent.directBufferPreferred(); + } } diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index a94e63656e1..4903421f906 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -35,6 +35,8 @@ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.worker.WorkerWatcher import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ +import org.apache.spark.network.netty.SparkTransportConf +import org.apache.spark.network.util.NettyUtils import org.apache.spark.resource.ResourceInformation import org.apache.spark.resource.ResourceProfile import org.apache.spark.resource.ResourceProfile._ @@ -85,7 +87,8 @@ private[spark] class CoarseGrainedExecutorBackend( logInfo("Connecting to driver: " + driverUrl) try { - if (PlatformDependent.directBufferPreferred() && + val shuffleClientTransportConf = SparkTransportConf.fromSparkConf(env.conf, "shuffle") + if (NettyUtils.preferDirectBufs(shuffleClientTransportConf) && PlatformDependent.maxDirectMemory() < env.conf.get(MAX_REMOTE_BLOCK_SIZE_FETCH_TO_MEM)) { throw new SparkException(s"Netty direct memory should at least be bigger than " + s"'${MAX_REMOTE_BLOCK_SIZE_FETCH_TO_MEM.key}', but got " + - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark-website] branch asf-site updated: Change download text for spark 3.2.3. from Apache Hadoop 3.3 to Apache Hadoop 3.2
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new 402123288 Change download text for spark 3.2.3. from Apache Hadoop 3.3 to Apache Hadoop 3.2 402123288 is described below commit 402123288523e823ad080188fd906dfcbe1b8bc4 Author: Bjørn Jørgensen AuthorDate: Tue Dec 6 11:50:20 2022 -0600 Change download text for spark 3.2.3. from Apache Hadoop 3.3 to Apache Hadoop 3.2 This is only for Apache Spark 3.2.3 Change Apache Hadoop 3.3 to Apache Hadoop 3.2 I have not Make sure that you generate site HTML with `bundle exec jekyll build`, and include the changes to the HTML in your pull request. See README.md for more information. Author: Bjørn Jørgensen Author: Bjørn Closes #429 from bjornjorgensen/patch-1. --- js/downloads.js | 4 ++-- site/js/downloads.js | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/js/downloads.js b/js/downloads.js index 6b6c0e570..4b1ed14ef 100644 --- a/js/downloads.js +++ b/js/downloads.js @@ -14,8 +14,8 @@ function addRelease(version, releaseDate, packages, mirrored) { var sources = {pretty: "Source Code", tag: "sources"}; var hadoopFree = {pretty: "Pre-built with user-provided Apache Hadoop", tag: "without-hadoop"}; var hadoop2p7 = {pretty: "Pre-built for Apache Hadoop 2.7", tag: "hadoop2.7"}; -var hadoop3p3 = {pretty: "Pre-built for Apache Hadoop 3.3 and later", tag: "hadoop3.2"}; -var hadoop3p3scala213 = {pretty: "Pre-built for Apache Hadoop 3.3 and later (Scala 2.13)", tag: "hadoop3.2-scala2.13"}; +var hadoop3p3 = {pretty: "Pre-built for Apache Hadoop 3.2 and later", tag: "hadoop3.2"}; +var hadoop3p3scala213 = {pretty: "Pre-built for Apache Hadoop 3.2 and later (Scala 2.13)", tag: "hadoop3.2-scala2.13"}; var hadoop2p = {pretty: "Pre-built for Apache Hadoop 2.7", tag: "hadoop2"}; var hadoop3p = {pretty: "Pre-built for Apache 
Hadoop 3.3 and later", tag: "hadoop3"}; var hadoop3pscala213 = {pretty: "Pre-built for Apache Hadoop 3.3 and later (Scala 2.13)", tag: "hadoop3-scala2.13"}; diff --git a/site/js/downloads.js b/site/js/downloads.js index 6b6c0e570..4b1ed14ef 100644 --- a/site/js/downloads.js +++ b/site/js/downloads.js @@ -14,8 +14,8 @@ function addRelease(version, releaseDate, packages, mirrored) { var sources = {pretty: "Source Code", tag: "sources"}; var hadoopFree = {pretty: "Pre-built with user-provided Apache Hadoop", tag: "without-hadoop"}; var hadoop2p7 = {pretty: "Pre-built for Apache Hadoop 2.7", tag: "hadoop2.7"}; -var hadoop3p3 = {pretty: "Pre-built for Apache Hadoop 3.3 and later", tag: "hadoop3.2"}; -var hadoop3p3scala213 = {pretty: "Pre-built for Apache Hadoop 3.3 and later (Scala 2.13)", tag: "hadoop3.2-scala2.13"}; +var hadoop3p3 = {pretty: "Pre-built for Apache Hadoop 3.2 and later", tag: "hadoop3.2"}; +var hadoop3p3scala213 = {pretty: "Pre-built for Apache Hadoop 3.2 and later (Scala 2.13)", tag: "hadoop3.2-scala2.13"}; var hadoop2p = {pretty: "Pre-built for Apache Hadoop 2.7", tag: "hadoop2"}; var hadoop3p = {pretty: "Pre-built for Apache Hadoop 3.3 and later", tag: "hadoop3"}; var hadoop3pscala213 = {pretty: "Pre-built for Apache Hadoop 3.3 and later (Scala 2.13)", tag: "hadoop3-scala2.13"}; - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41313][SPARK-3900][SPARK-21138] Combine fixes for SPARK-3900 and SPARK-21138
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new d62c18b7497 [SPARK-41313][SPARK-3900][SPARK-21138] Combine fixes for and d62c18b7497 is described below commit d62c18b7497997188ec587e1eb62e75c979c1c93 Author: Xing Lin AuthorDate: Sun Dec 4 08:24:37 2022 -0600 [SPARK-41313][SPARK-3900][SPARK-21138] Combine fixes for and ### What changes were proposed in this pull request? spark-3900 fixed the illegalStateException in cleanupStagingDir in ApplicationMaster's shutdownhook. However, spark-21138 accidentally reverted/undid that change when fixing the "Wrong FS" bug. Now, we are seeing spark-3900 reported by our users at Linkedin. We need to bring back the fix for spark-3900. The illegalStateException when creating a new filesystem object is due to the limitation in hadoop that we can not register a shutdownhook during shutdown. So, when a spark job fails during pre-launch, as part of shutdown, cleanupStagingDir would be called. Then, if we attempt to create a new filesystem object for the first time, hadoop would try to register a hook to shutdown KeyProviderCache when creating a ClientContext for DFSClient. As a result, we hit the illegalStateException. [...] ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Closes #38832 from xinglin/SPARK-41313. 
Authored-by: Xing Lin Signed-off-by: Sean Owen --- .../org/apache/spark/deploy/yarn/ApplicationMaster.scala| 13 ++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index a7676fe24f6..69dd72720a5 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -240,6 +240,9 @@ private[spark] class ApplicationMaster( logInfo("ApplicationAttemptId: " + appAttemptId) + // During shutdown, we may not be able to create an FileSystem object. So, pre-create here. + val stagingDirPath = new Path(System.getenv("SPARK_YARN_STAGING_DIR")) + val stagingDirFs = stagingDirPath.getFileSystem(yarnConf) // This shutdown hook should run *after* the SparkContext is shut down. val priority = ShutdownHookManager.SPARK_CONTEXT_SHUTDOWN_PRIORITY - 1 ShutdownHookManager.addShutdownHook(priority) { () => @@ -261,14 +264,14 @@ private[spark] class ApplicationMaster( if (!unregistered) { // we only want to unregister if we don't want the RM to retry if (isLastAttempt) { - cleanupStagingDir(new Path(System.getenv("SPARK_YARN_STAGING_DIR"))) + cleanupStagingDir(stagingDirFs, stagingDirPath) unregister(finalStatus, finalMsg) } else if (finalStatus == FinalApplicationStatus.SUCCEEDED) { // When it's not the last attempt, if unregister failed caused by timeout exception, // YARN will rerun the application, AM should not clean staging dir before unregister // success. unregister(finalStatus, finalMsg) - cleanupStagingDir(new Path(System.getenv("SPARK_YARN_STAGING_DIR"))) + cleanupStagingDir(stagingDirFs, stagingDirPath) } } } catch { @@ -686,11 +689,15 @@ private[spark] class ApplicationMaster( * Clean up the staging directory. 
*/ private def cleanupStagingDir(stagingDirPath: Path): Unit = { +val stagingDirFs = stagingDirPath.getFileSystem(yarnConf) +cleanupStagingDir(stagingDirFs, stagingDirPath) + } + + private def cleanupStagingDir(fs: FileSystem, stagingDirPath: Path): Unit = { try { val preserveFiles = sparkConf.get(PRESERVE_STAGING_FILES) if (!preserveFiles) { logInfo("Deleting staging directory " + stagingDirPath) -val fs = stagingDirPath.getFileSystem(yarnConf) fs.delete(stagingDirPath, true) } } catch { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (5b13a51dc0a -> 70502d7a043)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 5b13a51dc0a [SPARK-41335][CONNECT][PYTHON] Support IsNull and IsNotNull in Column add 70502d7a043 [SPARK-41276][SQL][ML][MLLIB][PROTOBUF][PYTHON][R][SS][AVRO] Optimize constructor use of `StructType` No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/avro/SchemaConverters.scala | 4 ++-- .../spark/sql/protobuf/utils/SchemaConverters.scala | 2 +- .../main/scala/org/apache/spark/ml/fpm/FPGrowth.scala| 4 ++-- .../main/scala/org/apache/spark/ml/fpm/PrefixSpan.scala | 2 +- .../scala/org/apache/spark/ml/image/ImageSchema.scala| 16 .../scala/org/apache/spark/ml/linalg/MatrixUDT.scala | 2 +- .../scala/org/apache/spark/ml/linalg/VectorUDT.scala | 2 +- .../apache/spark/ml/source/libsvm/LibSVMRelation.scala | 2 +- .../scala/org/apache/spark/mllib/linalg/Matrices.scala | 2 +- .../scala/org/apache/spark/mllib/linalg/Vectors.scala| 2 +- .../spark/ml/source/libsvm/LibSVMRelationSuite.scala | 6 +++--- .../spark/sql/catalyst/expressions/jsonExpressions.scala | 2 +- .../apache/spark/sql/catalyst/json/JsonInferSchema.scala | 2 +- .../sql/catalyst/optimizer/OptimizeCsvJsonExprs.scala| 6 +++--- .../apache/spark/sql/catalyst/parser/AstBuilder.scala| 8 .../catalyst/plans/logical/basicLogicalOperators.scala | 2 +- .../scala/org/apache/spark/sql/types/StructType.scala| 6 +++--- .../scala/org/apache/spark/sql/util/ArrowUtils.scala | 4 ++-- .../main/scala/org/apache/spark/sql/api/r/SQLUtils.scala | 2 +- .../apache/spark/sql/execution/command/SetCommand.scala | 16 .../apache/spark/sql/execution/command/functions.scala | 2 +- .../org/apache/spark/sql/execution/command/tables.scala | 2 +- .../datasources/binaryfile/BinaryFileFormat.scala| 10 +- .../spark/sql/execution/datasources/orc/OrcUtils.scala | 2 +- .../sql/execution/datasources/v2/text/TextTable.scala| 2 +- 
.../sql/execution/python/AggregateInPandasExec.scala | 2 +- .../spark/sql/execution/python/EvalPythonExec.scala | 2 +- .../spark/sql/execution/python/MapInBatchExec.scala | 2 +- .../streaming/sources/RatePerMicroBatchProvider.scala| 2 +- .../execution/streaming/sources/RateStreamProvider.scala | 2 +- .../streaming/sources/TextSocketSourceProvider.scala | 6 +++--- .../scala/org/apache/spark/sql/hive/HiveInspectors.scala | 2 +- .../main/scala/org/apache/spark/sql/hive/HiveUtils.scala | 2 +- .../apache/spark/sql/hive/client/HiveClientImpl.scala| 2 +- .../main/scala/org/apache/spark/sql/hive/hiveUDFs.scala | 2 +- 35 files changed, 67 insertions(+), 67 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [SPARK-41254][YARN] bugfix wrong usage when checking YarnAllocator.rpIdToYarnResource key existence
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 090bebd6a63 [SPARK-41254][YARN] bugfix wrong usage when check YarnAllocator.rpIdToYarnResource key existence 090bebd6a63 is described below commit 090bebd6a63fdd69b14d08c459fd5bd2301948e4 Author: John Caveman AuthorDate: Mon Nov 28 08:25:00 2022 -0600 [SPARK-41254][YARN] bugfix wrong usage when check YarnAllocator.rpIdToYarnResource key existence ### What changes were proposed in this pull request? bugfix, a misuse of ConcurrentHashMap.contains causing map YarnAllocator.rpIdToYarnResource always updated ### Why are the changes needed? It causing duplicated log during yarn resource allocation and unnecessary object creation and gc ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #38790 from CavemanIV/SPARK-41254. 
Authored-by: John Caveman Signed-off-by: Sean Owen (cherry picked from commit bccfe5bca600b3091ea93b4c5d6437af8381973f) Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index a85b7174673..16cae4810e4 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -278,7 +278,7 @@ private[yarn] class YarnAllocator( // if a ResourceProfile hasn't been seen yet, create the corresponding YARN Resource for it private def createYarnResourceForResourceProfile(rp: ResourceProfile): Unit = synchronized { -if (!rpIdToYarnResource.contains(rp.id)) { +if (!rpIdToYarnResource.containsKey(rp.id)) { // track the resource profile if not already there getOrUpdateRunningExecutorForRPId(rp.id) logInfo(s"Resource profile ${rp.id} doesn't exist, adding it") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.2 updated: [SPARK-41254][YARN] bugfix wrong usage when checking YarnAllocator.rpIdToYarnResource key existence
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.2 by this push: new 19450452dcd [SPARK-41254][YARN] bugfix wrong usage when check YarnAllocator.rpIdToYarnResource key existence 19450452dcd is described below commit 19450452dcd6134853f6c0db8e755e78cddef922 Author: John Caveman AuthorDate: Mon Nov 28 08:25:00 2022 -0600 [SPARK-41254][YARN] bugfix wrong usage when check YarnAllocator.rpIdToYarnResource key existence ### What changes were proposed in this pull request? bugfix, a misuse of ConcurrentHashMap.contains causing map YarnAllocator.rpIdToYarnResource always updated ### Why are the changes needed? It causing duplicated log during yarn resource allocation and unnecessary object creation and gc ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #38790 from CavemanIV/SPARK-41254. 
Authored-by: John Caveman Signed-off-by: Sean Owen (cherry picked from commit bccfe5bca600b3091ea93b4c5d6437af8381973f) Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 54ab643f275..26535d672d7 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -276,7 +276,7 @@ private[yarn] class YarnAllocator( // if a ResourceProfile hasn't been seen yet, create the corresponding YARN Resource for it private def createYarnResourceForResourceProfile(rp: ResourceProfile): Unit = synchronized { -if (!rpIdToYarnResource.contains(rp.id)) { +if (!rpIdToYarnResource.containsKey(rp.id)) { // track the resource profile if not already there getOrUpdateRunningExecutorForRPId(rp.id) logInfo(s"Resource profile ${rp.id} doesn't exist, adding it") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41254][YARN] bugfix wrong usage when checking YarnAllocator.rpIdToYarnResource key existence
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new bccfe5bca60 [SPARK-41254][YARN] bugfix wrong usage when check YarnAllocator.rpIdToYarnResource key existence bccfe5bca60 is described below commit bccfe5bca600b3091ea93b4c5d6437af8381973f Author: John Caveman AuthorDate: Mon Nov 28 08:25:00 2022 -0600 [SPARK-41254][YARN] bugfix wrong usage when check YarnAllocator.rpIdToYarnResource key existence ### What changes were proposed in this pull request? bugfix, a misuse of ConcurrentHashMap.contains causing map YarnAllocator.rpIdToYarnResource always updated ### Why are the changes needed? It causing duplicated log during yarn resource allocation and unnecessary object creation and gc ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Existing tests Closes #38790 from CavemanIV/SPARK-41254. 
Authored-by: John Caveman Signed-off-by: Sean Owen --- .../src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index a90ab180d86..ee1d10c204a 100644 --- a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -298,7 +298,7 @@ private[yarn] class YarnAllocator( // if a ResourceProfile hasn't been seen yet, create the corresponding YARN Resource for it private def createYarnResourceForResourceProfile(rp: ResourceProfile): Unit = synchronized { -if (!rpIdToYarnResource.contains(rp.id)) { +if (!rpIdToYarnResource.containsKey(rp.id)) { // track the resource profile if not already there getOrUpdateRunningExecutorForRPId(rp.id) logInfo(s"Resource profile ${rp.id} doesn't exist, adding it") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (ac029d6ec0f -> 483e3c93ddb)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from ac029d6ec0f [SPARK-41224][SPARK-41165][SPARK-41184] Optimized Arrow-based collect implementation to stream from server to client add 483e3c93ddb [SPARK-41097][CORE][SQL][SS][PROTOBUF] Remove redundant collection conversion base on Scala 2.13 code No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/kafka010/KafkaOffsetReaderAdmin.scala | 2 +- .../org/apache/spark/sql/kafka010/KafkaOffsetReaderConsumer.scala | 2 +- .../org/apache/spark/sql/protobuf/utils/SchemaConverters.scala | 2 +- .../src/main/scala/org/apache/spark/ExecutorAllocationManager.scala | 2 +- core/src/main/scala/org/apache/spark/status/api/v1/api.scala| 2 +- .../src/main/scala/org/apache/spark/sql/types/StructType.scala | 2 +- sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala | 2 +- .../apache/spark/sql/catalyst/analysis/ResolveSessionCatalog.scala | 2 +- .../apache/spark/sql/execution/adaptive/ShufflePartitionsUtil.scala | 6 +++--- .../main/scala/org/apache/spark/sql/execution/command/tables.scala | 2 +- .../spark/sql/execution/datasources/v2/ShowFunctionsExec.scala | 2 +- .../sql/execution/datasources/v2/ShowTablePropertiesExec.scala | 2 +- .../scala/org/apache/spark/sql/hive/client/HiveClientImpl.scala | 4 ++-- 13 files changed, 16 insertions(+), 16 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41239][BUILD] Upgrade jackson to 2.14.1
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new bf687ad677f [SPARK-41239][BUILD] Upgrade jackson to 2.14.1 bf687ad677f is described below commit bf687ad677f688dfabf4d58368966eb41dbb1cf9 Author: yangjie01 AuthorDate: Wed Nov 23 11:12:33 2022 -0600 [SPARK-41239][BUILD] Upgrade jackson to 2.14.1 ### What changes were proposed in this pull request? This pr aims upgrade `Jackson` related dependencies to 2.14.1 ### Why are the changes needed? This version include an optimization of heap memory usage for Jackson 2.14.x: - https://github.com/FasterXML/jackson-databind/issues/3665 The full release notes as follows: - https://github.com/FasterXML/jackson/wiki/Jackson-Release-2.14.1 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #38771 from LuciferYang/SPARK-41239. 
Authored-by: yangjie01 Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 14 +++--- dev/deps/spark-deps-hadoop-3-hive-2.3 | 14 +++--- pom.xml | 4 ++-- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 374d327c49f..9a08928c233 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -110,16 +110,16 @@ httpclient/4.5.13//httpclient-4.5.13.jar httpcore/4.4.14//httpcore-4.4.14.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.1//ivy-2.5.1.jar -jackson-annotations/2.14.0//jackson-annotations-2.14.0.jar +jackson-annotations/2.14.1//jackson-annotations-2.14.1.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.14.0//jackson-core-2.14.0.jar -jackson-databind/2.14.0//jackson-databind-2.14.0.jar -jackson-dataformat-cbor/2.14.0//jackson-dataformat-cbor-2.14.0.jar -jackson-dataformat-yaml/2.14.0//jackson-dataformat-yaml-2.14.0.jar -jackson-datatype-jsr310/2.14.0//jackson-datatype-jsr310-2.14.0.jar +jackson-core/2.14.1//jackson-core-2.14.1.jar +jackson-databind/2.14.1//jackson-databind-2.14.1.jar +jackson-dataformat-cbor/2.14.1//jackson-dataformat-cbor-2.14.1.jar +jackson-dataformat-yaml/2.14.1//jackson-dataformat-yaml-2.14.1.jar +jackson-datatype-jsr310/2.14.1//jackson-datatype-jsr310-2.14.1.jar jackson-jaxrs/1.9.13//jackson-jaxrs-1.9.13.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.12/2.14.0//jackson-module-scala_2.12-2.14.0.jar +jackson-module-scala_2.12/2.14.1//jackson-module-scala_2.12-2.14.1.jar jackson-xc/1.9.13//jackson-xc-1.9.13.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index a454019eb3a..f5f032e1ea0 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ 
b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -98,15 +98,15 @@ httpcore/4.4.14//httpcore-4.4.14.jar ini4j/0.5.4//ini4j-0.5.4.jar istack-commons-runtime/3.0.8//istack-commons-runtime-3.0.8.jar ivy/2.5.1//ivy-2.5.1.jar -jackson-annotations/2.14.0//jackson-annotations-2.14.0.jar +jackson-annotations/2.14.1//jackson-annotations-2.14.1.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar -jackson-core/2.14.0//jackson-core-2.14.0.jar -jackson-databind/2.14.0//jackson-databind-2.14.0.jar -jackson-dataformat-cbor/2.14.0//jackson-dataformat-cbor-2.14.0.jar -jackson-dataformat-yaml/2.14.0//jackson-dataformat-yaml-2.14.0.jar -jackson-datatype-jsr310/2.14.0//jackson-datatype-jsr310-2.14.0.jar +jackson-core/2.14.1//jackson-core-2.14.1.jar +jackson-databind/2.14.1//jackson-databind-2.14.1.jar +jackson-dataformat-cbor/2.14.1//jackson-dataformat-cbor-2.14.1.jar +jackson-dataformat-yaml/2.14.1//jackson-dataformat-yaml-2.14.1.jar +jackson-datatype-jsr310/2.14.1//jackson-datatype-jsr310-2.14.1.jar jackson-mapper-asl/1.9.13//jackson-mapper-asl-1.9.13.jar -jackson-module-scala_2.12/2.14.0//jackson-module-scala_2.12-2.14.0.jar +jackson-module-scala_2.12/2.14.1//jackson-module-scala_2.12-2.14.1.jar jakarta.annotation-api/1.3.5//jakarta.annotation-api-1.3.5.jar jakarta.inject/2.6.1//jakarta.inject-2.6.1.jar jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar diff --git a/pom.xml b/pom.xml index 12b1db65d72..0605ec34cf8 100644 --- a/pom.xml +++ b/pom.xml @@ -175,8 +175,8 @@ true true 1.9.13 -2.14.0 - 2.14.0 +2.14.1 + 2.14.1 1.1.8.4 3.0.2 1.15 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41223][BUILD] Upgrade slf4j to 2.0.4
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 2dfb81f898c [SPARK-41223][BUILD] Upgrade slf4j to 2.0.4 2dfb81f898c is described below commit 2dfb81f898c141100d73a7047fa5df089c92d322 Author: yangjie01 AuthorDate: Wed Nov 23 08:10:26 2022 -0600 [SPARK-41223][BUILD] Upgrade slf4j to 2.0.4 ### What changes were proposed in this pull request? This pr aims upgrade slf4j related dependencies from 2.0.3 to 2.0.4. ### Why are the changes needed? A bug fix version: - [LoggerFactory only loads services from context class loader](https://jira.qos.ch/browse/SLF4J-544) The release notes as follows: - https://www.slf4j.org/news.html#2.0.4 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GA Closes #38758 from LuciferYang/SPARK-41223. Authored-by: yangjie01 Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 6 +++--- dev/deps/spark-deps-hadoop-3-hive-2.3 | 6 +++--- pom.xml | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index f8c4e5674f6..374d327c49f 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -134,7 +134,7 @@ javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcl-over-slf4j/2.0.3//jcl-over-slf4j-2.0.3.jar +jcl-over-slf4j/2.0.4//jcl-over-slf4j-2.0.4.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jersey-client/2.36//jersey-client-2.36.jar jersey-common/2.36//jersey-common-2.36.jar @@ -158,7 +158,7 @@ json4s-scalap_2.12/3.7.0-M11//json4s-scalap_2.12-3.7.0-M11.jar jsp-api/2.1//jsp-api-2.1.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar 
-jul-to-slf4j/2.0.3//jul-to-slf4j-2.0.3.jar +jul-to-slf4j/2.0.4//jul-to-slf4j-2.0.4.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client-api/6.2.0//kubernetes-client-api-6.2.0.jar kubernetes-client/6.2.0//kubernetes-client-6.2.0.jar @@ -247,7 +247,7 @@ scala-parser-combinators_2.12/2.1.1//scala-parser-combinators_2.12-2.1.1.jar scala-reflect/2.12.17//scala-reflect-2.12.17.jar scala-xml_2.12/2.1.0//scala-xml_2.12-2.1.0.jar shims/0.9.35//shims-0.9.35.jar -slf4j-api/2.0.3//slf4j-api-2.0.3.jar +slf4j-api/2.0.4//slf4j-api-2.0.4.jar snakeyaml/1.33//snakeyaml-1.33.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 704a99a8de2..a454019eb3a 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -119,7 +119,7 @@ javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javolution/5.5.1//javolution-5.5.1.jar jaxb-api/2.2.11//jaxb-api-2.2.11.jar jaxb-runtime/2.3.2//jaxb-runtime-2.3.2.jar -jcl-over-slf4j/2.0.3//jcl-over-slf4j-2.0.3.jar +jcl-over-slf4j/2.0.4//jcl-over-slf4j-2.0.4.jar jdo-api/3.0.1//jdo-api-3.0.1.jar jdom2/2.0.6//jdom2-2.0.6.jar jersey-client/2.36//jersey-client-2.36.jar @@ -142,7 +142,7 @@ json4s-jackson_2.12/3.7.0-M11//json4s-jackson_2.12-3.7.0-M11.jar json4s-scalap_2.12/3.7.0-M11//json4s-scalap_2.12-3.7.0-M11.jar jsr305/3.0.0//jsr305-3.0.0.jar jta/1.1//jta-1.1.jar -jul-to-slf4j/2.0.3//jul-to-slf4j-2.0.3.jar +jul-to-slf4j/2.0.4//jul-to-slf4j-2.0.4.jar kryo-shaded/4.0.2//kryo-shaded-4.0.2.jar kubernetes-client-api/6.2.0//kubernetes-client-api-6.2.0.jar kubernetes-client/6.2.0//kubernetes-client-6.2.0.jar @@ -234,7 +234,7 @@ scala-parser-combinators_2.12/2.1.1//scala-parser-combinators_2.12-2.1.1.jar scala-reflect/2.12.17//scala-reflect-2.12.17.jar scala-xml_2.12/2.1.0//scala-xml_2.12-2.1.0.jar shims/0.9.35//shims-0.9.35.jar -slf4j-api/2.0.3//slf4j-api-2.0.3.jar 
+slf4j-api/2.0.4//slf4j-api-2.0.4.jar snakeyaml/1.33//snakeyaml-1.33.jar snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar diff --git a/pom.xml b/pom.xml index 6a1bcd19afc..12b1db65d72 100644 --- a/pom.xml +++ b/pom.xml @@ -114,7 +114,7 @@ 3.8.6 1.6.0 spark -2.0.3 +2.0.4 2.19.0 3.3.4 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40633][BUILD] Upgrade janino to 3.1.9
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 49e102b63ca [SPARK-40633][BUILD] Upgrade janino to 3.1.9 49e102b63ca is described below commit 49e102b63ca0a0af592fdbaad38e62e8de6e4b2f Author: yangjie01 AuthorDate: Wed Nov 23 08:09:41 2022 -0600 [SPARK-40633][BUILD] Upgrade janino to 3.1.9 ### What changes were proposed in this pull request? This pr aims upgrade janino from 3.1.7 to 3.1.9 ### Why are the changes needed? This version bring some improvement and bug fix, and janino 3.1.9 will no longer test Java 12, 15, 16 because these STS versions have been EOL: - https://github.com/janino-compiler/janino/compare/v3.1.7...v3.1.9 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GitHub Actions - Manual test this pr with Scala 2.13, all test passed Closes #38075 from LuciferYang/SPARK-40633. 
Lead-authored-by: yangjie01 Co-authored-by: YangJie Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 4 ++-- dev/deps/spark-deps-hadoop-3-hive-2.3 | 4 ++-- pom.xml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 7b7c3ac7fb3..f8c4e5674f6 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -38,7 +38,7 @@ commons-cli/1.5.0//commons-cli-1.5.0.jar commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-collections4/4.4//commons-collections4-4.4.jar -commons-compiler/3.1.7//commons-compiler-3.1.7.jar +commons-compiler/3.1.9//commons-compiler-3.1.9.jar commons-compress/1.21//commons-compress-1.21.jar commons-configuration/1.6//commons-configuration-1.6.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar @@ -127,7 +127,7 @@ jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar -janino/3.1.7//janino-3.1.7.jar +janino/3.1.9//janino-3.1.9.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.inject/1//javax.inject-1.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index c648f8896c3..704a99a8de2 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -39,7 +39,7 @@ commons-cli/1.5.0//commons-cli-1.5.0.jar commons-codec/1.15//commons-codec-1.15.jar commons-collections/3.2.2//commons-collections-3.2.2.jar commons-collections4/4.4//commons-collections4-4.4.jar -commons-compiler/3.1.7//commons-compiler-3.1.7.jar +commons-compiler/3.1.9//commons-compiler-3.1.9.jar commons-compress/1.21//commons-compress-1.21.jar commons-crypto/1.1.0//commons-crypto-1.1.0.jar 
commons-dbcp/1.4//commons-dbcp-1.4.jar @@ -113,7 +113,7 @@ jakarta.servlet-api/4.0.3//jakarta.servlet-api-4.0.3.jar jakarta.validation-api/2.0.2//jakarta.validation-api-2.0.2.jar jakarta.ws.rs-api/2.1.6//jakarta.ws.rs-api-2.1.6.jar jakarta.xml.bind-api/2.3.2//jakarta.xml.bind-api-2.3.2.jar -janino/3.1.7//janino-3.1.7.jar +janino/3.1.9//janino-3.1.9.jar javassist/3.25.0-GA//javassist-3.25.0-GA.jar javax.jdo/3.2.0-m3//javax.jdo-3.2.0-m3.jar javolution/5.5.1//javolution-5.5.1.jar diff --git a/pom.xml b/pom.xml index e07b01ea955..6a1bcd19afc 100644 --- a/pom.xml +++ b/pom.xml @@ -190,7 +190,7 @@ 2.11.1 4.1.17 14.0.1 -3.1.7 +3.1.9 2.36 2.12.1 3.5.2 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41106][SQL] Reduce collection conversion when create AttributeMap
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f24f8489f80 [SPARK-41106][SQL] Reduce collection conversion when create AttributeMap f24f8489f80 is described below commit f24f8489f8096c5324a6e7084437ee2238311103 Author: YangJie AuthorDate: Thu Nov 17 10:53:04 2022 -0600 [SPARK-41106][SQL] Reduce collection conversion when create AttributeMap ### What changes were proposed in this pull request? This pr aims to reduce collection conversion when create AttributeMap as following ways: 1. Add a new `apply` method to `AttributeMap` ``` def apply[A](kvs: Iterable[(Attribute, A)]): AttributeMap[A] = { new AttributeMap(kvs.map(kv => (kv._1.exprId, kv)).toMap) } ``` and use it in applicable scenarios to avoid additional collection conversion. Although the new `apply` method is more generic, I did not delete the old ones for forward compatibility. 2. For the following 2 scenarios, `leftStats.attributeStats ++ rightStats.attributeStats` is `AttributeMap ++ AttributeMap`, will return a new `AttributeMap`, so this pr remove the redundant collection conversion. https://github.com/apache/spark/blob/7d320d784a2d637fd1a8fd0798da3d2a39b4d7cd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala#L86 https://github.com/apache/spark/blob/7d320d784a2d637fd1a8fd0798da3d2a39b4d7cd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala#L148 3. 
For the following scenario, `attributePercentiles` is a `Map` and there is a corresponding `apply` method can accept `Map` input, so remove the redundant `toSeq` in this pr https://github.com/apache/spark/blob/7d320d784a2d637fd1a8fd0798da3d2a39b4d7cd/sql/core/src/main/scala/org/apache/spark/sql/execution/command/CommandUtils.scala#L323 ### Why are the changes needed? Minor performance improvements, reducing collection conversion ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GitHub Actions - Manual test ``` dev/change-scala-version.sh 2.13 build/mvn clean install -Phadoop-3 -Phadoop-cloud -Pmesos -Pyarn -Pkinesis-asl -Phive-thriftserver -Pspark-ganglia-lgpl -Pkubernetes -Phive -Pscala-2.13 -fn ``` All Test passed Closes #38610 from LuciferYang/AttributeMap. Lead-authored-by: YangJie Co-authored-by: yangjie01 Signed-off-by: Sean Owen --- .../org/apache/spark/sql/catalyst/expressions/AttributeMap.scala | 4 .../org/apache/spark/sql/catalyst/expressions/AttributeMap.scala | 4 .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 4 ++-- .../spark/sql/catalyst/optimizer/DecorrelateInnerQuery.scala | 2 +- .../spark/sql/catalyst/optimizer/NestedColumnAliasing.scala | 2 +- .../scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 2 +- .../org/apache/spark/sql/catalyst/optimizer/expressions.scala | 4 ++-- .../scala/org/apache/spark/sql/catalyst/optimizer/subquery.scala | 4 ++-- .../scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala | 2 +- .../catalyst/plans/logical/statsEstimation/FilterEstimation.scala | 2 +- .../catalyst/plans/logical/statsEstimation/JoinEstimation.scala | 8 +++- .../apache/spark/sql/execution/columnar/InMemoryRelation.scala| 2 +- .../org/apache/spark/sql/execution/command/CommandUtils.scala | 2 +- 13 files changed, 24 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala 
b/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala index 3a424574b97..c55c542d957 100644 --- a/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala +++ b/sql/catalyst/src/main/scala-2.12/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala @@ -31,6 +31,10 @@ object AttributeMap { new AttributeMap(kvs.map(kv => (kv._1.exprId, kv)).toMap) } + def apply[A](kvs: Iterable[(Attribute, A)]): AttributeMap[A] = { +new AttributeMap(kvs.map(kv => (kv._1.exprId, kv)).toMap) + } + def empty[A]: AttributeMap[A] = new AttributeMap(Map.empty) } diff --git a/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala b/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala index 1f1df2d2e1d..3d5d6471d26 100644 --- a/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql/catalyst/expressions/AttributeMap.scala +++ b/sql/catalyst/src/main/scala-2.13/org/apache/spark/sql
[spark] branch master updated (7cbf7dd148d -> 24adac30539)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 7cbf7dd148d [SPARK-40372][SQL] Migrate failures of array type checks onto error classes add 24adac30539 [SPARK-41093][BUILD] Remove netty-tcnative-classes from Spark dependencyList No new revisions were added by this update. Summary of changes: common/network-common/pom.xml | 4 core/pom.xml | 4 dev/deps/spark-deps-hadoop-2-hive-2.3 | 1 - dev/deps/spark-deps-hadoop-3-hive-2.3 | 1 - pom.xml | 14 -- 5 files changed, 24 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [SPARK-41089][YARN][SHUFFLE] Relocate Netty native arm64 libs
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new bea58e4b863 [SPARK-41089][YARN][SHUFFLE] Relocate Netty native arm64 libs bea58e4b863 is described below commit bea58e4b8634fbe3497431cb50351aa186107fb3 Author: Cheng Pan AuthorDate: Thu Nov 10 08:57:56 2022 -0600 [SPARK-41089][YARN][SHUFFLE] Relocate Netty native arm64 libs ### What changes were proposed in this pull request? SPARK-27610 relocated the netty x86 native libs, and the recent version netty ships arm64 native libs as well, we should do same thing to make it works on arm64 platform. ### Why are the changes needed? Align arm64 behavior w/ x86 ### Does this PR introduce _any_ user-facing change? Yes, bug fix for ARM64 platform. ### How was this patch tested? Before patch ``` ➜ apache-spark git:(SPARK-41089) ll common/network-yarn/target/exploded/META-INF/native total 752 -rw-r--r-- 1 chengpan staff 101K Oct 11 23:24 libnetty_transport_native_epoll_aarch_64.so -rw-r--r-- 1 chengpan staff94K Oct 11 17:57 libnetty_transport_native_kqueue_aarch_64.jnilib -rw-r--r-- 1 chengpan staff93K Oct 11 23:27 liborg_sparkproject_netty_transport_native_epoll_x86_64.so -rw-r--r-- 1 chengpan staff77K Oct 11 17:51 liborg_sparkproject_netty_transport_native_kqueue_x86_64.jnilib drwxr-xr-x 3 chengpan staff96B Nov 9 13:46 linux32 drwxr-xr-x 3 chengpan staff96B Nov 9 13:46 linux64 drwxr-xr-x 3 chengpan staff96B Nov 9 13:46 osx drwxr-xr-x 3 chengpan staff96B Nov 9 13:46 windows32 drwxr-xr-x 3 chengpan staff96B Nov 9 13:46 windows64 ``` After patch ``` ➜ apache-spark git:(SPARK-41089) ll common/network-yarn/target/exploded/META-INF/native total 752 -rw-r--r-- 1 chengpan staff 101K Oct 11 23:24 liborg_sparkproject_netty_transport_native_epoll_aarch_64.so -rw-r--r-- 1 chengpan staff93K Oct 11 23:27 
liborg_sparkproject_netty_transport_native_epoll_x86_64.so -rw-r--r-- 1 chengpan staff94K Oct 11 17:57 liborg_sparkproject_netty_transport_native_kqueue_aarch_64.jnilib -rw-r--r-- 1 chengpan staff77K Oct 11 17:51 liborg_sparkproject_netty_transport_native_kqueue_x86_64.jnilib drwxr-xr-x 3 chengpan staff96B Nov 10 12:07 linux32 drwxr-xr-x 3 chengpan staff96B Nov 10 12:07 linux64 drwxr-xr-x 3 chengpan staff96B Nov 10 12:07 osx drwxr-xr-x 3 chengpan staff96B Nov 10 12:07 windows32 drwxr-xr-x 3 chengpan staff96B Nov 10 12:07 windows64 ``` Closes #38593 from pan3793/SPARK-41089. Authored-by: Cheng Pan Signed-off-by: Sean Owen (cherry picked from commit c72d39990182ad2207c8cdd523af06ee4dc02fc5) Signed-off-by: Sean Owen --- common/network-yarn/pom.xml | 4 1 file changed, 4 insertions(+) diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 14d41802a8b..81146a36c98 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -174,6 +174,10 @@ tofile="${project.build.directory}/exploded/META-INF/native/lib${spark.shade.native.packageName}_netty_transport_native_epoll_x86_64.so" /> + + - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (c5d27603f29 -> c72d3999018)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from c5d27603f29 [SPARK-41064][CONNECT][PYTHON] Implement `DataFrame.crosstab` and `DataFrame.stat.crosstab` add c72d3999018 [SPARK-41089][YARN][SHUFFLE] Relocate Netty native arm64 libs No new revisions were added by this update. Summary of changes: common/network-yarn/pom.xml | 4 1 file changed, 4 insertions(+) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [MINOR][DOCS] Fix links in the sql-pyspark-pandas-with-arrow
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 74bf9fe8eb5 [MINOR][DOCS] Fix links in the sql-pyspark-pandas-with-arrow 74bf9fe8eb5 is described below commit 74bf9fe8eb5d5512de929e87f510ed6e64d6063e Author: panbingkun AuthorDate: Wed Nov 9 07:07:32 2022 -0600 [MINOR][DOCS] Fix links in the sql-pyspark-pandas-with-arrow ### What changes were proposed in this pull request? The pr aims to fix links in the sql-pyspark-pandas-with-arrow. ### Why are the changes needed? https://spark.apache.org/docs/latest/sql-pyspark-pandas-with-arrow.html https://user-images.githubusercontent.com/15246973/200457446-250e8c9b-3712-4e79-b6e9-6bdabf322206.png";> when click [this page](https://spark.apache.org/docs/latest/api/python/user_guide/arrow_pandas.html), will jump to https://spark.apache.org/docs/latest/api/python/user_guide/arrow_pandas.html, as follow: https://user-images.githubusercontent.com/15246973/200457489-2561b9df-3107-4e19-960d-881f31851f82.png";> ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually verified. Closes #38545 from panbingkun/arrow_pandas_doc. Authored-by: panbingkun Signed-off-by: Sean Owen (cherry picked from commit 70bc5dfc96810e47f11f0f39054b1ceb61066f77) Signed-off-by: Sean Owen --- docs/sql-pyspark-pandas-with-arrow.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-pyspark-pandas-with-arrow.md b/docs/sql-pyspark-pandas-with-arrow.md index 6895376dfb3..7697588d1a8 100644 --- a/docs/sql-pyspark-pandas-with-arrow.md +++ b/docs/sql-pyspark-pandas-with-arrow.md @@ -19,4 +19,4 @@ license: | limitations under the License. --- -The Arrow usage guide is now archived on [this page](https://spark.apache.org/docs/latest/api/python/user_guide/arrow_pandas.html). 
+The Arrow usage guide is now archived on [this page](https://spark.apache.org/docs/latest/api/python/user_guide/sql/arrow_pandas.html). - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [MINOR][DOCS] Fix links in the sql-pyspark-pandas-with-arrow
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 70bc5dfc968 [MINOR][DOCS] Fix links in the sql-pyspark-pandas-with-arrow 70bc5dfc968 is described below commit 70bc5dfc96810e47f11f0f39054b1ceb61066f77 Author: panbingkun AuthorDate: Wed Nov 9 07:07:32 2022 -0600 [MINOR][DOCS] Fix links in the sql-pyspark-pandas-with-arrow ### What changes were proposed in this pull request? The pr aims to fix links in the sql-pyspark-pandas-with-arrow. ### Why are the changes needed? https://spark.apache.org/docs/latest/sql-pyspark-pandas-with-arrow.html https://user-images.githubusercontent.com/15246973/200457446-250e8c9b-3712-4e79-b6e9-6bdabf322206.png";> when click [this page](https://spark.apache.org/docs/latest/api/python/user_guide/arrow_pandas.html), will jump to https://spark.apache.org/docs/latest/api/python/user_guide/arrow_pandas.html, as follow: https://user-images.githubusercontent.com/15246973/200457489-2561b9df-3107-4e19-960d-881f31851f82.png";> ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually verified. Closes #38545 from panbingkun/arrow_pandas_doc. Authored-by: panbingkun Signed-off-by: Sean Owen --- docs/sql-pyspark-pandas-with-arrow.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/sql-pyspark-pandas-with-arrow.md b/docs/sql-pyspark-pandas-with-arrow.md index 6895376dfb3..7697588d1a8 100644 --- a/docs/sql-pyspark-pandas-with-arrow.md +++ b/docs/sql-pyspark-pandas-with-arrow.md @@ -19,4 +19,4 @@ license: | limitations under the License. --- -The Arrow usage guide is now archived on [this page](https://spark.apache.org/docs/latest/api/python/user_guide/arrow_pandas.html). +The Arrow usage guide is now archived on [this page](https://spark.apache.org/docs/latest/api/python/user_guide/sql/arrow_pandas.html). 
- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (0add57a1c02 -> 2071c960fc1)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 0add57a1c02 [SPARK-41035][SQL] Don't patch foldable children of aggregate functions in `RewriteDistinctAggregates` add 2071c960fc1 [SPARK-41039][BUILD] Upgrade `scala-parallel-collections` to 1.0.4 for Scala 2.13 No new revisions were added by this update. Summary of changes: pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41050][BUILD] Upgrade scalafmt from 3.5.9 to 3.6.1
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 4eef44ee9fb [SPARK-41050][BUILD] Upgrade scalafmt from 3.5.9 to 3.6.1 4eef44ee9fb is described below commit 4eef44ee9fb2ec90580cfb1c1933ce2460a187ee Author: panbingkun AuthorDate: Tue Nov 8 18:22:31 2022 -0600 [SPARK-41050][BUILD] Upgrade scalafmt from 3.5.9 to 3.6.1 ### What changes were proposed in this pull request? The pr aims to upgrade scalafmt from 3.5.9 to 3.6.1 ### Why are the changes needed? A. Release note: > https://github.com/scalameta/scalafmt/releases B. V3.5.9 VS V3.6.1 > https://github.com/scalameta/scalafmt/compare/v3.5.9...v3.6.1 C. Bring bug fix: https://user-images.githubusercontent.com/15246973/200554901-ac6678f8-a865-4aae-bace-5a6ba4fc9804.png";> https://user-images.githubusercontent.com/15246973/200554977-1ad218df-d8b0-426f-ac71-0697852bbaec.png";> ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually run: sh ./dev/scalafmt Closes #38559 from panbingkun/upgrade_scalafmt_3_6_1. Authored-by: panbingkun Signed-off-by: Sean Owen --- dev/.scalafmt.conf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/.scalafmt.conf b/dev/.scalafmt.conf index f9c908b6680..e06ea5bbfd2 100644 --- a/dev/.scalafmt.conf +++ b/dev/.scalafmt.conf @@ -32,4 +32,4 @@ fileOverride { runner.dialect = scala213 } } -version = 3.5.9 +version = 3.6.1 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-41029][SQL] Optimize constructor use of `GenericArrayData` for Scala 2.13
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 0d435411ec5 [SPARK-41029][SQL] Optimize constructor use of `GenericArrayData` for Scala 2.13 0d435411ec5 is described below commit 0d435411ec5c69e6fd94636986f9749abbcf09a1 Author: yangjie01 AuthorDate: Tue Nov 8 08:42:35 2022 -0600 [SPARK-41029][SQL] Optimize constructor use of `GenericArrayData` for Scala 2.13 ### What changes were proposed in this pull request? This pr change to use a more appropriate constructor when the input is `ArrayBuffer` or `Empty Collection` to improve the construction performance of `GenericArrayData` with Scala 2.13. ### Why are the changes needed? Minor performance improvement. `GenericArrayData ` has the following constructor https://github.com/apache/spark/blob/57d492556768eb341f525ce7eb5c934089fa9e7e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/GenericArrayData.scala#L30 When the input type is `ArrayBuffer`, the following code is similar in Spark ``` new GenericArrayData(arrayBuffer.toSeq) ``` For Scala 2.12, there will be no performance gap between `new GenericArrayData(arrayBuffer.toSeq)` and `new GenericArrayData(arrayBuffer)`. However, when Scala 2.13 is used, there will be a performance gap, because 'toSeq' will cause a redundant memory copy. 
For the following test case: ```scala val valuesPerIteration: Long = 1000 * 1000 * 10 val buffer = if (bufferSize == 0) { ArrayBuffer.empty[Any] } else { ArrayBuffer.fill[Any](bufferSize)(() => 1) } val benchmark = new Benchmark(s"constructor with buffer size = $bufferSize", valuesPerIteration, output = output) benchmark.addCase("toSeq and construct") { _ => var n = 0 while (n < valuesPerIteration) { new GenericArrayData(buffer.toSeq) n += 1 } } benchmark.addCase("construct directly") { _ => var n = 0 while (n < valuesPerIteration) { new GenericArrayData(buffer) n += 1 } } ``` When bufferSize=10, there is a performance gap of more than 5 times between a and b, and the performance gap increases almost linearly with the increase of bufferSize There will be more than 5 times performance gap between `new GenericArrayData(buffer.toSeq)` and `new GenericArrayData(buffer)` when `bufferSize = 10` and the performance gap will increase with the increase of bufferSize. ``` OpenJDK 64-Bit Server VM 1.8.0_345-b01 on Linux 5.15.0-1022-azure Intel(R) Xeon(R) Platinum 8370C CPU 2.80GHz constructor with buffer size = 10:Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative toSeq and construct2617 2622 7 3.8 261.7 1.0X construct directly 399406 11 25.1 39.9 6.6X OpenJDK 64-Bit Server VM 1.8.0_345-b01 on Linux 5.15.0-1022-azure Intel(R) Xeon(R) Platinum 8370C CPU 2.80GHz constructor with buffer size = 100: Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative toSeq and construct 12512 12554 60 0.81251.2 1.0X construct directly 779781 2 12.8 77.9 16.1X OpenJDK 64-Bit Server VM 1.8.0_345-b01 on Linux 5.15.0-1022-azure Intel(R) Xeon(R) Platinum 8370C CPU 2.80GHz constructor with buffer size = 1000: Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative toSeq and construct 108882 109400 732 0.1 10888.2 1.0X construct directly 5717 5731 20 1.7 571.7 19.0X ``` We can safely change `new GenericArrayData(buffer.toSeq)` to `new GenericArrayData(buffer)` 
because `ArrayBuffer` is still `scala.collection.Seq` in Scala 2.13. On the other hand, when the input is an empty set, using `Array.empty` is 10% faster than using `Seq.empty`
[spark] branch branch-3.3 updated: [SPARK-41031][BUILD] Upgrade `xz` to 1.9 for `avro` 1.11.1
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new b01dd4c7519 [SPARK-41031][BUILD] Upgrade `xz` to 1.9 for `avro` 1.11.1 b01dd4c7519 is described below commit b01dd4c7519b5ca40969822109453cff7cdf3eff Author: yangjie01 AuthorDate: Mon Nov 7 19:36:29 2022 -0600 [SPARK-41031][BUILD] Upgrade `xz` to 1.9 for `avro` 1.11.1 This pr aims to upgrade `xz` to 1.9 for `avro` 1.11.1. Spark depends on `avro` 1.11.1, and `avro` 1.11.1 uses `xz` as an optional dependency, so we need to manually check the `xz` version when upgrading `avro`. https://github.com/apache/avro/blob/3a9e5a789b5165e0c8c4da799c387fdf84bfb75e/lang/java/pom.xml#L59 https://github.com/apache/avro/blob/3a9e5a789b5165e0c8c4da799c387fdf84bfb75e/lang/java/avro/pom.xml#L238-L242 The release notes are as follows: - https://git.tukaani.org/?p=xz-java.git;a=blob;f=NEWS;hb=HEAD No Pass Github Actions Closes #38538 from LuciferYang/SPARK-41031.
Authored-by: yangjie01 Signed-off-by: Sean Owen (cherry picked from commit e9503c84c4d8d4b51844a195523ebf064bdf185e) Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 6 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index d517d556feb..6bcd447dc64 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -263,7 +263,7 @@ xbean-asm9-shaded/4.20//xbean-asm9-shaded-4.20.jar xercesImpl/2.12.2//xercesImpl-2.12.2.jar xml-apis/1.4.01//xml-apis-1.4.01.jar xmlenc/0.52//xmlenc-0.52.jar -xz/1.8//xz-1.8.jar +xz/1.9//xz-1.9.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper-jute/3.6.2//zookeeper-jute-3.6.2.jar zookeeper/3.6.2//zookeeper-3.6.2.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 54e7fe23e5b..7429ecab6b9 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -250,7 +250,7 @@ univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar velocity/1.5//velocity-1.5.jar wildfly-openssl/1.0.7.Final//wildfly-openssl-1.0.7.Final.jar xbean-asm9-shaded/4.20//xbean-asm9-shaded-4.20.jar -xz/1.8//xz-1.8.jar +xz/1.9//xz-1.9.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper-jute/3.6.2//zookeeper-jute-3.6.2.jar zookeeper/3.6.2//zookeeper-3.6.2.jar diff --git a/pom.xml b/pom.xml index d6b20512f6d..34043d43758 100644 --- a/pom.xml +++ b/pom.xml @@ -1440,10 +1440,14 @@ + org.tukaani xz -1.8 +1.9
[spark] branch master updated: [SPARK-41031][BUILD] Upgrade `xz` to 1.9 for `avro` 1.11.1
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e9503c84c4d [SPARK-41031][BUILD] Upgrade `xz` to 1.9 for `avro` 1.11.1 e9503c84c4d is described below commit e9503c84c4d8d4b51844a195523ebf064bdf185e Author: yangjie01 AuthorDate: Mon Nov 7 19:36:29 2022 -0600 [SPARK-41031][BUILD] Upgrade `xz` to 1.9 for `avro` 1.11.1 ### What changes were proposed in this pull request? This pr aims to upgrade `xz` to 1.9 for `avro` 1.11.1. ### Why are the changes needed? Spark depend on `avro` 1.11.1 and `avro` 1.11.1 use `xz` as an optional dependency, we need to manually check `xz` version when upgrading `avro`. https://github.com/apache/avro/blob/3a9e5a789b5165e0c8c4da799c387fdf84bfb75e/lang/java/pom.xml#L59 https://github.com/apache/avro/blob/3a9e5a789b5165e0c8c4da799c387fdf84bfb75e/lang/java/avro/pom.xml#L238-L242 The release notes as follows: - https://git.tukaani.org/?p=xz-java.git;a=blob;f=NEWS;hb=HEAD ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass Github Actions Closes #38538 from LuciferYang/SPARK-41031. 
Authored-by: yangjie01 Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 6 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 283d93a4e60..6b87c27d4bd 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -267,7 +267,7 @@ xbean-asm9-shaded/4.22//xbean-asm9-shaded-4.22.jar xercesImpl/2.12.2//xercesImpl-2.12.2.jar xml-apis/1.4.01//xml-apis-1.4.01.jar xmlenc/0.52//xmlenc-0.52.jar -xz/1.8//xz-1.8.jar +xz/1.9//xz-1.9.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper-jute/3.6.2//zookeeper-jute-3.6.2.jar zookeeper/3.6.2//zookeeper-3.6.2.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index fb9beebeaa0..db5af8881c2 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -252,7 +252,7 @@ univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar velocity/1.5//velocity-1.5.jar wildfly-openssl/1.0.7.Final//wildfly-openssl-1.0.7.Final.jar xbean-asm9-shaded/4.22//xbean-asm9-shaded-4.22.jar -xz/1.8//xz-1.8.jar +xz/1.9//xz-1.9.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper-jute/3.6.2//zookeeper-jute-3.6.2.jar zookeeper/3.6.2//zookeeper-3.6.2.jar diff --git a/pom.xml b/pom.xml index 1c494669455..38ba2b14008 100644 --- a/pom.xml +++ b/pom.xml @@ -1482,10 +1482,14 @@ + org.tukaani xz -1.8 +1.9
[spark] branch master updated: [SPARK-41007][SQL] Add missing serializer for java.math.BigInteger
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 0087a2a19dd [SPARK-41007][SQL] Add missing serializer for java.math.BigInteger 0087a2a19dd is described below commit 0087a2a19dd081b524e96d6a407d3940cab1f2c0 Author: Daniel Fiterman AuthorDate: Mon Nov 7 19:33:21 2022 -0600 [SPARK-41007][SQL] Add missing serializer for java.math.BigInteger ### What changes were proposed in this pull request? The JavaTypeInference class used by the [Bean Encoder](https://spark.apache.org/docs/3.2.0/api/java/org/apache/spark/sql/Encoders.html#bean-java.lang.Class-) to create serialize/deserialize a Java Bean was missing a case statement to serialize java.math.BigInteger. This adds the missing case statement. ### Why are the changes needed? This fixes the bug mentioned in the description ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? - Unit Test - Manually tested creating a new dataset with a Java Bean containing a java.math.BigInteger field Closes #38500 from dfit99/SPARK-41007. 
Authored-by: Daniel Fiterman Signed-off-by: Sean Owen --- .../spark/sql/catalyst/JavaTypeInference.scala | 3 ++ .../sql/catalyst/JavaTypeInferenceSuite.scala | 42 ++ 2 files changed, 45 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index 903072ae29d..dccaf1c4835 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -424,6 +424,9 @@ object JavaTypeInference { case c if c == classOf[java.time.Period] => createSerializerForJavaPeriod(inputObject) +case c if c == classOf[java.math.BigInteger] => + createSerializerForJavaBigInteger(inputObject) + case c if c == classOf[java.math.BigDecimal] => createSerializerForJavaBigDecimal(inputObject) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/JavaTypeInferenceSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/JavaTypeInferenceSuite.scala new file mode 100644 index 000..9c1d0c1 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/JavaTypeInferenceSuite.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst + +import java.math.BigInteger + +import scala.beans.BeanProperty + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.expressions.{CheckOverflow, Expression, Literal} +import org.apache.spark.sql.types.DecimalType + +class DummyBean() { + @BeanProperty var bigInteger = null: BigInteger +} + +class JavaTypeInferenceSuite extends SparkFunSuite { + + test("SPARK-41007: JavaTypeInference returns the correct serializer for BigInteger") { +var serializer = JavaTypeInference.serializerFor(classOf[DummyBean]) +var bigIntegerFieldName: Expression = serializer.children(0) +assert(bigIntegerFieldName.asInstanceOf[Literal].value.toString == "bigInteger") +var bigIntegerFieldExpression: Expression = serializer.children(1) +assert(bigIntegerFieldExpression.asInstanceOf[CheckOverflow].dataType == + DecimalType.BigIntDecimal) + } +} - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (57d49255676 -> eb6d1980fa8)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 57d49255676 [SPARK-40948][SQL] Introduce new error class: PATH_NOT_FOUND add eb6d1980fa8 [SPARK-41023][BUILD] Upgrade Jackson to 2.14.0 No new revisions were added by this update. Summary of changes: dev/deps/spark-deps-hadoop-2-hive-2.3 | 16 dev/deps/spark-deps-hadoop-3-hive-2.3 | 16 pom.xml | 4 ++-- 3 files changed, 18 insertions(+), 18 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [MINOR][DOC] revisions for spark sql performance tuning to improve readability and grammar
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c4d159a368d [MINOR][DOC] revisions for spark sql performance tuning to improve readability and grammar c4d159a368d is described below commit c4d159a368d554a8567271dbfec8f291d1de70a5 Author: Dustin William Smith AuthorDate: Sun Nov 6 18:04:10 2022 -0600 [MINOR][DOC] revisions for spark sql performance tuning to improve readability and grammar ### What changes were proposed in this pull request? I made some small grammar fixes related to dependent clause followed but independent clauses, starting a sentence with an introductory phrase, using the plural with when are is present in the sentence, and other small fixes to improve readability. https://spark.apache.org/docs/latest/sql-performance-tuning.html https://user-images.githubusercontent.com/7563201/18862-d9418bc1-2fcd-4eff-be8e-af412add6946.png";> ### Why are the changes needed? These changes improve the readability of the Spark documentation for new users or those studying up. ### Does this PR introduce _any_ user-facing change? Yes, these changes impact the spark documentation. ### How was this patch tested? No test were created as these changes were solely in markdown. Closes #38510 from dwsmith1983/minor-doc-revisions. 
Lead-authored-by: Dustin William Smith Co-authored-by: dustin Co-authored-by: Dustin Smith Signed-off-by: Sean Owen --- docs/sql-performance-tuning.md | 21 ++--- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/docs/sql-performance-tuning.md b/docs/sql-performance-tuning.md index d736ff8f83f..6ac39d90527 100644 --- a/docs/sql-performance-tuning.md +++ b/docs/sql-performance-tuning.md @@ -40,7 +40,7 @@ Configuration of in-memory caching can be done using the `setConf` method on `Sp spark.sql.inMemoryColumnarStorage.compressed true -When set to true Spark SQL will automatically select a compression codec for each column based +When set to true, Spark SQL will automatically select a compression codec for each column based on statistics of the data. 1.0.1 @@ -77,8 +77,8 @@ that these options will be deprecated in future release as more optimizations ar spark.sql.files.openCostInBytes 4194304 (4 MB) - The estimated cost to open a file, measured by the number of bytes could be scanned in the same - time. This is used when putting multiple files into a partition. It is better to over-estimated, + The estimated cost to open a file, measured by the number of bytes that could be scanned in the same + time. This is used when putting multiple files into a partition. It is better to over-estimate, then the partitions with small files will be faster than partitions with bigger files (which is scheduled first). This configuration is effective only when using file-based sources such as Parquet, JSON and ORC. @@ -110,7 +110,7 @@ that these options will be deprecated in future release as more optimizations ar 10485760 (10 MB) Configures the maximum size in bytes for a table that will be broadcast to all worker nodes when - performing a join. By setting this value to -1 broadcasting can be disabled. Note that currently + performing a join. By setting this value to -1, broadcasting can be disabled. 
Note that currently statistics are only supported for Hive Metastore tables where the command ANALYZE TABLE <tableName> COMPUTE STATISTICS noscan has been run. @@ -140,8 +140,7 @@ that these options will be deprecated in future release as more optimizations ar 1 Configures the maximum listing parallelism for job input paths. In case the number of input - paths is larger than this value, it will be throttled down to use this value. Same as above, - this configuration is only effective when using file-based data sources such as Parquet, ORC + paths is larger than this value, it will be throttled down to use this value. This configuration is only effective when using file-based data sources such as Parquet, ORC and JSON. 2.1.1 @@ -215,8 +214,8 @@ For more details please refer to the documentation of [Join Hints](sql-ref-synta ## Coalesce Hints for SQL Queries -Coalesce hints allows the Spark SQL users to control the number of output files just like the -`coalesce`, `repartition` and `repartitionByRange` in Dataset API, they can be used for performance +Coalesce hints allow Spark SQL users to control the number of output files just like +`coalesce`, `repartition` and `repartitionByRange` in the Dataset API, they c
[spark] branch master updated: [MINOR][DOC] updated some grammar and a missed period in the tuning doc
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 5d7be0877b0 [MINOR][DOC] updated some grammar and a missed period in the tuning doc 5d7be0877b0 is described below commit 5d7be0877b0bf36098e63297a4125807a933625b Author: Dustin William Smith AuthorDate: Sun Nov 6 09:05:44 2022 -0600 [MINOR][DOC] updated some grammar and a missed period in the tuning doc ### What changes were proposed in this pull request? I changed some grammatical issues in the documentation. One potential change could be debatable though so please take a look. https://spark.apache.org/docs/latest/tuning.html ### Why are the changes needed? Some grammatical mistakes in the documentation. ### Does this PR introduce _any_ user-facing change? Yes, this corrects some issues in documentation related to Tuning Spark. The following changes were made Check if there are too many garbage collections by collecting GC stats. If a full GC is invoked multiple times ~~for~~ before a task completes, it means that there isn't enough memory available for executing tasks. with `-XX:G1HeapRegionSize`. (added missing period). we can estimate the size of Eden to be `4*3*128MiB`. (added the to estimate the size--this one I guess debatable) ### How was this patch tested? No tests added as this was markdown documentation for the user facing page. Closes #38499 from dwsmith1983/master. 
Lead-authored-by: Dustin William Smith Co-authored-by: dustin Signed-off-by: Sean Owen --- docs/tuning.md | 22 +++--- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/tuning.md b/docs/tuning.md index 18d4a6205f4..550ffb0f357 100644 --- a/docs/tuning.md +++ b/docs/tuning.md @@ -217,7 +217,7 @@ The goal of GC tuning in Spark is to ensure that only long-lived RDDs are stored the Young generation is sufficiently sized to store short-lived objects. This will help avoid full GCs to collect temporary objects created during task execution. Some steps which may be useful are: -* Check if there are too many garbage collections by collecting GC stats. If a full GC is invoked multiple times for +* Check if there are too many garbage collections by collecting GC stats. If a full GC is invoked multiple times before a task completes, it means that there isn't enough memory available for executing tasks. * If there are too many minor collections but not many major GCs, allocating more memory for Eden would help. You @@ -235,12 +235,12 @@ temporary objects created during task execution. Some steps which may be useful * Try the G1GC garbage collector with `-XX:+UseG1GC`. It can improve performance in some situations where garbage collection is a bottleneck. Note that with large executor heap sizes, it may be important to increase the [G1 region size](http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html) - with `-XX:G1HeapRegionSize` + with `-XX:G1HeapRegionSize`. * As an example, if your task is reading data from HDFS, the amount of memory used by the task can be estimated using the size of the data block read from HDFS. Note that the size of a decompressed block is often 2 or 3 times the size of the block. So if we wish to have 3 or 4 tasks' worth of working space, and the HDFS block size is 128 MiB, - we can estimate size of Eden to be `4*3*128MiB`. + we can estimate the size of Eden to be `4*3*128MiB`. 
* Monitor how the frequency and time taken by garbage collection changes with the new settings. @@ -293,14 +293,14 @@ available in `SparkContext` can greatly reduce the size of each serialized task, of launching a job over a cluster. If your tasks use any large object from the driver program inside of them (e.g. a static lookup table), consider turning it into a broadcast variable. Spark prints the serialized size of each task on the master, so you can look at that to -decide whether your tasks are too large; in general tasks larger than about 20 KiB are probably +decide whether your tasks are too large; in general, tasks larger than about 20 KiB are probably worth optimizing. ## Data Locality Data locality can have a major impact on the performance of Spark jobs. If data and the code that -operates on it are together then computation tends to be fast. But if code and data are separated, -one must move to the other. Typically it is faster to ship serialized code from place to place than +operates on it are together, then computation tends to be fast. But if code and data are separated, +one must move to the other. Typically, it is faster to ship serialized code from place to place than a chunk of data because code size is mu
[spark] branch master updated (c83ad5d9504 -> 9b35d1512a7)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from c83ad5d9504 [SPARK-41024][BUILD] Upgrade scala-maven-plugin to 4.7.2 add 9b35d1512a7 [SPARK-40950][BUILD][FOLLOWUP] Fix Scala 2.13 Mima check No new revisions were added by this update. Summary of changes: project/MimaExcludes.scala | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.2 updated (1aef8b702cb -> 5d62f4707d8)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git from 1aef8b702cb [SPARK-40869][K8S] Resource name prefix should not start with a hyphen add 5d62f4707d8 [SPARK-40801][BUILD][3.2] Upgrade `Apache commons-text` to 1.10 No new revisions were added by this update. Summary of changes: dev/deps/spark-deps-hadoop-2.7-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3.2-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40996][BUILD] Upgrade `sbt-checkstyle-plugin` to 4.0.0 to resolve `dev/sbt-checkstyle` run failed with sbt 1.7.3
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 67d0dc8f74d [SPARK-40996][BUILD] Upgrade `sbt-checkstyle-plugin` to 4.0.0 to resolve `dev/sbt-checkstyle` run failed with sbt 1.7.3 67d0dc8f74d is described below commit 67d0dc8f74d78678446f190145a34f71e60efa99 Author: yangjie01 AuthorDate: Thu Nov 3 08:32:41 2022 -0500 [SPARK-40996][BUILD] Upgrade `sbt-checkstyle-plugin` to 4.0.0 to resolve `dev/sbt-checkstyle` run failed with sbt 1.7.3 ### What changes were proposed in this pull request? This pr aims upgrade `sbt-checkstyle-plugin` to 4.0.0 to resolve `dev/sbt-checkstyle` run failed with sbt 1.7.3, the new version will check the generated source code, so some new suppression rules have been added to `dev/checkstyle-suppressions.xml` ### Why are the changes needed? https://github.com/apache/spark/pull/38476 revert sbt 1.7.3 upgrade due to run `dev/sbt-checkstyle` failed: ``` [error] org.xml.sax.SAXParseException; lineNumber: 18; columnNumber: 10; DOCTYPE is disallowed when the feature "http://apache.org/xml/features/disallow-doctype-decl"; set to true. 
[error] at com.sun.org.apache.xerces.internal.util.ErrorHandlerWrapper.createSAXParseException(ErrorHandlerWrapper.java:203) [error] at com.sun.org.apache.xerces.internal.util.ErrorHandlerWrapper.fatalError(ErrorHandlerWrapper.java:177) [error] at com.sun.org.apache.xerces.internal.impl.XMLErrorReporter.reportError(XMLErrorReporter.java:400) [error] at com.sun.org.apache.xerces.internal.impl.XMLErrorReporter.reportError(XMLErrorReporter.java:327) [error] at com.sun.org.apache.xerces.internal.impl.XMLScanner.reportFatalError(XMLScanner.java:1473) [error] at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl$PrologDriver.next(XMLDocumentScannerImpl.java:914) [error] at com.sun.org.apache.xerces.internal.impl.XMLDocumentScannerImpl.next(XMLDocumentScannerImpl.java:602) [error] at com.sun.org.apache.xerces.internal.impl.XMLDocumentFragmentScannerImpl.scanDocument(XMLDocumentFragmentScannerImpl.java:505) [error] at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:842) [error] at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(XML11Configuration.java:771) [error] at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(XMLParser.java:141) [error] at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(AbstractSAXParser.java:1213) [error] at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(SAXParserImpl.java:643) [error] at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl.parse(SAXParserImpl.java:327) [error] at scala.xml.factory.XMLLoader.parse(XMLLoader.scala:73) [error] at scala.xml.factory.XMLLoader.loadXML(XMLLoader.scala:54) [error] at scala.xml.factory.XMLLoader.loadXML$(XMLLoader.scala:53) [error] at scala.xml.XML$.loadXML(XML.scala:62) [error] at scala.xml.factory.XMLLoader.loadString(XMLLoader.scala:92) [error] at scala.xml.factory.XMLLoader.loadString$(XMLLoader.scala:92) [error] at scala.xml.XML$.loadString(XML.scala:62) [error] at 
com.etsy.sbt.checkstyle.Checkstyle$.checkstyle(Checkstyle.scala:35) [error] at com.etsy.sbt.checkstyle.CheckstylePlugin$autoImport$.$anonfun$checkstyleTask$1(CheckstylePlugin.scala:36) [error] at com.etsy.sbt.checkstyle.CheckstylePlugin$autoImport$.$anonfun$checkstyleTask$1$adapted(CheckstylePlugin.scala:34) [error] at scala.Function1.$anonfun$compose$1(Function1.scala:49) [error] at sbt.internal.util.$tilde$greater.$anonfun$$u2219$1(TypeFunctions.scala:62) [error] at sbt.std.Transform$$anon$4.work(Transform.scala:68) [error] at sbt.Execute.$anonfun$submit$2(Execute.scala:282) [error] at sbt.internal.util.ErrorHandling$.wideConvert(ErrorHandling.scala:23) [error] at sbt.Execute.work(Execute.scala:291) [error] at sbt.Execute.$anonfun$submit$1(Execute.scala:282) [error] at sbt.ConcurrentRestrictions$$anon$4.$anonfun$submitValid$1(ConcurrentRestrictions.scala:265) [error] at sbt.CompletionService$$anon$2.call(CompletionService.scala:64) [error] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [error] at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) [error] at java.util.concurrent.FutureTask.run(FutureTask.java:266) [error]
[spark] branch master updated: [SPARK-40985][BUILD] Upgrade RoaringBitmap to 0.9.35
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new c4e6b2cecee [SPARK-40985][BUILD] Upgrade RoaringBitmap to 0.9.35 c4e6b2cecee is described below commit c4e6b2cecee612035651c32ff5aba3bd2a17a283 Author: yangjie01 AuthorDate: Wed Nov 2 10:46:55 2022 -0500 [SPARK-40985][BUILD] Upgrade RoaringBitmap to 0.9.35 ### What changes were proposed in this pull request? This pr aims upgrade RoaringBitmap 0.9.35 ### Why are the changes needed? This version bring some bug fix: - https://github.com/RoaringBitmap/RoaringBitmap/pull/587 - https://github.com/RoaringBitmap/RoaringBitmap/issues/588 other changes as follows: https://github.com/RoaringBitmap/RoaringBitmap/compare/0.9.32...0.9.35 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #38465 from LuciferYang/rbitmap-0935. 
Authored-by: yangjie01 Signed-off-by: Sean Owen --- core/benchmarks/MapStatusesConvertBenchmark-jdk11-results.txt | 8 core/benchmarks/MapStatusesConvertBenchmark-jdk17-results.txt | 10 +- core/benchmarks/MapStatusesConvertBenchmark-results.txt | 10 +- dev/deps/spark-deps-hadoop-2-hive-2.3 | 4 ++-- dev/deps/spark-deps-hadoop-3-hive-2.3 | 4 ++-- pom.xml | 2 +- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/core/benchmarks/MapStatusesConvertBenchmark-jdk11-results.txt b/core/benchmarks/MapStatusesConvertBenchmark-jdk11-results.txt index adac80834e4..06f7cc7c92c 100644 --- a/core/benchmarks/MapStatusesConvertBenchmark-jdk11-results.txt +++ b/core/benchmarks/MapStatusesConvertBenchmark-jdk11-results.txt @@ -2,12 +2,12 @@ MapStatuses Convert Benchmark -OpenJDK 64-Bit Server VM 11.0.16+8-LTS on Linux 5.15.0-1019-azure +OpenJDK 64-Bit Server VM 11.0.16.1+1 on Linux 5.15.0-1022-azure Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz MapStatuses Convert: Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative -Num Maps: 5 Fetch partitions:500 1269 1276 8 0.0 1268666001.0 1.0X -Num Maps: 5 Fetch partitions:1000 2672 2695 39 0.0 2671542753.0 0.5X -Num Maps: 5 Fetch partitions:1500 4034 4069 50 0.0 4033696987.0 0.3X +Num Maps: 5 Fetch partitions:500 1227 1262 47 0.0 1226744907.0 1.0X +Num Maps: 5 Fetch partitions:1000 2620 2637 15 0.0 2620288061.0 0.5X +Num Maps: 5 Fetch partitions:1500 3975 3990 17 0.0 3974979610.0 0.3X diff --git a/core/benchmarks/MapStatusesConvertBenchmark-jdk17-results.txt b/core/benchmarks/MapStatusesConvertBenchmark-jdk17-results.txt index 9911ae3326f..3b6f5c6695e 100644 --- a/core/benchmarks/MapStatusesConvertBenchmark-jdk17-results.txt +++ b/core/benchmarks/MapStatusesConvertBenchmark-jdk17-results.txt @@ -2,12 +2,12 @@ MapStatuses Convert Benchmark -OpenJDK 64-Bit Server VM 17.0.4+8-LTS on Linux 5.15.0-1019-azure -Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz +OpenJDK 64-Bit Server VM 17.0.4.1+1 on Linux 5.15.0-1022-azure 
+Intel(R) Xeon(R) Platinum 8272CL CPU @ 2.60GHz MapStatuses Convert: Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative -Num Maps: 5 Fetch partitions:500 1228 1238 17 0.0 1228191051.0 1.0X -Num Maps: 5 Fetch partitions:1000 2380 2393 16 0.0 2379601524.0 0.5X -Num Maps: 5 Fetch partitions:1500 3803 3857 55 0.0 3802550172.0 0.3X +Num Maps: 5 Fetch partitions:500 1159 1184 38 0.0 1159155979.0 1.0X +Num Maps: 5 Fetch partitions:1000 2329 2387 57 0.0 2328833805.0 0.5X +Num Maps: 5 Fetch partitions:1500 3608 3712 92 0.0 3607631972.0
[spark] branch branch-3.2 updated (c12b4e20d9e -> b9d22aca2fb)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch branch-3.2 in repository https://gitbox.apache.org/repos/asf/spark.git from c12b4e20d9e [SPARK-40983][DOC] Remove Hadoop requirements for zstd mentioned in Parquet compression codec add b9d22aca2fb [MINOR][BUILD] Correct the `files` contend in `checkstyle-suppressions.xml` No new revisions were added by this update. Summary of changes: dev/checkstyle-suppressions.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [MINOR][BUILD] Correct the `files` contend in `checkstyle-suppressions.xml`
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 067c4277712 [MINOR][BUILD] Correct the `files` contend in `checkstyle-suppressions.xml` 067c4277712 is described below commit 067c4277712e1427a0f86bc31c622eed398e2431 Author: yangjie01 AuthorDate: Tue Nov 1 18:10:36 2022 -0500 [MINOR][BUILD] Correct the `files` contend in `checkstyle-suppressions.xml` ### What changes were proposed in this pull request? The pr aims to change the suppress files from `sql/core/src/main/java/org/apache/spark/sql/api.java/*` to `sql/core/src/main/java/org/apache/spark/sql/api/java/*`, the former seems to be a wrong code path. ### Why are the changes needed? Correct the `files` contend in `checkstyle-suppressions.xml` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #38469 from LuciferYang/fix-java-supperessions. Authored-by: yangjie01 Signed-off-by: Sean Owen (cherry picked from commit 5457193dc095bc6c97259e31fa3df44184822f65) Signed-off-by: Sean Owen --- dev/checkstyle-suppressions.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/checkstyle-suppressions.xml b/dev/checkstyle-suppressions.xml index 804a178a5fe..22acc505f4f 100644 --- a/dev/checkstyle-suppressions.xml +++ b/dev/checkstyle-suppressions.xml @@ -33,7 +33,7 @@ + files="sql/core/src/main/java/org/apache/spark/sql/api/java/*"/>
[spark] branch master updated (8f6b18536e4 -> 5457193dc09)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 8f6b18536e4 [SPARK-40663][SQL] Migrate execution errors onto error classes: _LEGACY_ERROR_TEMP_2251-2275 add 5457193dc09 [MINOR][BUILD] Correct the `files` contend in `checkstyle-suppressions.xml` No new revisions were added by this update. Summary of changes: dev/checkstyle-suppressions.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark-website] branch asf-site updated: Remove Spark 3.1.x from downloads as it is EOL
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new 40f58f884 Remove Spark 3.1.x from downloads as it is EOL 40f58f884 is described below commit 40f58f884bd258d6a332d583dc91c717b6b461f0 Author: Sean Owen AuthorDate: Tue Nov 1 10:24:58 2022 -0500 Remove Spark 3.1.x from downloads as it is EOL Author: Sean Owen Closes #425 from srowen/Remove31x. --- js/downloads.js | 4 site/js/downloads.js | 4 2 files changed, 8 deletions(-) diff --git a/js/downloads.js b/js/downloads.js index 27800b672..d20732211 100644 --- a/js/downloads.js +++ b/js/downloads.js @@ -14,15 +14,12 @@ function addRelease(version, releaseDate, packages, mirrored) { var sources = {pretty: "Source Code", tag: "sources"}; var hadoopFree = {pretty: "Pre-built with user-provided Apache Hadoop", tag: "without-hadoop"}; var hadoop2p7 = {pretty: "Pre-built for Apache Hadoop 2.7", tag: "hadoop2.7"}; -var hadoop3p2 = {pretty: "Pre-built for Apache Hadoop 3.2 and later", tag: "hadoop3.2"}; var hadoop3p3 = {pretty: "Pre-built for Apache Hadoop 3.3 and later", tag: "hadoop3.2"}; var hadoop3p3scala213 = {pretty: "Pre-built for Apache Hadoop 3.3 and later (Scala 2.13)", tag: "hadoop3.2-scala2.13"}; var hadoop2p = {pretty: "Pre-built for Apache Hadoop 2.7", tag: "hadoop2"}; var hadoop3p = {pretty: "Pre-built for Apache Hadoop 3.3 and later", tag: "hadoop3"}; var hadoop3pscala213 = {pretty: "Pre-built for Apache Hadoop 3.3 and later (Scala 2.13)", tag: "hadoop3-scala2.13"}; -// 3.1.0+ -var packagesV11 = [hadoop3p2, hadoop2p7, hadoopFree, sources]; // 3.2.0+ var packagesV12 = [hadoop3p3, hadoop3p3scala213, hadoop2p7, hadoopFree, sources]; // 3.3.0+ @@ -30,7 +27,6 @@ var packagesV13 = [hadoop3p, hadoop3pscala213, hadoop2p, hadoopFree, sources]; addRelease("3.3.1", new Date("10/25/2022"), packagesV13, true); 
addRelease("3.2.2", new Date("07/17/2022"), packagesV12, true); -addRelease("3.1.3", new Date("02/18/2022"), packagesV11, true); function append(el, contents) { el.innerHTML += contents; diff --git a/site/js/downloads.js b/site/js/downloads.js index 27800b672..d20732211 100644 --- a/site/js/downloads.js +++ b/site/js/downloads.js @@ -14,15 +14,12 @@ function addRelease(version, releaseDate, packages, mirrored) { var sources = {pretty: "Source Code", tag: "sources"}; var hadoopFree = {pretty: "Pre-built with user-provided Apache Hadoop", tag: "without-hadoop"}; var hadoop2p7 = {pretty: "Pre-built for Apache Hadoop 2.7", tag: "hadoop2.7"}; -var hadoop3p2 = {pretty: "Pre-built for Apache Hadoop 3.2 and later", tag: "hadoop3.2"}; var hadoop3p3 = {pretty: "Pre-built for Apache Hadoop 3.3 and later", tag: "hadoop3.2"}; var hadoop3p3scala213 = {pretty: "Pre-built for Apache Hadoop 3.3 and later (Scala 2.13)", tag: "hadoop3.2-scala2.13"}; var hadoop2p = {pretty: "Pre-built for Apache Hadoop 2.7", tag: "hadoop2"}; var hadoop3p = {pretty: "Pre-built for Apache Hadoop 3.3 and later", tag: "hadoop3"}; var hadoop3pscala213 = {pretty: "Pre-built for Apache Hadoop 3.3 and later (Scala 2.13)", tag: "hadoop3-scala2.13"}; -// 3.1.0+ -var packagesV11 = [hadoop3p2, hadoop2p7, hadoopFree, sources]; // 3.2.0+ var packagesV12 = [hadoop3p3, hadoop3p3scala213, hadoop2p7, hadoopFree, sources]; // 3.3.0+ @@ -30,7 +27,6 @@ var packagesV13 = [hadoop3p, hadoop3pscala213, hadoop2p, hadoopFree, sources]; addRelease("3.3.1", new Date("10/25/2022"), packagesV13, true); addRelease("3.2.2", new Date("07/17/2022"), packagesV12, true); -addRelease("3.1.3", new Date("02/18/2022"), packagesV11, true); function append(el, contents) { el.innerHTML += contents; - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark-website] branch asf-site updated: Add CVE-2022-31777
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new 9fbf9cdf9 Add CVE-2022-31777 9fbf9cdf9 is described below commit 9fbf9cdf924a103304619c8491c0596fb2c0349b Author: Sean Owen AuthorDate: Tue Nov 1 10:24:21 2022 -0500 Add CVE-2022-31777 Author: Sean Owen Closes #426 from srowen/CVE202231777. --- security.md| 26 ++ site/security.html | 31 +++ 2 files changed, 57 insertions(+) diff --git a/security.md b/security.md index a4b470cd6..c648bbbe7 100644 --- a/security.md +++ b/security.md @@ -18,6 +18,32 @@ non-public list that will reach the Apache Security team, as well as the Spark P Known security issues +CVE-2022-31777: Apache Spark XSS vulnerability in log viewer UI Javascript + +Severity: Medium + +Vendor: The Apache Software Foundation + +Versions Affected: + +- 3.2.1 and earlier +- 3.3.0 + +Description: + +A stored cross-site scripting (XSS) vulnerability in Apache Spark 3.2.1 and earlier, and 3.3.0, allows remote +attackers to execute arbitrary JavaScript in the web browser of a user, by including a malicious payload into +the logs which would be returned in logs rendered in the UI. 
+ +Mitigation: + +- Upgrade to Spark 3.2.2, or 3.3.1 or later + +Credit: + +- Florian Walter (Veracode) + + CVE-2022-33891: Apache Spark shell command injection vulnerability via Spark UI Severity: Important diff --git a/site/security.html b/site/security.html index b265ae8a6..31e772a67 100644 --- a/site/security.html +++ b/site/security.html @@ -133,6 +133,37 @@ non-public list that will reach the Apache Security team, as well as the Spark P Known security issues +CVE-2022-31777: Apache Spark XSS vulnerability in log viewer UI Javascript + +Severity: Medium + +Vendor: The Apache Software Foundation + +Versions Affected: + + + 3.2.1 and earlier + 3.3.0 + + +Description: + +A stored cross-site scripting (XSS) vulnerability in Apache Spark 3.2.1 and earlier, and 3.3.0, allows remote +attackers to execute arbitrary JavaScript in the web browser of a user, by including a malicious payload into +the logs which would be returned in logs rendered in the UI. + +Mitigation: + + + Upgrade to Spark 3.2.2, or 3.3.1 or later + + +Credit: + + + Florian Walter (Veracode) + + CVE-2022-33891: Apache Spark shell command injection vulnerability via Spark UI Severity: Important - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40391][SQL][TESTS][FOLLOWUP] Change to use `mockito-inline` instead of manually write MockMaker
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 05ad1027a89 [SPARK-40391][SQL][TESTS][FOLLOWUP] Change to use `mockito-inline` instead of manually write MockMaker 05ad1027a89 is described below commit 05ad1027a897b63a9f82f7131f6a024732a7e64d Author: yangjie01 AuthorDate: Mon Oct 24 08:30:34 2022 -0500 [SPARK-40391][SQL][TESTS][FOLLOWUP] Change to use `mockito-inline` instead of manually write MockMaker ### What changes were proposed in this pull request? This pr aims use `mockito-inline` instead of manually write `MockMaker` ### Why are the changes needed? `mockito-inline` is a more recommended [way](https://javadoc.io/doc/org.mockito/mockito-core/latest/org/mockito/Mockito.html#39) to use mockito to mocking final types, enums and final methods and `mllib` and `mllib-local` module is already using `mockito-inline`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - Pass GitHub Actions - Manual test:run `build/sbt clean "sql/testOnly *QueryExecutionErrorsSuite"` with Java 8u352, 11.0.17 and 17.0.5, all 3 Java versions passed Closes #38372 from LuciferYang/SPARK-40391. 
Authored-by: yangjie01 Signed-off-by: Sean Owen --- sql/core/pom.xml | 5 + .../mockito-extensions/org.mockito.plugins.MockMaker | 18 -- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 7203fc59108..cfcf7455ad0 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -193,6 +193,11 @@ mockito-core test + + org.mockito + mockito-inline + test + org.seleniumhq.selenium selenium-java diff --git a/sql/core/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker b/sql/core/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker deleted file mode 100644 index eb074c6ae3f..000 --- a/sql/core/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker +++ /dev/null @@ -1,18 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -#http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -mock-maker-inline - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40739][SPARK-40738] Fixes for cygwin/msys2/mingw sbt build and bash scripts
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 880d9bb3fcb [SPARK-40739][SPARK-40738] Fixes for cygwin/msys2/mingw sbt build and bash scripts 880d9bb3fcb is described below commit 880d9bb3fcb69001512886496f2988ed17cc4c50 Author: Phil AuthorDate: Mon Oct 24 08:28:54 2022 -0500 [SPARK-40739][SPARK-40738] Fixes for cygwin/msys2/mingw sbt build and bash scripts This fixes two problems that affect development in a Windows shell environment, such as `cygwin` or `msys2`. ### The fixed build error Running `./build/sbt packageBin` from a Windows cygwin `bash` session fails. This occurs if `WSL` is installed, because `project\SparkBuild.scala` creates a `bash` process, but `WSL bash` is called, even though `cygwin bash` appears earlier in the `PATH`. In addition, file path arguments to bash contain backslashes. The fix is to ensure that the correct `bash` is called, and that arguments passed to `bash` are passed with slashes rather than backslashes. ### The build error message: ```bash ./build.sbt packageBin ``` [info] compiling 9 Java sources to C:\Users\philwalk\workspace\spark\common\sketch\target\scala-2.12\classes ... /bin/bash: C:Usersphilwalkworkspacesparkcore/../build/spark-build-info: No such file or directory [info] compiling 1 Scala source to C:\Users\philwalk\workspace\spark\tools\target\scala-2.12\classes ... [info] compiling 5 Scala sources to C:\Users\philwalk\workspace\spark\mllib-local\target\scala-2.12\classes ...
[info] Compiling 5 protobuf files to C:\Users\philwalk\workspace\spark\connector\connect\target\scala-2.12\src_managed\main [error] stack trace is suppressed; run last core / Compile / managedResources for the full output [error] (core / Compile / managedResources) Nonzero exit value: 127 [error] Total time: 42 s, completed Oct 8, 2022, 4:49:12 PM sbt:spark-parent> sbt:spark-parent> last core /Compile /managedResources last core /Compile /managedResources [error] java.lang.RuntimeException: Nonzero exit value: 127 [error] at scala.sys.package$.error(package.scala:30) [error] at scala.sys.process.ProcessBuilderImpl$AbstractBuilder.slurp(ProcessBuilderImpl.scala:138) [error] at scala.sys.process.ProcessBuilderImpl$AbstractBuilder.$bang$bang(ProcessBuilderImpl.scala:108) [error] at Core$.$anonfun$settings$4(SparkBuild.scala:604) [error] at scala.Function1.$anonfun$compose$1(Function1.scala:49) [error] at sbt.internal.util.$tilde$greater.$anonfun$$u2219$1(TypeFunctions.scala:62) [error] at sbt.std.Transform$$anon$4.work(Transform.scala:68) [error] at sbt.Execute.$anonfun$submit$2(Execute.scala:282) [error] at sbt.internal.util.ErrorHandling$.wideConvert(ErrorHandling.scala:23) [error] at sbt.Execute.work(Execute.scala:291) [error] at sbt.Execute.$anonfun$submit$1(Execute.scala:282) [error] at sbt.ConcurrentRestrictions$$anon$4.$anonfun$submitValid$1(ConcurrentRestrictions.scala:265) [error] at sbt.CompletionService$$anon$2.call(CompletionService.scala:64) [error] at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) [error] at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515) [error] at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264) [error] at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) [error] at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) [error] at java.base/java.lang.Thread.run(Thread.java:834) [error] (core 
/ Compile / managedResources) Nonzero exit value: 127 ### bash scripts fail when run from `cygwin` or `msys2` The other problem fixed by the PR is to address problems preventing the `bash` scripts (`spark-shell`, `spark-submit`, etc.) from being used in Windows `SHELL` environments. The problem is that the bash version of `spark-class` fails in a Windows shell environment, the result of `launcher/src/main/java/org/apache/spark/launcher/Main.java` not following the convention expected by `spark-class`, and also appending CR to line endings. The resulting error message not helpful. There are two parts to this fix: 1. modify `Main.java` to treat a `SHELL` session on Windows as a `bash` session 2. remove the appended CR character when parsing the output produced by `Main.java` ### Does this PR introduce _any_ user-facing change? These changes should NOT affect anyone who is not trying build or ru
[spark] branch master updated (e2e449e83cd -> 363b8539059)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from e2e449e83cd [SPARK-40897][DOCS] Add some PySpark APIs to References add 363b8539059 [SPARK-39977][BUILD] Remove unnecessary guava exclusion from jackson-module-scala No new revisions were added by this update. Summary of changes: pom.xml | 8 1 file changed, 8 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [SPARK-40886][BUILD] Bump Jackson Databind 2.13.4.2
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new e674356725d [SPARK-40886][BUILD] Bump Jackson Databind 2.13.4.2 e674356725d is described below commit e674356725de1063760926e66c93dab4813a7aa8 Author: Cheng Pan AuthorDate: Sun Oct 23 11:37:42 2022 -0500 [SPARK-40886][BUILD] Bump Jackson Databind 2.13.4.2 ### What changes were proposed in this pull request? Bump Jackson Databind from 2.13.4.1 to 2.13.4.2 ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? There is a regression about Gradle in 2.13.4.1 and got fixed in 2.13.4.2 https://github.com/FasterXML/jackson-databind/issues/3627 ### How was this patch tested? Existing UT. Closes #38355 from pan3793/SPARK-40886. Authored-by: Cheng Pan Signed-off-by: Sean Owen (cherry picked from commit e73b157f5c4d20c49ec0e3a7bd82a72d3271f766) Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index c7a7b3cbce9..d517d556feb 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -115,7 +115,7 @@ ivy/2.5.0//ivy-2.5.0.jar jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.13.4//jackson-core-2.13.4.jar -jackson-databind/2.13.4.1//jackson-databind-2.13.4.1.jar +jackson-databind/2.13.4.2//jackson-databind-2.13.4.2.jar jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 
b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 259efd760e2..54e7fe23e5b 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -105,7 +105,7 @@ ivy/2.5.0//ivy-2.5.0.jar jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.13.4//jackson-core-2.13.4.jar -jackson-databind/2.13.4.1//jackson-databind-2.13.4.1.jar +jackson-databind/2.13.4.2//jackson-databind-2.13.4.2.jar jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar diff --git a/pom.xml b/pom.xml index 2804a215fd9..d6b20512f6d 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ true 1.9.13 2.13.4 - 2.13.4.1 + 2.13.4.2 1.1.8.4 1.1.2 2.2.1 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40886][BUILD] Bump Jackson Databind 2.13.4.2
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new e73b157f5c4 [SPARK-40886][BUILD] Bump Jackson Databind 2.13.4.2 e73b157f5c4 is described below commit e73b157f5c4d20c49ec0e3a7bd82a72d3271f766 Author: Cheng Pan AuthorDate: Sun Oct 23 11:37:42 2022 -0500 [SPARK-40886][BUILD] Bump Jackson Databind 2.13.4.2 ### What changes were proposed in this pull request? Bump Jackson Databind from 2.13.4.1 to 2.13.4.2 ### Why are the changes needed? ### Does this PR introduce _any_ user-facing change? There is a regression about Gradle in 2.13.4.1 and got fixed in 2.13.4.2 https://github.com/FasterXML/jackson-databind/issues/3627 ### How was this patch tested? Existing UT. Closes #38355 from pan3793/SPARK-40886. Authored-by: Cheng Pan Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 1d1061aaadb..6756dd58312 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -113,7 +113,7 @@ ivy/2.5.0//ivy-2.5.0.jar jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.13.4//jackson-core-2.13.4.jar -jackson-databind/2.13.4.1//jackson-databind-2.13.4.1.jar +jackson-databind/2.13.4.2//jackson-databind-2.13.4.2.jar jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 39a0e617058..d29a10c1230 100644 --- 
a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -101,7 +101,7 @@ ivy/2.5.0//ivy-2.5.0.jar jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.13.4//jackson-core-2.13.4.jar -jackson-databind/2.13.4.1//jackson-databind-2.13.4.1.jar +jackson-databind/2.13.4.2//jackson-databind-2.13.4.2.jar jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar diff --git a/pom.xml b/pom.xml index d933c1c6f6d..78936392b85 100644 --- a/pom.xml +++ b/pom.xml @@ -176,7 +176,7 @@ true 1.9.13 2.13.4 - 2.13.4.1 + 2.13.4.2 1.1.8.4 3.0.2 1.15 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40863][BUILD] Upgrade dropwizard metrics 4.2.12
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 748fa2792e4 [SPARK-40863][BUILD] Upgrade dropwizard metrics 4.2.12 748fa2792e4 is described below commit 748fa2792e488a6b923b32e2898d9bb6e16fb4ca Author: yangjie01 AuthorDate: Fri Oct 21 08:53:29 2022 -0500 [SPARK-40863][BUILD] Upgrade dropwizard metrics 4.2.12 ### What changes were proposed in this pull request? This pr aims upgrade dropwizard metrics from 4.2.10 to 4.2.12. ### Why are the changes needed? The release notes as follows: - https://github.com/dropwizard/metrics/releases/tag/v4.2.11 - https://github.com/dropwizard/metrics/releases/tag/v4.2.12 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass Github Actions Closes #38328 from LuciferYang/metrics-4212. Authored-by: yangjie01 Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 10 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 10 +- pom.xml | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index b7850d2fe60..1d1061aaadb 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -195,11 +195,11 @@ log4j-slf4j2-impl/2.19.0//log4j-slf4j2-impl-2.19.0.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar -metrics-core/4.2.10//metrics-core-4.2.10.jar -metrics-graphite/4.2.10//metrics-graphite-4.2.10.jar -metrics-jmx/4.2.10//metrics-jmx-4.2.10.jar -metrics-json/4.2.10//metrics-json-4.2.10.jar -metrics-jvm/4.2.10//metrics-jvm-4.2.10.jar +metrics-core/4.2.12//metrics-core-4.2.12.jar +metrics-graphite/4.2.12//metrics-graphite-4.2.12.jar +metrics-jmx/4.2.12//metrics-jmx-4.2.12.jar 
+metrics-json/4.2.12//metrics-json-4.2.12.jar +metrics-jvm/4.2.12//metrics-jvm-4.2.12.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.84.Final//netty-all-4.1.84.Final.jar netty-buffer/4.1.84.Final//netty-buffer-4.1.84.Final.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 0f497c99ff9..39a0e617058 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -179,11 +179,11 @@ log4j-slf4j2-impl/2.19.0//log4j-slf4j2-impl-2.19.0.jar logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar lz4-java/1.8.0//lz4-java-1.8.0.jar mesos/1.4.3/shaded-protobuf/mesos-1.4.3-shaded-protobuf.jar -metrics-core/4.2.10//metrics-core-4.2.10.jar -metrics-graphite/4.2.10//metrics-graphite-4.2.10.jar -metrics-jmx/4.2.10//metrics-jmx-4.2.10.jar -metrics-json/4.2.10//metrics-json-4.2.10.jar -metrics-jvm/4.2.10//metrics-jvm-4.2.10.jar +metrics-core/4.2.12//metrics-core-4.2.12.jar +metrics-graphite/4.2.12//metrics-graphite-4.2.12.jar +metrics-jmx/4.2.12//metrics-jmx-4.2.12.jar +metrics-json/4.2.12//metrics-json-4.2.12.jar +metrics-jvm/4.2.12//metrics-jvm-4.2.12.jar minlog/1.3.0//minlog-1.3.0.jar netty-all/4.1.84.Final//netty-all-4.1.84.Final.jar netty-buffer/4.1.84.Final//netty-buffer-4.1.84.Final.jar diff --git a/pom.xml b/pom.xml index f8f3aa2fd4f..d933c1c6f6d 100644 --- a/pom.xml +++ b/pom.xml @@ -145,7 +145,7 @@ If you changes codahale.metrics.version, you also need to change the link to metrics.dropwizard.io in docs/monitoring.md. --> -4.2.10 +4.2.12 1.11.1 1.12.0 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40865][BUILD] Upgrade jodatime to 2.12.0
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 9b7c9051930 [SPARK-40865][BUILD] Upgrade jodatime to 2.12.0 9b7c9051930 is described below commit 9b7c90519307eb40b6eaa641d98c894915b1bcdc Author: yangjie01 AuthorDate: Fri Oct 21 08:52:41 2022 -0500 [SPARK-40865][BUILD] Upgrade jodatime to 2.12.0 ### What changes were proposed in this pull request? This pr aims upgrade jodatime to 2.12.0. ### Why are the changes needed? This version includes: - Add translations for ca, el, eu, fi, hi, hu, in, iw, ms, nn, ro, sk, sv, zh. - DateTimeZone data updated to version 2022egtz. The release notes as following: - https://www.joda.org/joda-time/changes-report.html#a2.12.0 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #38329 from LuciferYang/joda-212. 
Authored-by: yangjie01 Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index ee9977e2592..b7850d2fe60 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -147,7 +147,7 @@ jetty-util/6.1.26//jetty-util-6.1.26.jar jetty-util/9.4.49.v20220914//jetty-util-9.4.49.v20220914.jar jetty/6.1.26//jetty-6.1.26.jar jline/2.14.6//jline-2.14.6.jar -joda-time/2.11.2//joda-time-2.11.2.jar +joda-time/2.12.0//joda-time-2.12.0.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar json/1.8//json-1.8.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 766a28503e4..0f497c99ff9 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -132,7 +132,7 @@ jettison/1.1//jettison-1.1.jar jetty-util-ajax/9.4.49.v20220914//jetty-util-ajax-9.4.49.v20220914.jar jetty-util/9.4.49.v20220914//jetty-util-9.4.49.v20220914.jar jline/2.14.6//jline-2.14.6.jar -joda-time/2.11.2//joda-time-2.11.2.jar +joda-time/2.12.0//joda-time-2.12.0.jar jodd-core/3.5.2//jodd-core-3.5.2.jar jpam/1.1//jpam-1.1.jar json/1.8//json-1.8.jar diff --git a/pom.xml b/pom.xml index 65dfcdb2234..f8f3aa2fd4f 100644 --- a/pom.xml +++ b/pom.xml @@ -192,7 +192,7 @@ 14.0.1 3.1.7 2.36 -2.11.2 +2.12.0 3.5.2 3.0.0 0.12.0 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (0643d02e4f0 -> 3b60637d91b)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 0643d02e4f0 [SPARK-40853][INFRA] Pin `mypy-protobuf==3.3.0` add 3b60637d91b [SPARK-40843][CORE][TESTS] Clean up deprecated api usage in SparkThrowableSuite No new revisions were added by this update. Summary of changes: .../scala/org/apache/spark/SparkThrowableSuite.scala | 18 ++ 1 file changed, 10 insertions(+), 8 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40728][BUILD] Upgrade ASM to 9.4
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new ac0ae9ebc2c [SPARK-40728][BUILD] Upgrade ASM to 9.4 ac0ae9ebc2c is described below commit ac0ae9ebc2c17116eba2371e96c7729f0324e9e5 Author: yangjie01 AuthorDate: Sun Oct 16 11:30:20 2022 -0500 [SPARK-40728][BUILD] Upgrade ASM to 9.4 ### What changes were proposed in this pull request? This PR aims to upgrade ASM to 9.4. ### Why are the changes needed? xbean-asm9-shaded 4.22 upgrade to use ASM 9.4: - https://github.com/apache/geronimo-xbean/pull/32 - https://asm.ow2.io/versions.html - https://issues.apache.org/jira/browse/XBEAN-336 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #38189 from LuciferYang/asm-94. Lead-authored-by: yangjie01 Co-authored-by: YangJie Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 6 +++--- project/plugins.sbt | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 934b5e7e407..ee9977e2592 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -263,7 +263,7 @@ tink/1.7.0//tink-1.7.0.jar transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar velocity/1.5//velocity-1.5.jar -xbean-asm9-shaded/4.21//xbean-asm9-shaded-4.21.jar +xbean-asm9-shaded/4.22//xbean-asm9-shaded-4.22.jar xercesImpl/2.12.2//xercesImpl-2.12.2.jar xml-apis/1.4.01//xml-apis-1.4.01.jar xmlenc/0.52//xmlenc-0.52.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index 81452650f0e..766a28503e4 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ 
b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -251,7 +251,7 @@ transaction-api/1.1//transaction-api-1.1.jar univocity-parsers/2.9.1//univocity-parsers-2.9.1.jar velocity/1.5//velocity-1.5.jar wildfly-openssl/1.0.7.Final//wildfly-openssl-1.0.7.Final.jar -xbean-asm9-shaded/4.21//xbean-asm9-shaded-4.21.jar +xbean-asm9-shaded/4.22//xbean-asm9-shaded-4.22.jar xz/1.8//xz-1.8.jar zjsonpatch/0.3.0//zjsonpatch-0.3.0.jar zookeeper-jute/3.6.2//zookeeper-jute-3.6.2.jar diff --git a/pom.xml b/pom.xml index 0071a6eb246..21aa29ef3b9 100644 --- a/pom.xml +++ b/pom.xml @@ -474,7 +474,7 @@ org.apache.xbean xbean-asm9-shaded -4.21 +4.22
[spark] branch master updated (99abc94039e -> f8403f9e0a8)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 99abc94039e [SPARK-40801][BUILD] Upgrade `Apache commons-text` to 1.10 add f8403f9e0a8 [SPARK-40771][CORE] Fix overflow in broadcast estimatedTotalSize log message No new revisions were added by this update. Summary of changes: core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark-website] branch asf-site updated: Add GraalSystems in Powered By solutions and companies
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new b70a0c524 Add GraalSystems in Powered By solutions and companies b70a0c524 is described below commit b70a0c524cfab55f0307b3d9cfd652b18756d81a Author: Vincent Devillers AuthorDate: Thu Oct 13 17:27:45 2022 -0500 Add GraalSystems in Powered By solutions and companies Author: Vincent Devillers Author: Vincent Devillers <> Closes #420 from Treydone/patch-1. --- powered-by.md| 2 +- site/powered-by.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/powered-by.md b/powered-by.md index 9f14fda66..048108882 100644 --- a/powered-by.md +++ b/powered-by.md @@ -118,7 +118,7 @@ and external data sources, driving holistic and actionable insights. activity in real time - http://www.fundacionctic.org";>Fundacion CTIC - https://graal.systems";>GraalSystems - - GraalSystems is a cloud-native data platform that can be used erverywhere, on cloud + - GraalSystems is a cloud-native data platform that can be used everywhere, on cloud environments or on bare-metal infrastructures. - https://www.groupon.com";>Groupon - https://www.godatadriven.com";>GoDataDriven diff --git a/site/powered-by.html b/site/powered-by.html index 5da8af5b3..42892e605 100644 --- a/site/powered-by.html +++ b/site/powered-by.html @@ -295,7 +295,7 @@ activity in real time http://www.fundacionctic.org";>Fundacion CTIC https://graal.systems";>GraalSystems - GraalSystems is a cloud-native data platform that can be used erverywhere, on cloud + GraalSystems is a cloud-native data platform that can be used everywhere, on cloud environments or on bare-metal infrastructures. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark-website] branch asf-site updated: Add GraalSystems in Powered By solutions and companies
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new 0ead62b00 Add GraalSystems in Powered By solutions and companies 0ead62b00 is described below commit 0ead62b00504d43666dd5105a517093969684a33 Author: Vincent Devillers AuthorDate: Thu Oct 13 13:42:05 2022 -0500 Add GraalSystems in Powered By solutions and companies Author: Vincent Devillers Author: Vincent Devillers <> Closes #419 from Treydone/patch-1. --- powered-by.md| 3 +++ site/powered-by.html | 6 ++ 2 files changed, 9 insertions(+) diff --git a/powered-by.md b/powered-by.md index 07d109d7e..9f14fda66 100644 --- a/powered-by.md +++ b/powered-by.md @@ -117,6 +117,9 @@ and external data sources, driving holistic and actionable insights. - We are using Spark for analyzing and visualizing patterns in large-scale recordings of brain activity in real time - http://www.fundacionctic.org";>Fundacion CTIC +- https://graal.systems";>GraalSystems + - GraalSystems is a cloud-native data platform that can be used erverywhere, on cloud + environments or on bare-metal infrastructures. - https://www.groupon.com";>Groupon - https://www.godatadriven.com";>GoDataDriven - Amsterdam based consultancy company helping companies to be successful with Spark diff --git a/site/powered-by.html b/site/powered-by.html index 6b0466b3a..5da8af5b3 100644 --- a/site/powered-by.html +++ b/site/powered-by.html @@ -293,6 +293,12 @@ activity in real time http://www.fundacionctic.org";>Fundacion CTIC + https://graal.systems";>GraalSystems + + GraalSystems is a cloud-native data platform that can be used erverywhere, on cloud +environments or on bare-metal infrastructures. 
+ + https://www.groupon.com";>Groupon https://www.godatadriven.com";>GoDataDriven - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [SPARK-40782][BUILD] Upgrade `jackson-databind` to 2.13.4.1
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 27ca30aaad4 [SPARK-40782][BUILD] Upgrade `jackson-databind` to 2.13.4.1 27ca30aaad4 is described below commit 27ca30aaad41e4dd50834d255720fb46a36d9e6d Author: yangjie01 AuthorDate: Thu Oct 13 10:29:59 2022 -0500 [SPARK-40782][BUILD] Upgrade `jackson-databind` to 2.13.4.1 ### What changes were proposed in this pull request? This pr aims upgrade `jackson-databind` to 2.13.4.1. ### Why are the changes needed? This is a bug fix version related to [CVE-2022-42003] - https://github.com/FasterXML/jackson-databind/pull/3621 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass GitHub Actions Closes #38235 from LuciferYang/SPARK-40782. Authored-by: yangjie01 Signed-off-by: Sean Owen (cherry picked from commit 2a8b2a136d5a705526bb76697596f5ad01ce391d) Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index fb9c36a26a1..55515614ab8 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -115,7 +115,7 @@ ivy/2.5.0//ivy-2.5.0.jar jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.13.4//jackson-core-2.13.4.jar -jackson-databind/2.13.4//jackson-databind-2.13.4.jar +jackson-databind/2.13.4.1//jackson-databind-2.13.4.1.jar jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar diff --git 
a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index f6e09eff50a..9fc9dca09b0 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -105,7 +105,7 @@ ivy/2.5.0//ivy-2.5.0.jar jackson-annotations/2.13.4//jackson-annotations-2.13.4.jar jackson-core-asl/1.9.13//jackson-core-asl-1.9.13.jar jackson-core/2.13.4//jackson-core-2.13.4.jar -jackson-databind/2.13.4//jackson-databind-2.13.4.jar +jackson-databind/2.13.4.1//jackson-databind-2.13.4.1.jar jackson-dataformat-cbor/2.13.4//jackson-dataformat-cbor-2.13.4.jar jackson-dataformat-yaml/2.13.4//jackson-dataformat-yaml-2.13.4.jar jackson-datatype-jsr310/2.13.4//jackson-datatype-jsr310-2.13.4.jar diff --git a/pom.xml b/pom.xml index d7ed56329fd..43f9c30422f 100644 --- a/pom.xml +++ b/pom.xml @@ -172,7 +172,7 @@ true 1.9.13 2.13.4 - 2.13.4 + 2.13.4.1 1.1.8.4 1.1.2 2.2.1 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (ac07cea234f -> 2a8b2a136d5)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from ac07cea234f [SPARK-40611][SQL] Improve the performance of `setInterval` & `getInterval` for `UnsafeRow` add 2a8b2a136d5 [SPARK-40782][BUILD] Upgrade `jackson-databind` to 2.13.4.1 No new revisions were added by this update. Summary of changes: dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [MINOR][BUILD] Add `dist` dir to `fileset` of `maven-clean-plugin`
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new cb53e34343b [MINOR][BUILD] Add `dist` dir to `fileset` of `maven-clean-plugin` cb53e34343b is described below commit cb53e34343b3fb7481e5da98347772a8359d27e1 Author: yangjie01 AuthorDate: Wed Oct 12 18:39:01 2022 -0500 [MINOR][BUILD] Add `dist` dir to `fileset` of `maven-clean-plugin` ### What changes were proposed in this pull request? This PR adds the `dist` dir to the `fileset` of `maven-clean-plugin` so that `mvn clean` can delete the `dist` dir which is created by `dev/make-distribution.sh`. ### Why are the changes needed? `dev/make-distribution.sh` will create a dist dir but no one cleans it up. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Local test to confirm that `dist` dir can be cleaned Closes #38215 from LuciferYang/clean-dist. Authored-by: yangjie01 Signed-off-by: Sean Owen --- pom.xml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pom.xml b/pom.xml index 97e19fa604b..cab9929954b 100644 --- a/pom.xml +++ b/pom.xml @@ -3086,6 +3086,9 @@ spark-warehouse + +dist + - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [MINOR][BUILD] Handle empty PR body in merge script
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 8d8fac2f591 [MINOR][BUILD] Handle empty PR body in merge script 8d8fac2f591 is described below commit 8d8fac2f59122e101a2e7f74cd4971c1d7152797 Author: Sean Owen AuthorDate: Tue Oct 11 13:59:36 2022 -0500 [MINOR][BUILD] Handle empty PR body in merge script ### What changes were proposed in this pull request? Handle the case where the PR body is empty, when merging a PR with the merge script. ### Why are the changes needed? The script fails otherwise. Although we should not have empty PR descriptions, it should at least not break the script. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A Closes #38207 from srowen/DevMergePrBody. Authored-by: Sean Owen Signed-off-by: Sean Owen --- dev/merge_spark_pr.py | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/merge_spark_pr.py b/dev/merge_spark_pr.py index e21a39a6881..1621432c01c 100755 --- a/dev/merge_spark_pr.py +++ b/dev/merge_spark_pr.py @@ -508,8 +508,11 @@ def main(): else: title = pr["title"] -modified_body = re.sub(re.compile(r"\n?", re.DOTALL), "", pr["body"]).lstrip() -if modified_body != pr["body"]: +body = pr["body"] +if body is None: +body = "" +modified_body = re.sub(re.compile(r"\n?", re.DOTALL), "", body).lstrip() +if modified_body != body: print("=" * 80) print(modified_body) print("=" * 80) @@ -519,13 +522,10 @@ def main(): body = modified_body print("Using modified body:") else: -body = pr["body"] print("Using original body:") print("=" * 80) print(body) print("=" * 80) -else: -body = pr["body"] target_ref = pr["base"]["ref"] user_login = pr["user"]["login"] base_ref = pr["head"]["ref"] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: 
commits-h...@spark.apache.org
[spark] branch master updated (1103d29f168 -> 6bbf4f5f4e6)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 1103d29f168 [MINOR] Fix grammar in error message add 6bbf4f5f4e6 [SPARK-40745][MLLIB] Reduce the shuffle size of ALS in `.mllib` No new revisions were added by this update. Summary of changes: .../spark/mllib/rdd/MLPairRDDFunctions.scala | 34 +- 1 file changed, 26 insertions(+), 8 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (996e407bd32 -> 1103d29f168)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 996e407bd32 [SPARK-40663][SQL] Migrate execution errors onto error classes: _LEGACY_ERROR_TEMP_2076-2100 add 1103d29f168 [MINOR] Fix grammar in error message No new revisions were added by this update. Summary of changes: .../sql/catalyst/analysis/CheckAnalysis.scala | 4 +- .../sql/catalyst/analysis/AnalysisErrorSuite.scala | 12 +- .../sql/catalyst/analysis/AnalysisSuite.scala | 20 +-- .../resources/sql-tests/results/except-all.sql.out | 2 +- .../sql-tests/results/intersect-all.sql.out| 2 +- .../native/widenSetOperationTypes.sql.out | 140 ++--- .../sql-tests/results/udf/udf-except-all.sql.out | 2 +- .../results/udf/udf-intersect-all.sql.out | 2 +- .../spark/sql/DataFrameSetOperationsSuite.scala| 8 +- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 2 +- 10 files changed, 97 insertions(+), 97 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (8e31554bf07 -> efd9ef99bd7)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 8e31554bf07 [SPARK-40742][CORE][SQL] Fix Java compilation warnings related to generic type add efd9ef99bd7 [SPARK-40735] Consistently invoke bash with /usr/bin/env bash in scripts to make code more portable No new revisions were added by this update. Summary of changes: R/check-cran.sh | 2 +- R/create-docs.sh| 2 +- R/create-rd.sh | 2 +- R/find-r.sh | 2 +- R/install-dev.sh| 2 +- R/install-source-package.sh | 2 +- R/run-tests.sh | 2 +- bin/sparkR | 2 +- binder/postBuild| 2 +- connector/connect/dev/generate_protos.sh| 2 ++ connector/docker/build | 2 +- connector/docker/spark-test/build | 2 +- connector/docker/spark-test/master/default_cmd | 2 +- connector/docker/spark-test/worker/default_cmd | 2 +- core/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 2 +- .../kubernetes/docker/src/main/dockerfiles/spark/entrypoint.sh | 2 +- sql/create-docs.sh | 2 +- 17 files changed, 18 insertions(+), 16 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-3.3 updated: [SPARK-40705][SQL] Handle case of using mutable array when converting Row to JSON for Scala 2.13
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new fdc51c73fb0 [SPARK-40705][SQL] Handle case of using mutable array when converting Row to JSON for Scala 2.13 fdc51c73fb0 is described below commit fdc51c73fb08eb2cd234cdaf1032a4e54ff0b1a4 Author: Ait Zeouay Amrane AuthorDate: Mon Oct 10 10:18:51 2022 -0500 [SPARK-40705][SQL] Handle case of using mutable array when converting Row to JSON for Scala 2.13 ### What changes were proposed in this pull request? I encountered an issue using Spark while reading JSON files based on a schema it throws every time an exception related to conversion of types. >Note: This issue can be reproduced only with Scala `2.13`, I'm not having this issue with `2.12` Failed to convert value ArraySeq(1, 2, 3) (class of class scala.collection.mutable.ArraySeq$ofRef}) with the type of ArrayType(StringType,true) to JSON. java.lang.IllegalArgumentException: Failed to convert value ArraySeq(1, 2, 3) (class of class scala.collection.mutable.ArraySeq$ofRef}) with the type of ArrayType(StringType,true) to JSON. If I add ArraySeq to the matching cases, the test that I added passed successfully ![image](https://user-images.githubusercontent.com/28459763/194669557-2f13032f-126f-4c2e-bc6d-1a4cfd0a009d.png) With the current code source, the test fails and we have this following error ![image](https://user-images.githubusercontent.com/28459763/194669654-19cefb13-180c-48ac-9206-69d8f672f64c.png) ### Why are the changes needed? If the person is using Scala 2.13, they can't parse an array. Which means they need to fallback to 2.12 to keep the project functioning ### How was this patch tested? I added a sample unit test for the case, but I can add more if you want to. Closes #38154 from Amraneze/fix/spark_40705. 
Authored-by: Ait Zeouay Amrane Signed-off-by: Sean Owen (cherry picked from commit 9a97f8c62bcd1ad9f34c6318792ae443af46ea85) Signed-off-by: Sean Owen --- sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala| 2 ++ .../src/test/scala/org/apache/spark/sql/RowTest.scala | 11 +++ 2 files changed, 13 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala index 4f6c9a8c703..72e1dd94c94 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala @@ -584,6 +584,8 @@ trait Row extends Serializable { case (i: CalendarInterval, _) => JString(i.toString) case (a: Array[_], ArrayType(elementType, _)) => iteratorToJsonArray(a.iterator, elementType) + case (a: mutable.ArraySeq[_], ArrayType(elementType, _)) => +iteratorToJsonArray(a.iterator, elementType) case (s: Seq[_], ArrayType(elementType, _)) => iteratorToJsonArray(s.iterator, elementType) case (m: Map[String @unchecked, _], MapType(StringType, valueType, _)) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala index 385f7497368..82731cdb220 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RowTest.scala @@ -17,6 +17,9 @@ package org.apache.spark.sql +import scala.collection.mutable.ArraySeq + +import org.json4s.JsonAST.{JArray, JObject, JString} import org.scalatest.funspec.AnyFunSpec import org.scalatest.matchers.must.Matchers import org.scalatest.matchers.should.Matchers._ @@ -91,6 +94,14 @@ class RowTest extends AnyFunSpec with Matchers { it("getAs() on type extending AnyVal does not throw exception when value is null") { sampleRowWithoutCol3.getAs[String](sampleRowWithoutCol3.fieldIndex("col1")) shouldBe null } + +it("json should convert a mutable array to JSON") { + val schema = 
new StructType().add(StructField("list", ArrayType(StringType))) + val values = ArraySeq("1", "2", "3") + val row = new GenericRowWithSchema(Array(values), schema) + val expectedList = JArray(JString("1") :: JString("2") :: JString("3") :: Nil) + row.jsonValue shouldBe new JObject(("list", expectedList) :: Nil) +} } describe("row equals") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (9e8198d3115 -> 9a97f8c62bc)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 9e8198d3115 [SPARK-40726][DOCS] Supplement undocumented orc configurations in documentation add 9a97f8c62bc [SPARK-40705][SQL] Handle case of using mutable array when converting Row to JSON for Scala 2.13 No new revisions were added by this update. Summary of changes: sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala| 2 ++ .../src/test/scala/org/apache/spark/sql/RowTest.scala | 11 +++ 2 files changed, 13 insertions(+) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40726][DOCS] Supplement undocumented orc configurations in documentation
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 9e8198d3115 [SPARK-40726][DOCS] Supplement undocumented orc configurations in documentation 9e8198d3115 is described below commit 9e8198d3115848ba87b4c71b43fd7212a1b729c3 Author: Qian.Sun AuthorDate: Mon Oct 10 09:59:37 2022 -0500 [SPARK-40726][DOCS] Supplement undocumented orc configurations in documentation ### What changes were proposed in this pull request? This PR aims to supplement undocumented orc configurations in documentation. ### Why are the changes needed? Help users to confirm configurations through documentation instead of code. ### Does this PR introduce _any_ user-facing change? Yes, more configurations in documentations. ### How was this patch tested? Pass the GA. Closes #38188 from dcoliversun/SPARK-40726. Authored-by: Qian.Sun Signed-off-by: Sean Owen --- docs/sql-data-sources-orc.md | 37 + 1 file changed, 37 insertions(+) diff --git a/docs/sql-data-sources-orc.md b/docs/sql-data-sources-orc.md index 28e237a382d..200037a7dea 100644 --- a/docs/sql-data-sources-orc.md +++ b/docs/sql-data-sources-orc.md @@ -153,6 +153,24 @@ When reading from Hive metastore ORC tables and inserting to Hive metastore ORC 2.3.0 + +spark.sql.orc.columnarReaderBatchSize +4096 + + The number of rows to include in an orc vectorized reader batch. The number should + be carefully chosen to minimize overhead and avoid OOMs in reading data. + +2.4.0 + + +spark.sql.orc.columnarWriterBatchSize +1024 + + The number of rows to include in an orc vectorized writer batch. The number should + be carefully chosen to minimize overhead and avoid OOMs in writing data. 
+ +3.4.0 + spark.sql.orc.enableNestedColumnVectorizedReader false @@ -163,6 +181,25 @@ When reading from Hive metastore ORC tables and inserting to Hive metastore ORC 3.2.0 + +spark.sql.orc.filterPushdown +true + + When true, enable filter pushdown for ORC files. + +1.4.0 + + +spark.sql.orc.aggregatePushdown +false + + If true, aggregates will be pushed down to ORC for optimization. Support MIN, MAX and + COUNT as aggregate expression. For MIN/MAX, support boolean, integer, float and date + type. For COUNT, support all data types. If statistics is missing from any ORC file + footer, exception would be thrown. + +3.3.0 + spark.sql.orc.mergeSchema false - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40675][DOCS] Supplement undocumented spark configurations in `configuration.md`
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new cd7ca92051b [SPARK-40675][DOCS] Supplement undocumented spark configurations in `configuration.md` cd7ca92051b is described below commit cd7ca92051b55c615b8db07030ea3af469dd4da4 Author: Qian.Sun AuthorDate: Sun Oct 9 10:12:19 2022 -0500 [SPARK-40675][DOCS] Supplement undocumented spark configurations in `configuration.md` ### What changes were proposed in this pull request? This PR aims to supplement missing spark configurations in `org.apache.spark.internal.config` in `configuration.md`. ### Why are the changes needed? Help users to confirm configuration through documentation instead of code. ### Does this PR introduce _any_ user-facing change? Yes, more configurations in documentation. ### How was this patch tested? Pass the GitHub Actions. Closes #38131 from dcoliversun/SPARK-40675. Authored-by: Qian.Sun Signed-off-by: Sean Owen --- docs/configuration.md | 314 +- 1 file changed, 313 insertions(+), 1 deletion(-) diff --git a/docs/configuration.md b/docs/configuration.md index 16c9fdfdf9f..b528c766884 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -468,6 +468,43 @@ of the most common options to set are: 3.0.0 + + spark.decommission.enabled + false + +When decommission enabled, Spark will try its best to shut down the executor gracefully. +Spark will try to migrate all the RDD blocks (controlled by spark.storage.decommission.rddBlocks.enabled) +and shuffle blocks (controlled by spark.storage.decommission.shuffleBlocks.enabled) from the decommissioning +executor to a remote executor when spark.storage.decommission.enabled is enabled. +With decommission enabled, Spark will also decommission an executor instead of killing when spark.dynamicAllocation.enabled enabled. 
+ + 3.1.0 + + + spark.executor.decommission.killInterval + (none) + +Duration after which a decommissioned executor will be killed forcefully by an outside (e.g. non-spark) service. + + 3.1.0 + + + spark.executor.decommission.forceKillTimeout + (none) + +Duration after which a Spark will force a decommissioning executor to exit. +This should be set to a high value in most situations as low values will prevent block migrations from having enough time to complete. + + 3.2.0 + + + spark.executor.decommission.signal + PWR + +The signal that used to trigger the executor to start decommission. + + 3.2.0 + Apart from these, the following properties are also available, and may be useful in some situations: @@ -681,7 +718,7 @@ Apart from these, the following properties are also available, and may be useful spark.redaction.regex - (?i)secret|password|token + (?i)secret|password|token|access[.]key Regex to decide which Spark configuration properties and environment variables in driver and executor environments contain sensitive information. When this regex matches a property key or @@ -689,6 +726,16 @@ Apart from these, the following properties are also available, and may be useful 2.1.2 + + spark.redaction.string.regex + (none) + +Regex to decide which parts of strings produced by Spark contain sensitive information. +When this regex matches a string part, that string part is replaced by a dummy value. +This is currently used to redact the output of SQL explain commands. + + 2.2.0 + spark.python.profile false @@ -906,6 +953,23 @@ Apart from these, the following properties are also available, and may be useful 1.4.0 + + spark.shuffle.unsafe.file.output.buffer + 32k + +The file system for this buffer size after each partition is written in unsafe shuffle writer. +In KiB unless otherwise specified. + + 2.3.0 + + + spark.shuffle.spill.diskWriteBufferSize + 1024 * 1024 + +The buffer size, in bytes, to use when writing the sorted records to an on-disk file. 
+ + 2.3.0 + spark.shuffle.io.maxRetries 3 @@ -988,6 +1052,17 @@ Apart from these, the following properties are also available, and may be useful 1.2.0 + + spark.shuffle.service.name + spark_shuffle + +The configured name of the Spark shuffle service the client should communicate with. +This must match the name used to configure the Shuffle within the YARN NodeManager configuration +(yarn.nodemanager.aux-services). Only takes effect +when spark.shuffle.service.enabled is set to true. + + 3.2.0 + spark.shuffle.service.index.cache.size 100m @@ -1028,6 +1103,14 @@ Apart from these, the following properties are also available, and may be useful
[spark] branch master updated: [SPARK-40710][DOCS] Supplement undocumented parquet configurations in documentation
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f39b75ccbdc [SPARK-40710][DOCS] Supplement undocumented parquet configurations in documentation f39b75ccbdc is described below commit f39b75ccbdcac6a9d67c61ed399f5c03603cada7 Author: Qian.Sun AuthorDate: Sun Oct 9 10:11:05 2022 -0500 [SPARK-40710][DOCS] Supplement undocumented parquet configurations in documentation ### What changes were proposed in this pull request? This PR aims to supplement undocumented parquet configurations in documentation. ### Why are the changes needed? Help users to confirm configurations through documentation instead of code. ### Does this PR introduce _any_ user-facing change? Yes, more configurations in documentation. ### How was this patch tested? Pass the GA. Closes #38160 from dcoliversun/SPARK-40710. Authored-by: Qian.Sun Signed-off-by: Sean Owen --- docs/sql-data-sources-parquet.md | 122 +++ 1 file changed, 122 insertions(+) diff --git a/docs/sql-data-sources-parquet.md b/docs/sql-data-sources-parquet.md index 2189892c928..de339c21ef2 100644 --- a/docs/sql-data-sources-parquet.md +++ b/docs/sql-data-sources-parquet.md @@ -454,6 +454,28 @@ Configuration of Parquet can be done using the `setConf` method on `SparkSession 1.3.0 + + spark.sql.parquet.int96TimestampConversion + false + +This controls whether timestamp adjustments should be applied to INT96 data when +converting to timestamps, for data written by Impala. This is necessary because Impala +stores INT96 data with a different timezone offset than Hive & Spark. + + 2.3.0 + + + spark.sql.parquet.outputTimestampType + INT96 + +Sets which Parquet timestamp type to use when Spark writes data to Parquet files. +INT96 is a non-standard but commonly used timestamp type in Parquet. 
TIMESTAMP_MICROS +is a standard timestamp type in Parquet, which stores number of microseconds from the +Unix epoch. TIMESTAMP_MILLIS is also standard, but with millisecond precision, which +means Spark has to truncate the microsecond portion of its timestamp value. + + 2.3.0 + spark.sql.parquet.compression.codec snappy @@ -473,6 +495,17 @@ Configuration of Parquet can be done using the `setConf` method on `SparkSession Enables Parquet filter push-down optimization when set to true. 1.2.0 + + spark.sql.parquet.aggregatePushdown + false + +If true, aggregates will be pushed down to Parquet for optimization. Support MIN, MAX +and COUNT as aggregate expression. For MIN/MAX, support boolean, integer, float and date +type. For COUNT, support all data types. If statistics is missing from any Parquet file +footer, exception would be thrown. + + 3.3.0 + spark.sql.hive.convertMetastoreParquet true @@ -493,6 +526,17 @@ Configuration of Parquet can be done using the `setConf` method on `SparkSession 1.5.0 + + spark.sql.parquet.respectSummaryFiles + false + +When true, we make assumption that all part-files of Parquet are consistent with +summary files and we will ignore them when merging schema. Otherwise, if this is +false, which is the default, we will merge all part-files. This should be considered +as expert-only option, and shouldn't be enabled before knowing what it means exactly. + + 1.5.0 + spark.sql.parquet.writeLegacyFormat false @@ -505,6 +549,84 @@ Configuration of Parquet can be done using the `setConf` method on `SparkSession 1.6.0 + + spark.sql.parquet.enableVectorizedReader + true + +Enables vectorized parquet decoding. + + 2.0.0 + + + spark.sql.parquet.enableNestedColumnVectorizedReader + true + +Enables vectorized Parquet decoding for nested columns (e.g., struct, list, map). +Requires spark.sql.parquet.enableVectorizedReader to be enabled. 
+ + 3.3.0 + + + spark.sql.parquet.recordLevelFilter.enabled + false + +If true, enables Parquet's native record-level filtering using the pushed down filters. +This configuration only has an effect when spark.sql.parquet.filterPushdown +is enabled and the vectorized reader is not used. You can ensure the vectorized reader +is not used by setting spark.sql.parquet.enableVectorizedReader to false. + + 2.3.0 + + + spark.sql.parquet.columnarReaderBatchSize + 4096 + +The number of rows to include in a parquet vectorized reader batch. The number should +be carefully chosen to minimize overhead and avoid OOMs in reading data. + + 2.4.0 + + + spark.sql.parquet.fieldId.write.enabled + true + +Field ID is a native field of the Parquet schema spec
[spark] branch master updated: [SPARK-40709][DOCS] Supplement undocumented avro configurations in documentation
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 4b8f0e57821 [SPARK-40709][DOCS] Supplement undocumented avro configurations in documentation 4b8f0e57821 is described below commit 4b8f0e57821b4827bc72791120e6e039b926fbc8 Author: Qian.Sun AuthorDate: Sun Oct 9 10:10:37 2022 -0500 [SPARK-40709][DOCS] Supplement undocumented avro configurations in documentation ### What changes were proposed in this pull request? This PR aims to supplement undocumented avro configurations in documentation. ### Why are the changes needed? Help users to confirm configuration through documentation instead of code. ### Does this PR introduce _any_ user-facing change? Yes, more configurations in documentation ### How was this patch tested? Pass the GA Closes #38156 from dcoliversun/SPARK-40709. Authored-by: Qian.Sun Signed-off-by: Sean Owen --- docs/sql-data-sources-avro.md | 8 1 file changed, 8 insertions(+) diff --git a/docs/sql-data-sources-avro.md b/docs/sql-data-sources-avro.md index 4422baa4c29..c12617ea922 100644 --- a/docs/sql-data-sources-avro.md +++ b/docs/sql-data-sources-avro.md @@ -371,6 +371,14 @@ Configuration of Avro can be done using the `setConf` method on SparkSession or 3.0.0 + +spark.sql.avro.filterPushdown.enabled +true + + When true, enable filter pushdown to Avro datasource. + +3.1.0 + ## Compatibility with Databricks spark-avro - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40699][DOCS] Supplement undocumented yarn configurations in documentation
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 51e8ca3635d [SPARK-40699][DOCS] Supplement undocumented yarn configurations in documentation 51e8ca3635d is described below commit 51e8ca3635d62d470721e7ce0f7e868b6b57334c Author: Qian.Sun AuthorDate: Sun Oct 9 10:10:06 2022 -0500 [SPARK-40699][DOCS] Supplement undocumented yarn configurations in documentation ### What changes were proposed in this pull request? This PR aims to supplement undocumented yarn configuration in documentation. ### Why are the changes needed? Help users to confirm yarn configurations through documentation instead of code. ### Does this PR introduce _any_ user-facing change? Yes, more configurations in documentation. ### How was this patch tested? Pass the GA. Closes #38150 from dcoliversun/SPARK-40699. Authored-by: Qian.Sun Signed-off-by: Sean Owen --- docs/running-on-yarn.md | 41 + 1 file changed, 41 insertions(+) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index ea117f31357..4112c71cdf9 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -486,6 +486,20 @@ To use a custom metrics.properties for the application master and executors, upd 3.3.0 + + spark.yarn.am.tokenConfRegex + (none) + +This config is only supported when Hadoop version is 2.9+ or 3.x (e.g., when using the Hadoop 3.x profile). +The value of this config is a regex expression used to grep a list of config entries from the job's configuration file (e.g., hdfs-site.xml) +and send to RM, which uses them when renewing delegation tokens. 
A typical use case of this feature is to support delegation +tokens in an environment where a YARN cluster needs to talk to multiple downstream HDFS clusters, where the YARN RM may not have configs +(e.g., dfs.nameservices, dfs.ha.namenodes.*, dfs.namenode.rpc-address.*) to connect to these clusters. +In this scenario, Spark users can specify the config value to be ^dfs.nameservices$|^dfs.namenode.rpc-address.*$|^dfs.ha.namenodes.*$ to parse +these HDFS configs from the job's local configuration files. This config is very similar to mapreduce.job.send-token-conf. Please check YARN-5910 for more details. + + 3.3.0 + spark.yarn.executor.failuresValidityInterval (none) @@ -632,6 +646,33 @@ To use a custom metrics.properties for the application master and executors, upd 0.9.0 + + spark.yarn.clientLaunchMonitorInterval + 1s + +Interval between requests for status the client mode AM when starting the app. + + 2.3.0 + + + spark.yarn.includeDriverLogsLink + false + +In cluster mode, whether the client application report includes links to the driver +container's logs. This requires polling the ResourceManager's REST API, so it +places some additional load on the RM. + + 3.1.0 + + + spark.yarn.unmanagedAM.enabled + false + +In client mode, whether to launch the Application Master service as part of the client +using unmanaged am. + + 3.0.0 + Available patterns for SHS custom executor log URL - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (44151c7908b -> f77c8f33e68)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 44151c7908b [SPARK-40663][SQL] Migrate execution errors onto error classes: _LEGACY_ERROR_TEMP_2026-2050 add f77c8f33e68 [SPARK-40712][BUILD] Upgrade `sbt-assembly` plugin to 1.2.0 No new revisions were added by this update. Summary of changes: project/SparkBuild.scala | 1 - project/plugins.sbt | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark-website] branch asf-site updated: Add Yikun Jiang to committers
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new 1abe74811 Add Yikun Jiang to committers 1abe74811 is described below commit 1abe74811ae0eeb8a5e2a1b505adb47cb248faaa Author: Yikun Jiang AuthorDate: Sat Oct 8 10:53:52 2022 -0500 Add Yikun Jiang to committers Author: Yikun Jiang Closes #418 from Yikun/add-yikun. --- committers.md| 1 + site/committers.html | 4 2 files changed, 5 insertions(+) diff --git a/committers.md b/committers.md index 06256b2a2..a16b33d31 100644 --- a/committers.md +++ b/committers.md @@ -41,6 +41,7 @@ navigation: |Dongjoon Hyun|Apple| |Kazuaki Ishizaki|IBM| |Xingbo Jiang|Databricks| +|Yikun Jiang|Huawei| |Holden Karau|Apple| |Shane Knapp|UC Berkeley| |Cody Koeninger|Nexstar Digital| diff --git a/site/committers.html b/site/committers.html index 099827858..94ad1662e 100644 --- a/site/committers.html +++ b/site/committers.html @@ -256,6 +256,10 @@ Xingbo Jiang Databricks + + Yikun Jiang + Huawei + Holden Karau Apple - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [MINOR][DOCS] Reviews and updates the doc links for running-on-yarn
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 309638eeefb [MINOR][DOCS] Reviews and updates the doc links for running-on-yarn 309638eeefb is described below commit 309638eeefbfb13dae8dbded0279bf44390389ee Author: panbingkun AuthorDate: Fri Oct 7 20:38:43 2022 -0500 [MINOR][DOCS] Reviews and updates the doc links for running-on-yarn ### What changes were proposed in this pull request? The pr aim to reviews and updates the doc links for running-on-yarn. ### Why are the changes needed? Improve docs. After SPARK-39863, hadoop has been upgraded to version 3.3.4, but docs still retains the old link. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually verified. Closes #38129 from panbingkun/fix_doc_for_running-on-yarn. Authored-by: panbingkun Signed-off-by: Sean Owen --- docs/running-on-yarn.md | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 03179da115b..ea117f31357 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -163,7 +163,7 @@ To use a custom metrics.properties for the application master and executors, upd Amount of resource to use for the YARN Application Master in client mode. In cluster mode, use spark.yarn.driver.resource.<resource-type>.amount instead. 
Please note that this feature can be used only with YARN 3.0+ -For reference, see YARN Resource Model documentation: https://hadoop.apache.org/docs/r3.0.1/hadoop-yarn/hadoop-yarn-site/ResourceModel.html +For reference, see YARN Resource Model documentation: https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceModel.html Example: To request GPU resources from YARN, use: spark.yarn.am.resource.yarn.io/gpu.amount @@ -185,7 +185,7 @@ To use a custom metrics.properties for the application master and executors, upd Amount of resource to use for the YARN Application Master in cluster mode. Please note that this feature can be used only with YARN 3.0+ -For reference, see YARN Resource Model documentation: https://hadoop.apache.org/docs/r3.0.1/hadoop-yarn/hadoop-yarn-site/ResourceModel.html +For reference, see YARN Resource Model documentation: https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceModel.html Example: To request GPU resources from YARN, use: spark.yarn.driver.resource.yarn.io/gpu.amount @@ -198,7 +198,7 @@ To use a custom metrics.properties for the application master and executors, upd Amount of resource to use per executor process. Please note that this feature can be used only with YARN 3.0+ -For reference, see YARN Resource Model documentation: https://hadoop.apache.org/docs/r3.0.1/hadoop-yarn/hadoop-yarn-site/ResourceModel.html +For reference, see YARN Resource Model documentation: https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceModel.html Example: To request GPU resources from YARN, use: spark.yarn.executor.resource.yarn.io/gpu.amount @@ -956,7 +956,7 @@ Or ``` The two `spark-*-config` directories each contain one file, `spark-shuffle-site.xml`. 
These are XML -files in the [Hadoop Configuration format](https://hadoop.apache.org/docs/r3.2.2/api/org/apache/hadoop/conf/Configuration.html) +files in the [Hadoop Configuration format](https://hadoop.apache.org/docs/current/api/org/apache/hadoop/conf/Configuration.html) which each contain a few configurations to adjust the port number and metrics name prefix used: ```xml - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark-website] branch asf-site updated: Bump rexml from 3.2.4 to 3.2.5 (#416)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new dcb272fa5 Bump rexml from 3.2.4 to 3.2.5 (#416) dcb272fa5 is described below commit dcb272fa51424f1ad8b953a0bc4b948c57e09945 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Thu Oct 6 18:44:34 2022 -0500 Bump rexml from 3.2.4 to 3.2.5 (#416) Bumps [rexml](https://github.com/ruby/rexml) from 3.2.4 to 3.2.5. - [Release notes](https://github.com/ruby/rexml/releases) - [Changelog](https://github.com/ruby/rexml/blob/master/NEWS.md) - [Commits](https://github.com/ruby/rexml/compare/v3.2.4...v3.2.5) --- updated-dependencies: - dependency-name: rexml dependency-type: indirect ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark-website] branch asf-site updated: Bump kramdown from 2.3.0 to 2.4.0 (#417)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new a3c77239f Bump kramdown from 2.3.0 to 2.4.0 (#417) a3c77239f is described below commit a3c77239f622dd69071ce893a488d14f88ace355 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Thu Oct 6 18:44:07 2022 -0500 Bump kramdown from 2.3.0 to 2.4.0 (#417) Bumps [kramdown](https://github.com/gettalong/kramdown) from 2.3.0 to 2.4.0. - [Release notes](https://github.com/gettalong/kramdown/releases) - [Changelog](https://github.com/gettalong/kramdown/blob/master/doc/news.page) - [Commits](https://github.com/gettalong/kramdown/commits) --- updated-dependencies: - dependency-name: kramdown dependency-type: indirect ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Gemfile.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 0207aacf5..a12f53745 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -33,7 +33,7 @@ GEM sassc (> 2.0.1, < 3.0) jekyll-watch (2.2.1) listen (~> 3.0) -kramdown (2.3.0) +kramdown (2.4.0) rexml kramdown-parser-gfm (1.1.0) kramdown (~> 2.0) @@ -48,7 +48,7 @@ GEM rb-fsevent (0.10.4) rb-inotify (0.10.1) ffi (~> 1.0) -rexml (3.2.4) +rexml (3.2.5) rouge (3.26.0) safe_yaml (1.0.5) sassc (2.4.0) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark-website] branch asf-site updated: Bump addressable from 2.7.0 to 2.8.1 (#415)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new 96079555f Bump addressable from 2.7.0 to 2.8.1 (#415) 96079555f is described below commit 96079555fc3fb1d89e4888dabe013e568514cb23 Author: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> AuthorDate: Thu Oct 6 18:08:54 2022 -0500 Bump addressable from 2.7.0 to 2.8.1 (#415) Bumps [addressable](https://github.com/sporkmonger/addressable) from 2.7.0 to 2.8.1. - [Release notes](https://github.com/sporkmonger/addressable/releases) - [Changelog](https://github.com/sporkmonger/addressable/blob/main/CHANGELOG.md) - [Commits](https://github.com/sporkmonger/addressable/compare/addressable-2.7.0...addressable-2.8.1) --- updated-dependencies: - dependency-name: addressable dependency-type: indirect ... Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- Gemfile.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 28e33ccb8..0207aacf5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,8 +1,8 @@ GEM remote: https://rubygems.org/ specs: -addressable (2.7.0) - public_suffix (>= 2.0.2, < 5.0) +addressable (2.8.1) + public_suffix (>= 2.0.2, < 6.0) colorator (1.1.0) concurrent-ruby (1.1.8) em-websocket (0.5.2) @@ -44,7 +44,7 @@ GEM mercenary (0.4.0) pathutil (0.16.2) forwardable-extended (~> 2.6) -public_suffix (4.0.6) +public_suffix (5.0.0) rb-fsevent (0.10.4) rb-inotify (0.10.1) ffi (~> 1.0) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40661][BUILD] Upgrade `jetty-http` from 9.4.48.v20220622 to 9.4.49.v20220914
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new fa421980cc2 [SPARK-40661][BUILD] Upgrade `jetty-http` from 9.4.48.v20220622 to 9.4.49.v20220914 fa421980cc2 is described below commit fa421980cc2be4b2c1f503280c103a8320657935 Author: panbingkun AuthorDate: Thu Oct 6 08:24:30 2022 -0500 [SPARK-40661][BUILD] Upgrade `jetty-http` from 9.4.48.v20220622 to 9.4.49.v20220914 ### What changes were proposed in this pull request? The pr aim to Upgrade `jetty-http` from 9.4.48.v20220622 to 9.4.49.v20220914 The v9.4.49.v20220914 should be the last version of the 9. x series. https://user-images.githubusercontent.com/15246973/193972198-54e0ac86-6b38-484b-be4b-a3bf87cb6c3b.png";> ### Why are the changes needed? [Release Notes](https://github.com/eclipse/jetty.project/releases/tag/jetty-9.4.49.v20220914), bring some bug fix: https://user-images.githubusercontent.com/15246973/193972233-07b9e5dd-4b31-4440-9f1c-bbebb28d6e3d.png";> ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GA. Closes #38107 from panbingkun/upgrade_jetty_lastet_version. 
Authored-by: panbingkun Signed-off-by: Sean Owen --- dev/deps/spark-deps-hadoop-2-hive-2.3 | 2 +- dev/deps/spark-deps-hadoop-3-hive-2.3 | 4 ++-- pom.xml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/dev/deps/spark-deps-hadoop-2-hive-2.3 b/dev/deps/spark-deps-hadoop-2-hive-2.3 index 76d59f63ddd..7e0b551277e 100644 --- a/dev/deps/spark-deps-hadoop-2-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-2-hive-2.3 @@ -144,7 +144,7 @@ jersey-hk2/2.36//jersey-hk2-2.36.jar jersey-server/2.36//jersey-server-2.36.jar jetty-sslengine/6.1.26//jetty-sslengine-6.1.26.jar jetty-util/6.1.26//jetty-util-6.1.26.jar -jetty-util/9.4.48.v20220622//jetty-util-9.4.48.v20220622.jar +jetty-util/9.4.49.v20220914//jetty-util-9.4.49.v20220914.jar jetty/6.1.26//jetty-6.1.26.jar jline/2.14.6//jline-2.14.6.jar joda-time/2.11.2//joda-time-2.11.2.jar diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index f6e29a6cee5..e63b652e523 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -129,8 +129,8 @@ jersey-container-servlet/2.36//jersey-container-servlet-2.36.jar jersey-hk2/2.36//jersey-hk2-2.36.jar jersey-server/2.36//jersey-server-2.36.jar jettison/1.1//jettison-1.1.jar -jetty-util-ajax/9.4.48.v20220622//jetty-util-ajax-9.4.48.v20220622.jar -jetty-util/9.4.48.v20220622//jetty-util-9.4.48.v20220622.jar +jetty-util-ajax/9.4.49.v20220914//jetty-util-ajax-9.4.49.v20220914.jar +jetty-util/9.4.49.v20220914//jetty-util-9.4.49.v20220914.jar jline/2.14.6//jline-2.14.6.jar joda-time/2.11.2//joda-time-2.11.2.jar jodd-core/3.5.2//jodd-core-3.5.2.jar diff --git a/pom.xml b/pom.xml index 632722c9bf1..a0a74efebf9 100644 --- a/pom.xml +++ b/pom.xml @@ -135,7 +135,7 @@ 1.12.3 1.8.0 shaded-protobuf -9.4.48.v20220622 +9.4.49.v20220914 4.0.3 0.10.0 2.5.0 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40607][CORE][SQL][MLLIB][SS] Remove redundant string interpolator operations
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 34d5272663c [SPARK-40607][CORE][SQL][MLLIB][SS] Remove redundant string interpolator operations 34d5272663c is described below commit 34d5272663ce4852ca5b2daa665983a321b42060 Author: yangjie01 AuthorDate: Wed Oct 5 18:05:12 2022 -0500 [SPARK-40607][CORE][SQL][MLLIB][SS] Remove redundant string interpolator operations ### What changes were proposed in this pull request? This pr remove redundant string interpolator operations in Spark code, and the change of this pr does not include the code related to logs, exceptions, and `configurations.doc`. ### Why are the changes needed? Clean up unnecessary function calls ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Pass Github Actions Closes #38043 from LuciferYang/unused-s. 
Authored-by: yangjie01 Signed-off-by: Sean Owen --- core/src/main/scala/org/apache/spark/TaskEndReason.scala | 2 +- core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala | 10 +- .../org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala | 4 ++-- .../scala/org/apache/spark/sql/catalyst/expressions/Cast.scala | 4 ++-- .../spark/sql/catalyst/expressions/collectionOperations.scala | 2 +- .../spark/sql/catalyst/expressions/datetimeExpressions.scala | 2 +- .../org/apache/spark/sql/catalyst/expressions/literals.scala | 2 +- .../sql/catalyst/optimizer/PullOutGroupingExpressions.scala| 2 +- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 2 +- .../main/scala/org/apache/spark/sql/execution/HiveResult.scala | 2 +- .../spark/sql/execution/aggregate/HashMapGenerator.scala | 2 +- .../sql/execution/aggregate/RowBasedHashMapGenerator.scala | 2 +- .../apache/spark/sql/execution/basicPhysicalOperators.scala| 2 +- .../spark/sql/execution/joins/BroadcastHashJoinExec.scala | 2 +- .../spark/sql/execution/streaming/ResolveWriteToStream.scala | 2 +- .../src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala | 2 +- .../main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala | 2 +- 17 files changed, 23 insertions(+), 23 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/TaskEndReason.scala b/core/src/main/scala/org/apache/spark/TaskEndReason.scala index 5dc70e9834b..f1ce302a05d 100644 --- a/core/src/main/scala/org/apache/spark/TaskEndReason.scala +++ b/core/src/main/scala/org/apache/spark/TaskEndReason.scala @@ -242,7 +242,7 @@ case class TaskCommitDenied( jobID: Int, partitionID: Int, attemptNumber: Int) extends TaskFailedReason { - override def toErrorString: String = s"TaskCommitDenied (Driver denied task commit)" + + override def toErrorString: String = "TaskCommitDenied (Driver denied task commit)" + s" for job: $jobID, partition: $partitionID, attemptNumber: $attemptNumber" /** * If a task failed because its attempt to commit was denied, do not 
count this failure diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index 8106eec847e..1934e9e58e6 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -360,7 +360,7 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We |'content': ' + |data-title="${"Task " + index + " (attempt " + attempt + ")"} |Status: ${taskInfo.status} |Launch Time: ${UIUtils.formatDate(new Date(launchTime))} |${ @@ -416,7 +416,7 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We Enable zooming - . Show @@ -445,7 +445,7 @@ private[ui] class StagePage(parent: StagesTab, store: AppStatusStore) extends We {TIMELINE_LEGEND} ++ - {Unparsed(s"drawTaskAssignmentTimeline(" + + {Unparsed("drawTaskAssignmentTimeline(" + s"$groupArrayStr, $executorsArrayStr, $minLaunchTime, $maxFinishTime, " + s"${UIUtils.getTimeZoneOffset()})")} diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala index 2f6b9c1e11a..c61aa14edca 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedParamsCodeGen.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/shared/SharedPa
[spark-website] branch asf-site updated: CVE version update
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch asf-site in repository https://gitbox.apache.org/repos/asf/spark-website.git The following commit(s) were added to refs/heads/asf-site by this push: new 20f272012 CVE version update 20f272012 is described below commit 20f2720126be7a5ecea244fa7ff977a995ed2a8c Author: Sean Owen AuthorDate: Mon Oct 3 12:25:05 2022 -0500 CVE version update See mailing list discussion. The idea is to give a 'resolved by' version for older CVEs that are advice or affected only the build. Author: Sean Owen Closes #414 from srowen/CVEVersionUpdate. --- security.md| 6 -- site/security.html | 6 -- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/security.md b/security.md index 0fb077b05..a4b470cd6 100644 --- a/security.md +++ b/security.md @@ -200,7 +200,9 @@ Vendor: The Apache Software Foundation Versions Affected -- 1.3.x release branch and later +- 2.1.x release branch and earlier +- 2.2.x release branch before Spark 2.2.3 +- 2.3.x release branch before Spark 2.3.3 Description: @@ -232,7 +234,7 @@ Vendor: The Apache Software Foundation Versions Affected: -- Spark versions from 1.3.0, running standalone master with REST API enabled, or running Mesos master with cluster mode enabled +- Spark versions from 1.3.0, running standalone master with REST API enabled, or running Mesos master with cluster mode enabled; suggested mitigations resolved the issue as of Spark 2.4.0. Description: diff --git a/site/security.html b/site/security.html index d750bd0c0..369af400a 100644 --- a/site/security.html +++ b/site/security.html @@ -341,7 +341,9 @@ and related security properties described at https://spark.apache.org/docs/lates Versions Affected - 1.3.x release branch and later + 2.1.x release branch and earlier + 2.2.x release branch before Spark 2.2.3 + 2.3.x release branch before Spark 2.3.3 Description: @@ -378,7 +380,7 @@ source code. 
Versions Affected: - Spark versions from 1.3.0, running standalone master with REST API enabled, or running Mesos master with cluster mode enabled + Spark versions from 1.3.0, running standalone master with REST API enabled, or running Mesos master with cluster mode enabled; suggested mitigations resolved the issue as of Spark 2.4.0. Description: - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (3cd13fd6b73 -> 01159c7bd07)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 3cd13fd6b73 [SPARK-40165][BUILD] Update test plugins to latest versions add 01159c7bd07 [SPARK-40620][CORE] Simplify make offers No new revisions were added by this update. Summary of changes: .../cluster/CoarseGrainedSchedulerBackend.scala| 27 -- 1 file changed, 15 insertions(+), 12 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (9dae42b9739 -> 3cd13fd6b73)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 9dae42b9739 [MINOR][DOCS][SQL] Fix typo in Doc for emptyDataset: change returns to since add 3cd13fd6b73 [SPARK-40165][BUILD] Update test plugins to latest versions No new revisions were added by this update. Summary of changes: pom.xml | 7 --- sql/hive-thriftserver/pom.xml | 4 2 files changed, 8 insertions(+), 3 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (fa88651fee4 -> bfad44ee5d6)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from fa88651fee4 [SPARK-40458][K8S] Bump Kubernetes Client Version to 6.1.1 add bfad44ee5d6 [SPARK-40613][BUILD] Upgrade sbt-protoc to 1.0.6 No new revisions were added by this update. Summary of changes: project/plugins.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-39146][CORE][SQL] Introduce local singleton for `ObjectMapper` that may be reused
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 94407429427 [SPARK-39146][CORE][SQL] Introduce local singleton for `ObjectMapper` that may be reused 94407429427 is described below commit 944074294277849f8bb920e8c368ef837c364fb1 Author: yangjie01 AuthorDate: Thu Sep 29 07:37:21 2022 -0500 [SPARK-39146][CORE][SQL] Introduce local singleton for `ObjectMapper` that may be reused ### What changes were proposed in this pull request? This pr introduce local singletons for Jackson `ObjectMapper` that may be reused in Spark code to reduce the cost of repeatedly creating `ObjectMapper`. ### Why are the changes needed? Minor performance improvement. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Pass GitHub Actions Closes #37999 from LuciferYang/SPARK-39146-2. Authored-by: yangjie01 Signed-off-by: Sean Owen --- .../org/apache/spark/ErrorClassesJSONReader.scala | 19 +++ .../spark/sql/catalyst/util/RebaseDateTime.scala | 8 ++-- .../execution/datasources/v2/DataSourceV2Utils.scala | 2 +- .../execution/datasources/v2/FileDataSourceV2.scala | 10 -- 4 files changed, 26 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala b/core/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala index 9d6dd9dde07..e06fd1711d8 100644 --- a/core/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala +++ b/core/src/main/scala/org/apache/spark/ErrorClassesJSONReader.scala @@ -39,15 +39,9 @@ import org.apache.spark.annotation.DeveloperApi class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) { assert(jsonFileURLs.nonEmpty) - private def readAsMap(url: URL): SortedMap[String, ErrorInfo] = { -val mapper: JsonMapper = JsonMapper.builder() - .addModule(DefaultScalaModule) - .build() -mapper.readValue(url, 
new TypeReference[SortedMap[String, ErrorInfo]]() {}) - } - // Exposed for testing - private[spark] val errorInfoMap = jsonFileURLs.map(readAsMap).reduce(_ ++ _) + private[spark] val errorInfoMap = +jsonFileURLs.map(ErrorClassesJsonReader.readAsMap).reduce(_ ++ _) def getErrorMessage(errorClass: String, messageParameters: Map[String, String]): String = { val messageTemplate = getMessageTemplate(errorClass) @@ -88,6 +82,15 @@ class ErrorClassesJsonReader(jsonFileURLs: Seq[URL]) { } } +private object ErrorClassesJsonReader { + private val mapper: JsonMapper = JsonMapper.builder() +.addModule(DefaultScalaModule) +.build() + private def readAsMap(url: URL): SortedMap[String, ErrorInfo] = { +mapper.readValue(url, new TypeReference[SortedMap[String, ErrorInfo]]() {}) + } +} + /** * Information associated with an error class. * diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala index dc1c4dbe677..a2a63e2af42 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RebaseDateTime.scala @@ -268,13 +268,17 @@ object RebaseDateTime { micros + rebaseInfo.diffs(i) } + private lazy val mapper = { +val mapper = new ObjectMapper() with ClassTagExtensions +mapper.registerModule(DefaultScalaModule) +mapper + } + // Loads rebasing info from an JSON file. JSON records in the files should conform to // `JsonRebaseRecord`. AnyRefMap is used here instead of Scala's immutable map because // it is 2 times faster in DateTimeRebaseBenchmark. 
private[sql] def loadRebaseRecords(fileName: String): AnyRefMap[String, RebaseInfo] = { val file = Utils.getSparkClassLoader.getResource(fileName) -val mapper = new ObjectMapper() with ClassTagExtensions -mapper.registerModule(DefaultScalaModule) val jsonRebaseRecords = mapper.readValue[Seq[JsonRebaseRecord]](file) val anyRefMap = new AnyRefMap[String, RebaseInfo]((3 * jsonRebaseRecords.size) / 2) jsonRebaseRecords.foreach { jsonRecord => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala index 7fd61c44fd1..f1d1cc5a173 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Utils.scala @@ -150,6 +150,7 @@ private[sql] object DataSourceV2Utils extends Logging { } } + private lazy val objectMapper = new Obje
[spark] branch master updated (778acd411e3 -> 7e39d9bfef3)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 778acd411e3 [SPARK-40478][DOCS] Add create datasource table options docs add 7e39d9bfef3 [SPARK-40552][BUILD][INFRA] Upgrade `protobuf-python` to 4.21.6 No new revisions were added by this update. Summary of changes: dev/create-release/spark-rm/Dockerfile | 2 +- dev/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (8fdaf548bcc -> 778acd411e3)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git from 8fdaf548bcc [SPARK-40560][SQL] Rename `message` to `messageTemplate` in the `STANDARD` format of errors add 778acd411e3 [SPARK-40478][DOCS] Add create datasource table options docs No new revisions were added by this update. Summary of changes: docs/sql-ref-syntax-ddl-create-table-datasource.md | 13 + 1 file changed, 13 insertions(+) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40096][CORE][TESTS][FOLLOW-UP] Explicitly check the element and length
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new ff1b57ddafc [SPARK-40096][CORE][TESTS][FOLLOW-UP] Explicitly check the element and length ff1b57ddafc is described below commit ff1b57ddafc812de34ebe66463bbab23b0196ad4 Author: Hyukjin Kwon AuthorDate: Sun Sep 25 18:05:40 2022 -0500 [SPARK-40096][CORE][TESTS][FOLLOW-UP] Explicitly check the element and length ### What changes were proposed in this pull request? This PR is a followup of https://github.com/apache/spark/pull/37533 that works around the test failure by explicitly checking the element and length in the test. ### Why are the changes needed? For an unknown reason the test added in https://github.com/apache/spark/pull/37533 is flaky even though both `ArrayBuffer` and `List` are `Seq` and the test should pass up to my best knowledge. See https://github.com/apache/spark/actions/runs/3109851954/jobs/5040465291 ``` [info] - SPARK-40096: Send finalize events even if shuffle merger blocks indefinitely with registerMergeResults is false *** FAILED *** (90 milliseconds) [info] ArrayBuffer("hostB") did not equal List("hostB") (DAGSchedulerSuite.scala:4498) [info] org.scalatest.exceptions.TestFailedException: [info] at org.scalatest.Assertions.newAssertionFailedException(Assertions.scala:472) [info] at org.scalatest.Assertions.newAssertionFailedException$(Assertions.scala:471) [info] at org.scalatest.Assertions$.newAssertionFailedException(Assertions.scala:1231) [info] at org.scalatest.Assertions$AssertionsHelper.macroAssert(Assertions.scala:1295) [info] at org.apache.spark.scheduler.DAGSchedulerSuite.$anonfun$new$286(DAGSchedulerSuite.scala:4498) [info] at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) [info] at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) [info] at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) [info] at 
org.scalatest.Transformer.apply(Transformer.scala:22) [info] at org.scalatest.Transformer.apply(Transformer.scala:20) [info] at org.scalatest.funsuite.AnyFunSuiteLike$$anon$1.apply(AnyFunSuiteLike.scala:226) [info] at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:207) [info] at org.scalatest.funsuite.AnyFunSuiteLike.invokeWithFixture$1(AnyFunSuiteLike.scala:224) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTest$1(AnyFunSuiteLike.scala:236) [info] at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest(AnyFunSuiteLike.scala:236) [info] at org.scalatest.funsuite.AnyFunSuiteLike.runTest$(AnyFunSuiteLike.scala:218) [info] at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterEach$$super$runTest(SparkFunSuite.scala:66) [info] at org.scalatest.BeforeAndAfterEach.runTest(BeforeAndAfterEach.scala:234) [info] at org.scalatest.BeforeAndAfterEach.runTest$(BeforeAndAfterEach.scala:227) [info] at org.apache.spark.SparkFunSuite.runTest(SparkFunSuite.scala:66) [info] at org.scalatest.funsuite.AnyFunSuiteLike.$anonfun$runTests$1(AnyFunSuiteLike.scala:269) [info] at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:413) [info] at scala.collection.immutable.List.foreach(List.scala:431) ``` ### Does this PR introduce _any_ user-facing change? No, test-only. ### How was this patch tested? CI in this PR should verify that. Closes #37989 from HyukjinKwon/SPARK-40096-followup. 
Authored-by: Hyukjin Kwon Signed-off-by: Sean Owen --- core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala index 10cd136d564..847e0622213 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/DAGSchedulerSuite.scala @@ -4495,7 +4495,8 @@ class DAGSchedulerSuite extends SparkFunSuite with TempLocalSparkContext with Ti sendRequestsLatch.await() verify(blockStoreClient, times(2)) .finalizeShuffleMerge(any(), any(), any(), any(), any()) - assert(sentHosts === Seq("hostB")) + assert(sentHosts.nonEmpty) + assert(sentHosts.head === "hostB" && sentHosts.length == 1) completeLatch.await() assert(hostAInterrupted) } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-40142][PYTHON][SQL][FOLLOW-UP] Make pyspark.sql.functions examples self-contained (FINAL)
This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 57e6cf0b547 [SPARK-40142][PYTHON][SQL][FOLLOW-UP] Make pyspark.sql.functions examples self-contained (FINAL) 57e6cf0b547 is described below commit 57e6cf0b54771df68ad0ab77259e300d271fd4d6 Author: Khalid Mammadov AuthorDate: Sun Sep 25 12:45:39 2022 -0500 [SPARK-40142][PYTHON][SQL][FOLLOW-UP] Make pyspark.sql.functions examples self-contained (FINAL) ### What changes were proposed in this pull request? It's part of the Pyspark docstrings improvement series (https://github.com/apache/spark/pull/37592, https://github.com/apache/spark/pull/37662, https://github.com/apache/spark/pull/37686, https://github.com/apache/spark/pull/37786, https://github.com/apache/spark/pull/37797, https://github.com/apache/spark/pull/37850) In this PR I mainly covered missing parts in the docstrings adding some more examples where it needed. I have also made all examples self explanatory by providing DataFrame creation command where it was missing for clarity to a user. This should complete "my take" on `functions.py` docstrings & example improvements. ### Why are the changes needed? To improve PySpark documentation ### Does this PR introduce _any_ user-facing change? Yes, documentation ### How was this patch tested? ``` PYTHON_EXECUTABLE=python3.9 ./dev/lint-python ./python/run-tests --testnames pyspark.sql.functions bundle exec jekyll build ``` Closes #37988 from khalidmammadov/docstrings_funcs_part_8. 
Authored-by: Khalid Mammadov Signed-off-by: Sean Owen --- python/pyspark/sql/functions.py | 357 +++- 1 file changed, 316 insertions(+), 41 deletions(-) diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index fe114e07c88..38baf9b9913 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2320,10 +2320,20 @@ def approx_count_distinct(col: "ColumnOrName", rsd: Optional[float] = None) -> C maximum relative standard deviation allowed (default = 0.05). For rsd < 0.01, it is more efficient to use :func:`count_distinct` +Returns +--- +:class:`~pyspark.sql.Column` +the column of computed results. + Examples ->>> df.agg(approx_count_distinct(df.age).alias('distinct_ages')).collect() -[Row(distinct_ages=2)] +>>> df = spark.createDataFrame([1,2,2,3], "INT") +>>> df.agg(approx_count_distinct("value").alias('distinct_values')).show() ++---+ +|distinct_values| ++---+ +| 3| ++---+ """ if rsd is None: return _invoke_function_over_columns("approx_count_distinct", col) @@ -2624,6 +2634,7 @@ def grouping(col: "ColumnOrName") -> Column: Examples +>>> df = spark.createDataFrame([("Alice", 2), ("Bob", 5)], ("name", "age")) >>> df.cube("name").agg(grouping("name"), sum("age")).orderBy("name").show() +-+--++ | name|grouping(name)|sum(age)| @@ -2966,9 +2977,14 @@ def rand(seed: Optional[int] = None) -> Column: Examples ->>> df.withColumn('rand', rand(seed=42) * 3).collect() -[Row(age=2, name='Alice', rand=2.4052597283576684), - Row(age=5, name='Bob', rand=2.3913904055683974)] +>>> df = spark.range(2) +>>> df.withColumn('rand', rand(seed=42) * 3).show() # doctest: +SKIP ++---+--+ +| id| rand| ++---+--+ +| 0|1.4385751892400076| +| 1|1.7082186019706387| ++---+--+ """ if seed is not None: return _invoke_function("rand", seed) @@ -2998,9 +3014,14 @@ def randn(seed: Optional[int] = None) -> Column: Examples ->>> df.withColumn('randn', randn(seed=42)).collect() -[Row(age=2, name='Alice', randn=1.1027054481455365), -Row(age=5, name='Bob', 
randn=0.7400395449950132)] +>>> df = spark.range(2) +>>> df.withColumn('randn', randn(seed=42)).show() # doctest: +SKIP ++---++ +| id| randn| ++---++ +| 0|-0.04167221574820542| +| 1| 0.15241403986452778| ++---++ """ if se