svn commit: r30701 - in /dev/spark/2.4.1-SNAPSHOT-2018_11_05_22_02-f98c0ad-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Tue Nov 6 06:16:56 2018 New Revision: 30701 Log: Apache Spark 2.4.1-SNAPSHOT-2018_11_05_22_02-f98c0ad docs [This commit notification would consist of 1476 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-25946][BUILD] Upgrade ASM to 7.x to support JDK11
Repository: spark Updated Branches: refs/heads/master cc38abc27 -> 3ed91c9b8 [SPARK-25946][BUILD] Upgrade ASM to 7.x to support JDK11 ## What changes were proposed in this pull request? Upgrade ASM to 7.x to support JDK11 ## How was this patch tested? Existing tests. Closes #22953 from dbtsai/asm7. Authored-by: DB Tsai Signed-off-by: DB Tsai Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3ed91c9b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3ed91c9b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3ed91c9b Branch: refs/heads/master Commit: 3ed91c9b8998f2512716f906cd1cba25578111ff Parents: cc38abc Author: DB Tsai Authored: Tue Nov 6 05:38:59 2018 + Committer: DB Tsai Committed: Tue Nov 6 05:38:59 2018 + -- core/pom.xml | 2 +- .../org/apache/spark/util/ClosureCleaner.scala| 18 +- dev/deps/spark-deps-hadoop-2.7| 2 +- dev/deps/spark-deps-hadoop-3.1| 2 +- graphx/pom.xml| 2 +- .../apache/spark/graphx/util/BytecodeUtils.scala | 8 pom.xml | 8 repl/pom.xml | 2 +- .../apache/spark/repl/ExecutorClassLoader.scala | 6 +++--- sql/core/pom.xml | 2 +- 10 files changed, 26 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3ed91c9b/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index f23d09f..5c26f9a 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -56,7 +56,7 @@ org.apache.xbean - xbean-asm6-shaded + xbean-asm7-shaded org.apache.hadoop http://git-wip-us.apache.org/repos/asf/spark/blob/3ed91c9b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala -- diff --git a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala index 6c4740c..1b3e525 100644 --- a/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala +++ b/core/src/main/scala/org/apache/spark/util/ClosureCleaner.scala @@ -23,8 +23,8 @@ import java.lang.invoke.SerializedLambda import scala.collection.mutable.{Map, Set, Stack} import scala.language.existentials -import org.apache.xbean.asm6.{ClassReader, ClassVisitor, MethodVisitor, Type} -import org.apache.xbean.asm6.Opcodes._ +import org.apache.xbean.asm7.{ClassReader, ClassVisitor, MethodVisitor, Type} +import org.apache.xbean.asm7.Opcodes._ import org.apache.spark.{SparkEnv, SparkException} import org.apache.spark.internal.Logging @@ -424,7 +424,7 @@ private[spark] class ReturnStatementInClosureException extends SparkException("Return statements aren't allowed in Spark closures") private class ReturnStatementFinder(targetMethodName: Option[String] = None) - extends ClassVisitor(ASM6) { + extends ClassVisitor(ASM7) { override def visitMethod(access: Int, name: String, desc: String, sig: String, exceptions: Array[String]): MethodVisitor = { @@ -438,7 +438,7 @@ private class ReturnStatementFinder(targetMethodName: Option[String] = None) val isTargetMethod = targetMethodName.isEmpty || name == targetMethodName.get || name == targetMethodName.get.stripSuffix("$adapted") - new MethodVisitor(ASM6) { + new MethodVisitor(ASM7) { override def visitTypeInsn(op: Int, tp: String) { if (op == NEW && tp.contains("scala/runtime/NonLocalReturnControl") && isTargetMethod) { throw new ReturnStatementInClosureException @@ -446,7 +446,7 @@ private class ReturnStatementFinder(targetMethodName: Option[String] = None) } } } else { - new MethodVisitor(ASM6) {} + new MethodVisitor(ASM7) {} } } } @@ -470,7 +470,7 @@ private[util] class FieldAccessFinder( findTransitively: Boolean, specificMethod: 
Option[MethodIdentifier[_]] = None, visitedMethods: Set[MethodIdentifier[_]] = Set.empty) - extends ClassVisitor(ASM6) { + extends ClassVisitor(ASM7) { override def visitMethod( access: Int, @@ -485,7 +485,7 @@ private[util] class FieldAccessFinder( return null } -new MethodVisitor(ASM6) { +new MethodVisitor(ASM7) { override def visitFieldInsn(op: Int, owner: String, name: String, desc: String) { if (op == GETFIELD) { for (cl <- fields.keys if cl.getName == owner.replace('/', '.')) { @@ -525,7 +525,7 @@ private[util] class FieldAccessFinder( } } -private class InnerClosureFinder(output:
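The diff above is truncated by the notification limit, but the change it applies is uniform: every ASM visitor now imports the shaded `xbean-asm7` package and passes the `ASM7` api-version constant. As a minimal sketch (illustrative, not taken from the patch) of the visitor pattern being migrated, assuming `xbean-asm7-shaded` is on the classpath:

```scala
import org.apache.xbean.asm7.{ClassReader, ClassVisitor, MethodVisitor}
import org.apache.xbean.asm7.Opcodes.ASM7

// Illustrative sketch: list the methods of a class found on the classpath.
// The ASM7 constant declares which visitor API level the subclass targets;
// bumping it from ASM6 is what lets ASM parse JDK11 (v55) class files.
object MethodLister {
  def list(className: String): Unit = {
    val visitor = new ClassVisitor(ASM7) {
      override def visitMethod(access: Int, name: String, desc: String,
          sig: String, exceptions: Array[String]): MethodVisitor = {
        println(s"$name$desc")
        super.visitMethod(access, name, desc, sig, exceptions)
      }
    }
    // ClassReader(name) loads the bytecode for `name` from the classpath.
    new ClassReader(className).accept(visitor, 0)
  }
}
```

The patch's edits in `ClosureCleaner`, `BytecodeUtils`, and `ExecutorClassLoader` amount to the same one-constant, one-package swap.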
svn commit: r30698 - in /dev/spark/3.0.0-SNAPSHOT-2018_11_05_20_02-cc38abc-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Tue Nov 6 04:16:52 2018 New Revision: 30698 Log: Apache Spark 3.0.0-SNAPSHOT-2018_11_05_20_02-cc38abc docs [This commit notification would consist of 1471 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-25906][SHELL] Documents '-I' option (from Scala REPL) in spark-shell
Repository: spark Updated Branches: refs/heads/branch-2.4 8526f2ee5 -> f98c0ad02 [SPARK-25906][SHELL] Documents '-I' option (from Scala REPL) in spark-shell ## What changes were proposed in this pull request? This PR documents the `-I` option, available from Spark 2.4.x (previously the `-i` option, until Spark 2.3.x). After we upgraded Scala to 2.11.12, the `-i` option (`:load`) was replaced by `-I` (SI-7898). The existing `-i` became `:paste`, which does not respect Spark's implicit imports (for instance `toDF`, symbol as column, etc.). Therefore, the `-i` option does not work correctly from Spark 2.4.x, and it is not documented. I checked the other Scala REPL options, but from quick tests they look either not applicable or not working. This PR only targets documenting `-I` for now. ## How was this patch tested? Manually tested. **Mac:** ```bash $ ./bin/spark-shell --help Usage: ./bin/spark-shell [options] Scala REPL options: -I <file> preload <file>, enforcing line-by-line interpretation Options: --master MASTER_URL spark://host:port, mesos://host:port, yarn, k8s://https://host:port, or local (Default: local[*]). --deploy-mode DEPLOY_MODE Whether to launch the driver program locally ("client") or on one of the worker machines inside the cluster ("cluster") (Default: client). ... ``` **Windows:** ```cmd C:\...\spark>.\bin\spark-shell --help Usage: .\bin\spark-shell.cmd [options] Scala REPL options: -I <file> preload <file>, enforcing line-by-line interpretation Options: --master MASTER_URL spark://host:port, mesos://host:port, yarn, k8s://https://host:port, or local (Default: local[*]). --deploy-mode DEPLOY_MODE Whether to launch the driver program locally ("client") or on one of the worker machines inside the cluster ("cluster") (Default: client). ... ``` Closes #22919 from HyukjinKwon/SPARK-25906. Authored-by: hyukjinkwon Signed-off-by: hyukjinkwon (cherry picked from commit cc38abc27a671f345e3b4c170977a1976a02a0d0) Signed-off-by: hyukjinkwon Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f98c0ad0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f98c0ad0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f98c0ad0 Branch: refs/heads/branch-2.4 Commit: f98c0ad02ea087ae79fef277801d0b71a5019b48 Parents: 8526f2e Author: hyukjinkwon Authored: Tue Nov 6 10:39:58 2018 +0800 Committer: hyukjinkwon Committed: Tue Nov 6 10:40:17 2018 +0800 -- bin/spark-shell | 5 - bin/spark-shell2.cmd | 8 +++- 2 files changed, 11 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f98c0ad0/bin/spark-shell -- diff --git a/bin/spark-shell b/bin/spark-shell index 421f36c..e920137 100755 --- a/bin/spark-shell +++ b/bin/spark-shell @@ -32,7 +32,10 @@ if [ -z "${SPARK_HOME}" ]; then source "$(dirname "$0")"/find-spark-home fi -export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]" +export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options] + +Scala REPL options: + -I <file> preload <file>, enforcing line-by-line interpretation" # SPARK-4161: scala does not assume use of the java classpath, # so we need to add the "-Dscala.usejavacp=true" flag manually. 
We http://git-wip-us.apache.org/repos/asf/spark/blob/f98c0ad0/bin/spark-shell2.cmd -- diff --git a/bin/spark-shell2.cmd b/bin/spark-shell2.cmd index aaf7190..549bf43 100644 --- a/bin/spark-shell2.cmd +++ b/bin/spark-shell2.cmd @@ -20,7 +20,13 @@ rem rem Figure out where the Spark framework is installed call "%~dp0find-spark-home.cmd" -set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options] +set LF=^ + + +rem two empty lines are required +set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options]^%LF%%LF%^%LF%%LF%^ +Scala REPL options:^%LF%%LF%^ + -I ^<file^> preload ^<file^>, enforcing line-by-line interpretation rem SPARK-4161: scala does not assume use of the java classpath, rem so we need to add the "-Dscala.usejavacp=true" flag manually. We
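As a usage sketch of the documented option (the file name `init.scala` and its contents are hypothetical): save a script and launch with `./bin/spark-shell -I init.scala`. Because `-I` enforces line-by-line interpretation, Spark's implicit imports such as `toDF` stay in scope inside the preloaded file, which is exactly what the old `-i` (now `:paste`) no longer guarantees:

```scala
// init.scala -- hypothetical preload file, run via: ./bin/spark-shell -I init.scala
// Line-by-line interpretation keeps spark-shell's implicits (e.g. toDF) usable.
val df = Seq((1, "a"), (2, "b")).toDF("id", "value")
df.show()
```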
spark git commit: [SPARK-25906][SHELL] Documents '-I' option (from Scala REPL) in spark-shell
Repository: spark Updated Branches: refs/heads/master 78fa1be29 -> cc38abc27 [SPARK-25906][SHELL] Documents '-I' option (from Scala REPL) in spark-shell ## What changes were proposed in this pull request? This PR documents the `-I` option, available from Spark 2.4.x (previously the `-i` option, until Spark 2.3.x). After we upgraded Scala to 2.11.12, the `-i` option (`:load`) was replaced by `-I` (SI-7898). The existing `-i` became `:paste`, which does not respect Spark's implicit imports (for instance `toDF`, symbol as column, etc.). Therefore, the `-i` option does not work correctly from Spark 2.4.x, and it is not documented. I checked the other Scala REPL options, but from quick tests they look either not applicable or not working. This PR only targets documenting `-I` for now. ## How was this patch tested? Manually tested. **Mac:** ```bash $ ./bin/spark-shell --help Usage: ./bin/spark-shell [options] Scala REPL options: -I <file> preload <file>, enforcing line-by-line interpretation Options: --master MASTER_URL spark://host:port, mesos://host:port, yarn, k8s://https://host:port, or local (Default: local[*]). --deploy-mode DEPLOY_MODE Whether to launch the driver program locally ("client") or on one of the worker machines inside the cluster ("cluster") (Default: client). ... ``` **Windows:** ```cmd C:\...\spark>.\bin\spark-shell --help Usage: .\bin\spark-shell.cmd [options] Scala REPL options: -I <file> preload <file>, enforcing line-by-line interpretation Options: --master MASTER_URL spark://host:port, mesos://host:port, yarn, k8s://https://host:port, or local (Default: local[*]). --deploy-mode DEPLOY_MODE Whether to launch the driver program locally ("client") or on one of the worker machines inside the cluster ("cluster") (Default: client). ... ``` Closes #22919 from HyukjinKwon/SPARK-25906. Authored-by: hyukjinkwon Signed-off-by: hyukjinkwon Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cc38abc2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cc38abc2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cc38abc2 Branch: refs/heads/master Commit: cc38abc27a671f345e3b4c170977a1976a02a0d0 Parents: 78fa1be Author: hyukjinkwon Authored: Tue Nov 6 10:39:58 2018 +0800 Committer: hyukjinkwon Committed: Tue Nov 6 10:39:58 2018 +0800 -- bin/spark-shell | 5 - bin/spark-shell2.cmd | 8 +++- 2 files changed, 11 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cc38abc2/bin/spark-shell -- diff --git a/bin/spark-shell b/bin/spark-shell index 421f36c..e920137 100755 --- a/bin/spark-shell +++ b/bin/spark-shell @@ -32,7 +32,10 @@ if [ -z "${SPARK_HOME}" ]; then source "$(dirname "$0")"/find-spark-home fi -export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]" +export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options] + +Scala REPL options: + -I <file> preload <file>, enforcing line-by-line interpretation" # SPARK-4161: scala does not assume use of the java classpath, # so we need to add the "-Dscala.usejavacp=true" flag manually. 
We http://git-wip-us.apache.org/repos/asf/spark/blob/cc38abc2/bin/spark-shell2.cmd -- diff --git a/bin/spark-shell2.cmd b/bin/spark-shell2.cmd index aaf7190..549bf43 100644 --- a/bin/spark-shell2.cmd +++ b/bin/spark-shell2.cmd @@ -20,7 +20,13 @@ rem rem Figure out where the Spark framework is installed call "%~dp0find-spark-home.cmd" -set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options] +set LF=^ + + +rem two empty lines are required +set _SPARK_CMD_USAGE=Usage: .\bin\spark-shell.cmd [options]^%LF%%LF%^%LF%%LF%^ +Scala REPL options:^%LF%%LF%^ + -I ^<file^> preload ^<file^>, enforcing line-by-line interpretation rem SPARK-4161: scala does not assume use of the java classpath, rem so we need to add the "-Dscala.usejavacp=true" flag manually. We
svn commit: r30695 - in /dev/spark/2.4.1-SNAPSHOT-2018_11_05_18_02-8526f2e-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Tue Nov 6 02:16:58 2018 New Revision: 30695 Log: Apache Spark 2.4.1-SNAPSHOT-2018_11_05_18_02-8526f2e docs [This commit notification would consist of 1476 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-25926][CORE] Move config entries in core module to internal.config.
Repository: spark Updated Branches: refs/heads/master c0d1bf032 -> 78fa1be29 [SPARK-25926][CORE] Move config entries in core module to internal.config. ## What changes were proposed in this pull request? Currently, the definitions of config entries in the `core` module are spread across several files. We should move them into `internal/config` to make them easier to manage. ## How was this patch tested? Existing tests. Closes #22928 from ueshin/issues/SPARK-25926/single_config_file. Authored-by: Takuya UESHIN Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78fa1be2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78fa1be2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78fa1be2 Branch: refs/heads/master Commit: 78fa1be29bc9fbe98dd0226418aafc221c5e5309 Parents: c0d1bf0 Author: Takuya UESHIN Authored: Tue Nov 6 09:18:17 2018 +0800 Committer: Wenchen Fan Committed: Tue Nov 6 09:18:17 2018 +0800 -- .../main/scala/org/apache/spark/SparkConf.scala | 2 +- .../deploy/history/FsHistoryProvider.scala | 4 +- .../spark/deploy/history/HistoryServer.scala| 2 +- .../history/HistoryServerDiskManager.scala | 3 +- .../apache/spark/deploy/history/config.scala| 67 .../apache/spark/internal/config/History.scala | 65 +++ .../apache/spark/internal/config/Status.scala | 58 + .../apache/spark/status/AppStatusListener.scala | 3 +- .../apache/spark/status/AppStatusSource.scala | 11 +--- .../spark/status/ElementTrackingStore.scala | 3 +- .../scala/org/apache/spark/status/config.scala | 54 .../scala/org/apache/spark/SparkConfSuite.scala | 2 +- .../deploy/history/FsHistoryProviderSuite.scala | 2 +- .../history/HistoryServerDiskManagerSuite.scala | 3 +- .../deploy/history/HistoryServerSuite.scala | 2 +- .../spark/status/AppStatusListenerSuite.scala | 3 +- .../status/ElementTrackingStoreSuite.scala | 3 +- .../org/apache/spark/ui/StagePageSuite.scala| 2 +- .../org/apache/spark/ui/UISeleniumSuite.scala | 2 +- .../cluster/mesos/MesosSchedulerUtils.scala | 2 +- .../sql/execution/ui/SQLAppStatusListener.scala | 2 +- .../ui/SQLAppStatusListenerSuite.scala | 2 +- 22 files changed, 143 insertions(+), 154 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/78fa1be2/core/src/main/scala/org/apache/spark/SparkConf.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 8537c53..21c5cbc 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -25,9 +25,9 @@ import scala.collection.mutable.LinkedHashSet import org.apache.avro.{Schema, SchemaNormalization} -import org.apache.spark.deploy.history.config._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config._ +import org.apache.spark.internal.config.History._ import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.Utils http://git-wip-us.apache.org/repos/asf/spark/blob/78fa1be2/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index c4517d3..2230bc8 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -42,13 +42,14 @@ import org.fusesource.leveldbjni.internal.NativeDB 
import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.History._ +import org.apache.spark.internal.config.Status._ import org.apache.spark.io.CompressionCodec import org.apache.spark.scheduler._ import org.apache.spark.scheduler.ReplayListenerBus._ import org.apache.spark.status._ import org.apache.spark.status.KVUtils._ import org.apache.spark.status.api.v1.{ApplicationAttemptInfo, ApplicationInfo} -import org.apache.spark.status.config._ import org.apache.spark.ui.SparkUI import org.apache.spark.util.{Clock, SystemClock, ThreadUtils, Utils} import org.apache.spark.util.kvstore._ @@ -86,7 +87,6 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) this(conf, new SystemClock()) } - import config._ import FsHistoryProvider._ // Interval between safemode checks.
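For readers unfamiliar with Spark's internal config registry, the relocated entries are built with the internal `ConfigBuilder` DSL. A heavily abbreviated sketch of what the new `internal/config/History.scala` looks like (the two keys and defaults shown are real History Server settings; everything else is elided):

```scala
package org.apache.spark.internal.config

import java.util.concurrent.TimeUnit

// Abbreviated sketch of the relocated History Server config entries.
private[spark] object History {

  val DEFAULT_LOG_DIR = "file:/tmp/spark-events"

  val HISTORY_LOG_DIR = ConfigBuilder("spark.history.fs.logDirectory")
    .stringConf
    .createWithDefault(DEFAULT_LOG_DIR)

  val UPDATE_INTERVAL_S = ConfigBuilder("spark.history.fs.update.interval")
    .timeConf(TimeUnit.SECONDS)
    .createWithDefaultString("10s")
}
```

Call sites then switch from `import org.apache.spark.deploy.history.config._` to `import org.apache.spark.internal.config.History._`, as the `SparkConf` and `FsHistoryProvider` hunks above show.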
svn commit: r30694 - in /dev/spark/3.0.0-SNAPSHOT-2018_11_05_16_02-c0d1bf0-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Tue Nov 6 00:17:02 2018 New Revision: 30694 Log: Apache Spark 3.0.0-SNAPSHOT-2018_11_05_16_02-c0d1bf0 docs [This commit notification would consist of 1471 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [MINOR] Fix typos and misspellings
Repository: spark Updated Branches: refs/heads/branch-2.4 af2ec972d -> 8526f2ee5 [MINOR] Fix typos and misspellings ## What changes were proposed in this pull request? Fix typos and misspellings, per https://github.com/apache/spark-website/pull/158#issuecomment-435790366 ## How was this patch tested? Existing tests. Closes #22950 from srowen/Typos. Authored-by: Sean Owen Signed-off-by: Sean Owen (cherry picked from commit c0d1bf0322be12230c30cb200f19a02e4d5e0d49) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8526f2ee Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8526f2ee Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8526f2ee Branch: refs/heads/branch-2.4 Commit: 8526f2ee5362df63febe47ab4c64aa6d1b71f990 Parents: af2ec97 Author: Sean Owen Authored: Mon Nov 5 17:34:23 2018 -0600 Committer: Sean Owen Committed: Mon Nov 5 17:34:39 2018 -0600 -- .../main/java/org/apache/spark/ExecutorPlugin.java | 6 +++--- .../java/org/apache/spark/ExecutorPluginSuite.java | 4 ++-- docs/sql-migration-guide-upgrade.md| 2 +- .../spark/ml/r/AFTSurvivalRegressionWrapper.scala | 6 +++--- .../org/apache/spark/ml/stat/Summarizer.scala | 4 ++-- .../mllib/stat/MultivariateOnlineSummarizer.scala | 2 +- python/pyspark/ml/stat.py | 2 +- .../apache/spark/sql/hive/CachedTableSuite.scala | 17 - 8 files changed, 21 insertions(+), 22 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8526f2ee/core/src/main/java/org/apache/spark/ExecutorPlugin.java -- diff --git a/core/src/main/java/org/apache/spark/ExecutorPlugin.java b/core/src/main/java/org/apache/spark/ExecutorPlugin.java index ec0b57f..f86520c 100644 --- a/core/src/main/java/org/apache/spark/ExecutorPlugin.java +++ b/core/src/main/java/org/apache/spark/ExecutorPlugin.java @@ -20,18 +20,18 @@ package org.apache.spark; import org.apache.spark.annotation.DeveloperApi; /** - * A plugin which can be automaticaly instantiated within each Spark executor. Users can specify + * A plugin which can be automatically instantiated within each Spark executor. Users can specify * plugins which should be created with the "spark.executor.plugins" configuration. An instance * of each plugin will be created for every executor, including those created by dynamic allocation, * before the executor starts running any tasks. * * The specific api exposed to the end users still considered to be very unstable. We will - * hopefully be able to keep compatability by providing default implementations for any methods + * hopefully be able to keep compatibility by providing default implementations for any methods * added, but make no guarantees this will always be possible across all Spark releases. * * Spark does nothing to verify the plugin is doing legitimate things, or to manage the resources * it uses. A plugin acquires the same privileges as the user running the task. A bad plugin - * could also intefere with task execution and make the executor fail in unexpected ways. + * could also interfere with task execution and make the executor fail in unexpected ways. 
*/ @DeveloperApi public interface ExecutorPlugin { http://git-wip-us.apache.org/repos/asf/spark/blob/8526f2ee/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java -- diff --git a/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java b/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java index 686eb28..80cd702 100644 --- a/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java +++ b/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java @@ -63,10 +63,10 @@ public class ExecutorPluginSuite { @Test public void testPluginClassDoesNotExist() { -SparkConf conf = initializeSparkConf("nonexistant.plugin"); +SparkConf conf = initializeSparkConf("nonexistent.plugin"); try { sc = new JavaSparkContext(conf); - fail("No exception thrown for nonexistant plugin"); + fail("No exception thrown for nonexistent plugin"); } catch (Exception e) { // We cannot catch ClassNotFoundException directly because Java doesn't think it'll be thrown assertTrue(e.toString().startsWith("java.lang.ClassNotFoundException")); http://git-wip-us.apache.org/repos/asf/spark/blob/8526f2ee/docs/sql-migration-guide-upgrade.md -- diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index 9a7f5b6..7b48040 100644 --- a/docs/sql-migration-guide-upgrade.md +++
spark git commit: [MINOR] Fix typos and misspellings
Repository: spark Updated Branches: refs/heads/master 0b5917000 -> c0d1bf032 [MINOR] Fix typos and misspellings ## What changes were proposed in this pull request? Fix typos and misspellings, per https://github.com/apache/spark-website/pull/158#issuecomment-435790366 ## How was this patch tested? Existing tests. Closes #22950 from srowen/Typos. Authored-by: Sean Owen Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c0d1bf03 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c0d1bf03 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c0d1bf03 Branch: refs/heads/master Commit: c0d1bf0322be12230c30cb200f19a02e4d5e0d49 Parents: 0b59170 Author: Sean Owen Authored: Mon Nov 5 17:34:23 2018 -0600 Committer: Sean Owen Committed: Mon Nov 5 17:34:23 2018 -0600 -- .../main/java/org/apache/spark/ExecutorPlugin.java | 6 +++--- .../java/org/apache/spark/ExecutorPluginSuite.java | 4 ++-- docs/sql-migration-guide-upgrade.md| 2 +- .../spark/ml/r/AFTSurvivalRegressionWrapper.scala | 6 +++--- .../org/apache/spark/ml/stat/Summarizer.scala | 4 ++-- .../mllib/stat/MultivariateOnlineSummarizer.scala | 2 +- python/pyspark/ml/stat.py | 2 +- .../apache/spark/sql/hive/CachedTableSuite.scala | 17 - 8 files changed, 21 insertions(+), 22 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c0d1bf03/core/src/main/java/org/apache/spark/ExecutorPlugin.java -- diff --git a/core/src/main/java/org/apache/spark/ExecutorPlugin.java b/core/src/main/java/org/apache/spark/ExecutorPlugin.java index ec0b57f..f86520c 100644 --- a/core/src/main/java/org/apache/spark/ExecutorPlugin.java +++ b/core/src/main/java/org/apache/spark/ExecutorPlugin.java @@ -20,18 +20,18 @@ package org.apache.spark; import org.apache.spark.annotation.DeveloperApi; /** - * A plugin which can be automaticaly instantiated within each Spark executor. Users can specify + * A plugin which can be automatically instantiated within each Spark executor. Users can specify * plugins which should be created with the "spark.executor.plugins" configuration. An instance * of each plugin will be created for every executor, including those created by dynamic allocation, * before the executor starts running any tasks. * * The specific api exposed to the end users still considered to be very unstable. We will - * hopefully be able to keep compatability by providing default implementations for any methods + * hopefully be able to keep compatibility by providing default implementations for any methods * added, but make no guarantees this will always be possible across all Spark releases. * * Spark does nothing to verify the plugin is doing legitimate things, or to manage the resources * it uses. A plugin acquires the same privileges as the user running the task. A bad plugin - * could also intefere with task execution and make the executor fail in unexpected ways. + * could also interfere with task execution and make the executor fail in unexpected ways. 
*/ @DeveloperApi public interface ExecutorPlugin { http://git-wip-us.apache.org/repos/asf/spark/blob/c0d1bf03/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java -- diff --git a/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java b/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java index 686eb28..80cd702 100644 --- a/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java +++ b/core/src/test/java/org/apache/spark/ExecutorPluginSuite.java @@ -63,10 +63,10 @@ public class ExecutorPluginSuite { @Test public void testPluginClassDoesNotExist() { -SparkConf conf = initializeSparkConf("nonexistant.plugin"); +SparkConf conf = initializeSparkConf("nonexistent.plugin"); try { sc = new JavaSparkContext(conf); - fail("No exception thrown for nonexistant plugin"); + fail("No exception thrown for nonexistent plugin"); } catch (Exception e) { // We cannot catch ClassNotFoundException directly because Java doesn't think it'll be thrown assertTrue(e.toString().startsWith("java.lang.ClassNotFoundException")); http://git-wip-us.apache.org/repos/asf/spark/blob/c0d1bf03/docs/sql-migration-guide-upgrade.md -- diff --git a/docs/sql-migration-guide-upgrade.md b/docs/sql-migration-guide-upgrade.md index c9685b8..50458e9 100644 --- a/docs/sql-migration-guide-upgrade.md +++ b/docs/sql-migration-guide-upgrade.md @@ -117,7 +117,7 @@ displayTitle: Spark SQL Upgrading Guide - Since
spark git commit: [SPARK-25764][ML][EXAMPLES] Update BisectingKMeans example to use ClusteringEvaluator
Repository: spark Updated Branches: refs/heads/master 486acda8c -> 0b5917000 [SPARK-25764][ML][EXAMPLES] Update BisectingKMeans example to use ClusteringEvaluator ## What changes were proposed in this pull request? Using `computeCost` for evaluating a model is a very poor approach. We should advise users to use the better approach which is available, i.e. using the `ClusteringEvaluator` to evaluate their models. The PR updates the examples for `BisectingKMeans` in order to do that. ## How was this patch tested? Running the examples. Closes #22786 from mgaido91/SPARK-25764. Authored-by: Marco Gaido Signed-off-by: DB Tsai Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0b591700 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0b591700 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0b591700 Branch: refs/heads/master Commit: 0b59170001be1cc1198cfc1c0486ca34633e64d5 Parents: 486acda Author: Marco Gaido Authored: Mon Nov 5 22:42:04 2018 + Committer: DB Tsai Committed: Mon Nov 5 22:42:04 2018 + -- .../spark/examples/ml/JavaBisectingKMeansExample.java | 12 +--- .../src/main/python/ml/bisecting_k_means_example.py | 12 +--- .../spark/examples/ml/BisectingKMeansExample.scala | 12 +--- 3 files changed, 27 insertions(+), 9 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0b591700/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java -- diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java index 8c82aaa..f517dc3 100644 --- a/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBisectingKMeansExample.java @@ -20,6 +20,7 @@ package org.apache.spark.examples.ml; // $example on$ import org.apache.spark.ml.clustering.BisectingKMeans; import org.apache.spark.ml.clustering.BisectingKMeansModel; +import org.apache.spark.ml.evaluation.ClusteringEvaluator; import org.apache.spark.ml.linalg.Vector; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; @@ -50,9 +51,14 @@ public class JavaBisectingKMeansExample { BisectingKMeans bkm = new BisectingKMeans().setK(2).setSeed(1); BisectingKMeansModel model = bkm.fit(dataset); -// Evaluate clustering. -double cost = model.computeCost(dataset); -System.out.println("Within Set Sum of Squared Errors = " + cost); +// Make predictions +Dataset predictions = model.transform(dataset); + +// Evaluate clustering by computing Silhouette score +ClusteringEvaluator evaluator = new ClusteringEvaluator(); + +double silhouette = evaluator.evaluate(predictions); +System.out.println("Silhouette with squared euclidean distance = " + silhouette); // Shows the result. 
System.out.println("Cluster Centers: "); http://git-wip-us.apache.org/repos/asf/spark/blob/0b591700/examples/src/main/python/ml/bisecting_k_means_example.py -- diff --git a/examples/src/main/python/ml/bisecting_k_means_example.py b/examples/src/main/python/ml/bisecting_k_means_example.py index 7842d20..82adb33 100644 --- a/examples/src/main/python/ml/bisecting_k_means_example.py +++ b/examples/src/main/python/ml/bisecting_k_means_example.py @@ -24,6 +24,7 @@ from __future__ import print_function # $example on$ from pyspark.ml.clustering import BisectingKMeans +from pyspark.ml.evaluation import ClusteringEvaluator # $example off$ from pyspark.sql import SparkSession @@ -41,9 +42,14 @@ if __name__ == "__main__": bkm = BisectingKMeans().setK(2).setSeed(1) model = bkm.fit(dataset) -# Evaluate clustering. -cost = model.computeCost(dataset) -print("Within Set Sum of Squared Errors = " + str(cost)) +# Make predictions +predictions = model.transform(dataset) + +# Evaluate clustering by computing Silhouette score +evaluator = ClusteringEvaluator() + +silhouette = evaluator.evaluate(predictions) +print("Silhouette with squared euclidean distance = " + str(silhouette)) # Shows the result. print("Cluster Centers: ") http://git-wip-us.apache.org/repos/asf/spark/blob/0b591700/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala -- diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BisectingKMeansExample.scala index
spark git commit: [SPARK-25944][R][BUILD] AppVeyor change to latest R version (3.5.1)
Repository: spark Updated Branches: refs/heads/master fc10c898f -> 486acda8c [SPARK-25944][R][BUILD] AppVeyor change to latest R version (3.5.1) ## What changes were proposed in this pull request? R 3.5.1 was released on 2018-07-02. This PR changes the R version from 3.4.1 to 3.5.1. ## How was this patch tested? AppVeyor Closes #22948 from HyukjinKwon/SPARK-25944. Authored-by: hyukjinkwon Signed-off-by: Dongjoon Hyun Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/486acda8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/486acda8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/486acda8 Branch: refs/heads/master Commit: 486acda8c5a421b440571629730dfa6b02af9b80 Parents: fc10c89 Author: hyukjinkwon Authored: Mon Nov 5 14:26:22 2018 -0800 Committer: Dongjoon Hyun Committed: Mon Nov 5 14:26:22 2018 -0800 -- dev/appveyor-install-dependencies.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/486acda8/dev/appveyor-install-dependencies.ps1 -- diff --git a/dev/appveyor-install-dependencies.ps1 b/dev/appveyor-install-dependencies.ps1 index c918828..06d9d70 100644 --- a/dev/appveyor-install-dependencies.ps1 +++ b/dev/appveyor-install-dependencies.ps1 @@ -115,7 +115,7 @@ $env:Path += ";$env:HADOOP_HOME\bin" Pop-Location # == R -$rVer = "3.4.1" +$rVer = "3.5.1" $rToolsVer = "3.4.0" InstallR
spark git commit: [SPARK-25758][ML] Deprecate computeCost in BisectingKMeans
Repository: spark Updated Branches: refs/heads/master fc65b4af0 -> fc10c898f [SPARK-25758][ML] Deprecate computeCost in BisectingKMeans ## What changes were proposed in this pull request? This PR proposes to deprecate the `computeCost` method on `BisectingKMeans` in favor of `ClusteringEvaluator` for evaluating the clustering. ## How was this patch tested? NA Closes #22869 from mgaido91/SPARK-25758_3.0. Authored-by: Marco Gaido Signed-off-by: DB Tsai Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc10c898 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc10c898 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc10c898 Branch: refs/heads/master Commit: fc10c898f45a25cf3751f0cd042e4c0743f1adba Parents: fc65b4a Author: Marco Gaido Authored: Mon Nov 5 22:13:20 2018 + Committer: DB Tsai Committed: Mon Nov 5 22:13:20 2018 + -- .../org/apache/spark/ml/clustering/BisectingKMeans.scala | 7 +++ python/pyspark/ml/clustering.py | 7 +++ 2 files changed, 14 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fc10c898/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index 5cb16cc..1a94aef 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -125,8 +125,15 @@ class BisectingKMeansModel private[ml] ( /** * Computes the sum of squared distances between the input points and their corresponding cluster * centers. + * + * @deprecated This method is deprecated and will be removed in future versions. Use + * ClusteringEvaluator instead. You can also get the cost on the training dataset in + * the summary. */ @Since("2.0.0") + @deprecated("This method is deprecated and will be removed in future versions. Use " + +"ClusteringEvaluator instead. You can also get the cost on the training dataset in the " + +"summary.", "3.0.0") def computeCost(dataset: Dataset[_]): Double = { SchemaUtils.validateVectorCompatibleColumn(dataset.schema, getFeaturesCol) val data = DatasetUtils.columnToOldVector(dataset, getFeaturesCol) http://git-wip-us.apache.org/repos/asf/spark/blob/fc10c898/python/pyspark/ml/clustering.py -- diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 5ef4e76..b371294 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -540,7 +540,14 @@ class BisectingKMeansModel(JavaModel, JavaMLWritable, JavaMLReadable): """ Computes the sum of squared distances between the input points and their corresponding cluster centers. + +..note:: Deprecated in 3.0.0. It will be removed in future versions. Use + ClusteringEvaluator instead. You can also get the cost on the training dataset in the + summary. """ +warnings.warn("Deprecated in 3.0.0. It will be removed in future versions. Use " + "ClusteringEvaluator instead. You can also get the cost on the training " + "dataset in the summary.", DeprecationWarning) return self._call_java("computeCost", dataset) @property
svn commit: r30684 - in /dev/spark/2.4.1-SNAPSHOT-2018_11_05_10_03-af2ec97-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Mon Nov 5 18:18:32 2018 New Revision: 30684 Log: Apache Spark 2.4.1-SNAPSHOT-2018_11_05_10_03-af2ec97 docs [This commit notification would consist of 1476 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
svn commit: r30683 - in /dev/spark/3.0.0-SNAPSHOT-2018_11_05_08_03-fc65b4a-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Mon Nov 5 16:17:16 2018 New Revision: 30683 Log: Apache Spark 3.0.0-SNAPSHOT-2018_11_05_08_03-fc65b4a docs [This commit notification would consist of 1471 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-25900][WEBUI] When the page number is more than the total page size, then fall back to the first page
Repository: spark Updated Branches: refs/heads/master 1fb3759f2 -> fc65b4af0 [SPARK-25900][WEBUI] When the page number is more than the total page size, then fall back to the first page ## What changes were proposed in this pull request? When the given page number is greater than the maximum page number, the web UI throws an exception. It would be better to fall back to the default page instead of throwing the exception in the web UI. ## How was this patch tested? Before PR: ![screenshot from 2018-10-31 23-41-37](https://user-images.githubusercontent.com/23054875/47816448-354fbe80-dd79-11e8-83d8-6aab196642f7.png) After PR: ![screenshot from 2018-10-31 23-54-23](https://user-images.githubusercontent.com/23054875/47816461-3ed92680-dd79-11e8-959d-0c531b3a6b2d.png) Closes #22914 from shahidki31/pageFallBack. Authored-by: Shahid Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fc65b4af Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fc65b4af Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fc65b4af Branch: refs/heads/master Commit: fc65b4af00c0a813613a7977126e942df8440bbb Parents: 1fb3759 Author: Shahid Authored: Mon Nov 5 09:13:53 2018 -0600 Committer: Sean Owen Committed: Mon Nov 5 09:13:53 2018 -0600 -- .../scala/org/apache/spark/ui/PagedTable.scala | 51 +--- .../org/apache/spark/ui/jobs/AllJobsPage.scala | 17 +-- .../org/apache/spark/ui/jobs/StagePage.scala| 16 +- .../org/apache/spark/ui/jobs/StageTable.scala | 19 +--- .../org/apache/spark/ui/storage/RDDPage.scala | 16 +- .../org/apache/spark/ui/PagedTableSuite.scala | 17 ++- .../sql/execution/ui/AllExecutionsPage.scala| 15 +- 7 files changed, 45 insertions(+), 106 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fc65b4af/core/src/main/scala/org/apache/spark/ui/PagedTable.scala -- diff --git a/core/src/main/scala/org/apache/spark/ui/PagedTable.scala b/core/src/main/scala/org/apache/spark/ui/PagedTable.scala index 0bbb10a..6c2c1f6 100644 --- a/core/src/main/scala/org/apache/spark/ui/PagedTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/PagedTable.scala @@ -33,10 +33,6 @@ import org.apache.spark.util.Utils */ private[spark] abstract class PagedDataSource[T](val pageSize: Int) { - if (pageSize <= 0) { -throw new IllegalArgumentException("Page size must be positive") - } - /** * Return the size of all data. */ @@ -51,13 +47,24 @@ private[spark] abstract class PagedDataSource[T](val pageSize: Int) { * Slice the data for this page */ def pageData(page: Int): PageData[T] = { -val totalPages = (dataSize + pageSize - 1) / pageSize -if (page <= 0 || page > totalPages) { - throw new IndexOutOfBoundsException( -s"Page $page is out of range. Please select a page number between 1 and $totalPages.") +// Display all the data in one page, if the pageSize is less than or equal to zero. 
+val pageTableSize = if (pageSize <= 0) { + dataSize +} else { + pageSize +} +val totalPages = (dataSize + pageTableSize - 1) / pageTableSize + +val pageToShow = if (page <= 0) { + 1 +} else if (page > totalPages) { + totalPages +} else { + page } -val from = (page - 1) * pageSize -val to = dataSize.min(page * pageSize) + +val (from, to) = ((pageToShow - 1) * pageSize, dataSize.min(pageToShow * pageTableSize)) + PageData(totalPages, sliceData(from, to)) } @@ -80,8 +87,6 @@ private[spark] trait PagedTable[T] { def pageSizeFormField: String - def prevPageSizeFormField: String - def pageNumberFormField: String def dataSource: PagedDataSource[T] @@ -94,7 +99,23 @@ private[spark] trait PagedTable[T] { val _dataSource = dataSource try { val PageData(totalPages, data) = _dataSource.pageData(page) - val pageNavi = pageNavigation(page, _dataSource.pageSize, totalPages) + + val pageToShow = if (page <= 0) { +1 + } else if (page > totalPages) { +totalPages + } else { +page + } + // Display all the data in one page, if the pageSize is less than or equal to zero. + val pageSize = if (_dataSource.pageSize <= 0) { +data.size + } else { +_dataSource.pageSize + } + + val pageNavi = pageNavigation(pageToShow, pageSize, totalPages) + {pageNavi} @@ -180,7 +201,6 @@ private[spark] trait PagedTable[T] { .split(search) .asScala .filterKeys(_ != pageSizeFormField) - .filterKeys(_ != prevPageSizeFormField) .filterKeys(_ != pageNumberFormField)
spark git commit: [SPARK-25930][K8S] Fix scala string detection in k8s tests
Repository: spark Updated Branches: refs/heads/branch-2.4 5bc4e7d1a -> af2ec972d [SPARK-25930][K8S] Fix scala string detection in k8s tests ## What changes were proposed in this pull request? - The issue is described in detail in [SPARK-25930](https://issues.apache.org/jira/browse/SPARK-25930). Since we rely on the standard output, we always pick the last line, which contains the wanted value. Although minor, the current implementation breaks tests. ## How was this patch tested? Manually: `rm -rf ~/.m2` and then run the tests. Closes #22931 from skonto/fix_scala_detection. Authored-by: Stavros Kontopoulos Signed-off-by: Sean Owen (cherry picked from commit 1fb3759f2b60a2e7c5e2a82afe1a580d848e0f8c) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/af2ec972 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/af2ec972 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/af2ec972 Branch: refs/heads/branch-2.4 Commit: af2ec972d7022fac369e7b6125f04134983486cf Parents: 5bc4e7d Author: Stavros Kontopoulos Authored: Mon Nov 5 08:40:25 2018 -0600 Committer: Sean Owen Committed: Mon Nov 5 08:40:41 2018 -0600 -- .../integration-tests/dev/dev-run-integration-tests.sh| 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/af2ec972/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh -- diff --git a/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh b/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh index cb5cf69..1b9de48 100755 --- a/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh +++ b/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh @@ -28,7 +28,12 @@ NAMESPACE= SERVICE_ACCOUNT= INCLUDE_TAGS="k8s" EXCLUDE_TAGS= -SCALA_VERSION="$($TEST_ROOT_DIR/build/mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=scala.binary.version | grep -v '\[' )" +MVN="$TEST_ROOT_DIR/build/mvn" + +SCALA_VERSION=$("$MVN" help:evaluate -Dexpression=scala.binary.version 2>/dev/null\ | grep -v "INFO"\ | grep -v "WARNING"\ | tail -n 1) # Parse arguments while (( "$#" )); do
spark git commit: [SPARK-25930][K8S] Fix scala string detection in k8s tests
Repository: spark Updated Branches: refs/heads/master e017cb396 -> 1fb3759f2 [SPARK-25930][K8S] Fix scala string detection in k8s tests ## What changes were proposed in this pull request? - The issue is described in detail in [SPARK-25930](https://issues.apache.org/jira/browse/SPARK-25930). Since we rely on the standard output, we always pick the last line, which contains the wanted value. Although minor, the current implementation breaks tests. ## How was this patch tested? Manually: `rm -rf ~/.m2` and then run the tests. Closes #22931 from skonto/fix_scala_detection. Authored-by: Stavros Kontopoulos Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1fb3759f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1fb3759f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1fb3759f Branch: refs/heads/master Commit: 1fb3759f2b60a2e7c5e2a82afe1a580d848e0f8c Parents: e017cb3 Author: Stavros Kontopoulos Authored: Mon Nov 5 08:40:25 2018 -0600 Committer: Sean Owen Committed: Mon Nov 5 08:40:25 2018 -0600 -- .../integration-tests/dev/dev-run-integration-tests.sh| 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1fb3759f/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh -- diff --git a/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh b/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh index 3c7cc93..68f284c 100755 --- a/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh +++ b/resource-managers/kubernetes/integration-tests/dev/dev-run-integration-tests.sh @@ -29,7 +29,12 @@ SERVICE_ACCOUNT= CONTEXT= INCLUDE_TAGS="k8s" EXCLUDE_TAGS= -SCALA_VERSION="$($TEST_ROOT_DIR/build/mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=scala.binary.version | grep -v '\[' )" +MVN="$TEST_ROOT_DIR/build/mvn" + +SCALA_VERSION=$("$MVN" help:evaluate -Dexpression=scala.binary.version 2>/dev/null\ | grep -v "INFO"\ | grep -v "WARNING"\ | tail -n 1) # Parse arguments while (( "$#" )); do
spark git commit: [SPARK-25850][SQL] Make the split threshold for the code generated function configurable
Repository: spark Updated Branches: refs/heads/master 4afb35033 -> e017cb396 [SPARK-25850][SQL] Make the split threshold for the code generated function configurable ## What changes were proposed in this pull request? As per the discussion in [#22823](https://github.com/apache/spark/pull/22823/files#r228400706), add a new configuration to make the split threshold for code-generated functions configurable. When the generated Java function's source code exceeds `spark.sql.codegen.methodSplitThreshold`, it will be split into multiple small functions. ## How was this patch tested? Manual tests. Closes #22847 from yucai/splitThreshold. Authored-by: yucai Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e017cb39 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e017cb39 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e017cb39 Branch: refs/heads/master Commit: e017cb39642a5039abd8ce8127ad41712901bdbc Parents: 4afb350 Author: yucai Authored: Mon Nov 5 20:09:39 2018 +0800 Committer: Wenchen Fan Committed: Mon Nov 5 20:09:39 2018 +0800 -- .../spark/sql/catalyst/expressions/Expression.scala | 4 +++- .../catalyst/expressions/codegen/CodeGenerator.scala | 3 ++- .../scala/org/apache/spark/sql/internal/SQLConf.scala | 14 ++ 3 files changed, 19 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e017cb39/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala index ccc5b90..141fcff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala @@ -25,6 +25,7 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.DeclarativeAggregate import org.apache.spark.sql.catalyst.expressions.codegen._ import org.apache.spark.sql.catalyst.expressions.codegen.Block._ import org.apache.spark.sql.catalyst.trees.TreeNode +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ import org.apache.spark.util.Utils @@ -121,7 +122,8 @@ abstract class Expression extends TreeNode[Expression] { private def reduceCodeSize(ctx: CodegenContext, eval: ExprCode): Unit = { // TODO: support whole stage codegen too -if (eval.code.length > 1024 && ctx.INPUT_ROW != null && ctx.currentVars == null) { +val splitThreshold = SQLConf.get.methodSplitThreshold +if (eval.code.length > splitThreshold && ctx.INPUT_ROW != null && ctx.currentVars == null) { val setIsNull = if (!eval.isNull.isInstanceOf[LiteralValue]) { val globalIsNull = ctx.addMutableState(CodeGenerator.JAVA_BOOLEAN, "globalIsNull") val localIsNull = eval.isNull http://git-wip-us.apache.org/repos/asf/spark/blob/e017cb39/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index d5857e0..b868a0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ 
-910,12 +910,13 @@ class CodegenContext { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() var length = 0 +val splitThreshold = SQLConf.get.methodSplitThreshold for (code <- expressions) { // We can't know how many bytecode will be generated, so use the length of source code // as metric. A method should not go beyond 8K, otherwise it will not be JITted, should // also not be too small, or it will have many function calls (for wide table), see the // results in BenchmarkWideTable. - if (length > 1024) { + if (length > splitThreshold) { blocks += blockBuilder.toString() blockBuilder.clear() length = 0 http://git-wip-us.apache.org/repos/asf/spark/blob/e017cb39/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
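As a usage sketch (assuming an active `SparkSession` named `spark`; the key is the one this patch adds, while the value 4096 is an arbitrary example and 1024, the previously hard-coded value, remains the default):

```scala
// Raise the source-length threshold at which generated Java functions are split.
spark.conf.set("spark.sql.codegen.methodSplitThreshold", "4096")
```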
svn commit: r30654 - in /dev/spark/3.0.0-SNAPSHOT-2018_11_05_00_02-4afb350-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Mon Nov 5 08:17:27 2018 New Revision: 30654 Log: Apache Spark 3.0.0-SNAPSHOT-2018_11_05_00_02-4afb350 docs [This commit notification would consist of 1471 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]