spark git commit: [SPARK-24035][SQL] SQL syntax for Pivot - fix antlr warning
Repository: spark Updated Branches: refs/heads/master e29176fd7 -> 80c6d35a3 [SPARK-24035][SQL] SQL syntax for Pivot - fix antlr warning ## What changes were proposed in this pull request? 1. Change antlr rule to fix the warning. 2. Add PIVOT/LATERAL check in AstBuilder with a more meaningful error message. ## How was this patch tested? 1. Add a counter case in `PlanParserSuite.test("lateral view")` Author: maryannxue Closes #21324 from maryannxue/spark-24035-fix. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/80c6d35a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/80c6d35a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/80c6d35a Branch: refs/heads/master Commit: 80c6d35a3edbfb2e053c7d6650e2f725c36af53e Parents: e29176f Author: maryannxue Authored: Mon May 14 23:34:42 2018 -0700 Committer: gatorsmile Committed: Mon May 14 23:34:42 2018 -0700 -- .../org/apache/spark/sql/catalyst/parser/SqlBase.g4 | 2 +- .../org/apache/spark/sql/catalyst/parser/AstBuilder.scala | 3 +++ .../spark/sql/catalyst/parser/PlanParserSuite.scala | 10 ++ 3 files changed, 14 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/80c6d35a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 -- diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index f7f921e..7c54851 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -398,7 +398,7 @@ hintStatement ; fromClause -: FROM relation (',' relation)* (pivotClause | lateralView*)? +: FROM relation (',' relation)* lateralView* pivotClause? 
; aggregation http://git-wip-us.apache.org/repos/asf/spark/blob/80c6d35a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala index 64eed23..b9ece29 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala @@ -504,6 +504,9 @@ class AstBuilder(conf: SQLConf) extends SqlBaseBaseVisitor[AnyRef] with Logging withJoinRelations(join, relation) } if (ctx.pivotClause() != null) { + if (!ctx.lateralView.isEmpty) { +throw new ParseException("LATERAL cannot be used together with PIVOT in FROM clause", ctx) + } withPivot(ctx.pivotClause, from) } else { ctx.lateralView.asScala.foldLeft(from)(withGenerate) http://git-wip-us.apache.org/repos/asf/spark/blob/80c6d35a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala index 812bfdd..fb51376 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/PlanParserSuite.scala @@ -318,6 +318,16 @@ class PlanParserSuite extends AnalysisTest { assertEqual( "select * from t lateral view posexplode(x) posexpl as x, y", expected) + +intercept( + """select * +|from t +|lateral view explode(x) expl +|pivot ( +| sum(x) +| FOR y IN ('a', 'b') +|)""".stripMargin, + "LATERAL cannot be used together with PIVOT in FROM clause") } test("joins") { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-23627][SQL] Provide isEmpty in Dataset
Repository: spark Updated Branches: refs/heads/master 9059f1ee6 -> e29176fd7 [SPARK-23627][SQL] Provide isEmpty in Dataset ## What changes were proposed in this pull request? This PR adds isEmpty() in DataSet ## How was this patch tested? Unit tests added Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Goun Na Author: goungoun Closes #20800 from goungoun/SPARK-23627. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e29176fd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e29176fd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e29176fd Branch: refs/heads/master Commit: e29176fd7dbcef04a29c4922ba655d58144fed24 Parents: 9059f1e Author: Goun Na Authored: Tue May 15 14:11:20 2018 +0800 Committer: hyukjinkwon Committed: Tue May 15 14:11:20 2018 +0800 -- .../src/main/scala/org/apache/spark/sql/Dataset.scala | 10 ++ .../test/scala/org/apache/spark/sql/DatasetSuite.scala| 8 2 files changed, 18 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e29176fd/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index d518e07..f001f16 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -512,6 +512,16 @@ class Dataset[T] private[sql]( def isLocal: Boolean = logicalPlan.isInstanceOf[LocalRelation] /** + * Returns true if the `Dataset` is empty. + * + * @group basic + * @since 2.4.0 + */ + def isEmpty: Boolean = withAction("isEmpty", limit(1).groupBy().count().queryExecution) { plan => +plan.executeCollect().head.getLong(0) == 0 + } + + /** * Returns true if this Dataset contains one or more sources that continuously * return data as it arrives. 
A Dataset that reads data from a streaming source * must be executed as a `StreamingQuery` using the `start()` method in http://git-wip-us.apache.org/repos/asf/spark/blob/e29176fd/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index e0f4d2b..d477d78 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -1425,6 +1425,14 @@ class DatasetSuite extends QueryTest with SharedSQLContext { } } + test("SPARK-23627: provide isEmpty in DataSet") { +val ds1 = spark.emptyDataset[Int] +val ds2 = Seq(1, 2, 3).toDS() + +assert(ds1.isEmpty == true) +assert(ds2.isEmpty == false) + } + test("SPARK-22472: add null check for top-level primitive values") { // If the primitive values are from Option, we need to do runtime null check. val ds = Seq(Some(1), None).toDS().as[Int] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r26911 - in /dev/spark/2.3.2-SNAPSHOT-2018_05_14_22_01-a886dc2-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Tue May 15 05:16:03 2018 New Revision: 26911 Log: Apache Spark 2.3.2-SNAPSHOT-2018_05_14_22_01-a886dc2 docs [This commit notification would consist of 1443 parts, which exceeds the limit of 50, so it was shortened to a summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r26906 - in /dev/spark/2.4.0-SNAPSHOT-2018_05_14_20_01-9059f1e-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Tue May 15 03:16:11 2018 New Revision: 26906 Log: Apache Spark 2.4.0-SNAPSHOT-2018_05_14_20_01-9059f1e docs [This commit notification would consist of 1462 parts, which exceeds the limit of 50, so it was shortened to a summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-23780][R] Failed to use googleVis library with new SparkR
Repository: spark Updated Branches: refs/heads/branch-2.3 eb7b37350 -> a886dc23a [SPARK-23780][R] Failed to use googleVis library with new SparkR ## What changes were proposed in this pull request? change generic to get it to work with googleVis also fix lintr ## How was this patch tested? manual test, unit tests Author: Felix Cheung Closes #21315 from felixcheung/googvis. (cherry picked from commit 9059f1ee6ae13c8636c9b7fdbb708a349256fb8e) Signed-off-by: Felix Cheung Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a886dc23 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a886dc23 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a886dc23 Branch: refs/heads/branch-2.3 Commit: a886dc23aeae8a79f7bc4d1aeb47d545e5550604 Parents: eb7b373 Author: Felix Cheung Authored: Mon May 14 19:20:25 2018 -0700 Committer: Felix Cheung Committed: Mon May 14 19:21:04 2018 -0700 -- R/pkg/R/client.R | 5 +++-- R/pkg/R/generics.R | 2 +- R/pkg/R/sparkR.R | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a886dc23/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 14a17c6..4c87f64 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -63,7 +63,7 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack checkJavaVersion <- function() { javaBin <- "java" javaHome <- Sys.getenv("JAVA_HOME") - javaReqs <- utils::packageDescription(utils::packageName(), fields=c("SystemRequirements")) + javaReqs <- utils::packageDescription(utils::packageName(), fields = c("SystemRequirements")) sparkJavaVersion <- as.numeric(tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L)) if (javaHome != "") { javaBin <- file.path(javaHome, "bin", javaBin) @@ -90,7 +90,8 @@ checkJavaVersion <- function() { # Extract 8 from it to compare to sparkJavaVersion javaVersionNum <- as.integer(strsplit(javaVersionStr, "[.]")[[1L]][2]) if 
(javaVersionNum != sparkJavaVersion) { -stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:", javaVersionStr)) +stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:", + javaVersionStr)) } } http://git-wip-us.apache.org/repos/asf/spark/blob/a886dc23/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index e0dde33..cffc9ab 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -762,7 +762,7 @@ setGeneric("summarize", function(x, ...) { standardGeneric("summarize") }) #' @export setGeneric("summary", function(object, ...) { standardGeneric("summary") }) -setGeneric("toJSON", function(x) { standardGeneric("toJSON") }) +setGeneric("toJSON", function(x, ...) { standardGeneric("toJSON") }) setGeneric("toRDD", function(x) { standardGeneric("toRDD") }) http://git-wip-us.apache.org/repos/asf/spark/blob/a886dc23/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index 2cd8b0c..266fa46 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -197,7 +197,7 @@ sparkR.sparkContext <- function( # Don't use readString() so that we can provide a useful # error message if the R and Java versions are mismatched. -authSecretLen = readInt(f) +authSecretLen <- readInt(f) if (length(authSecretLen) == 0 || authSecretLen == 0) { stop("Unexpected EOF in JVM connection data. Mismatched versions?") } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-23780][R] Failed to use googleVis library with new SparkR
Repository: spark Updated Branches: refs/heads/master 061e0084c -> 9059f1ee6 [SPARK-23780][R] Failed to use googleVis library with new SparkR ## What changes were proposed in this pull request? change generic to get it to work with googleVis also fix lintr ## How was this patch tested? manual test, unit tests Author: Felix Cheung Closes #21315 from felixcheung/googvis. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9059f1ee Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9059f1ee Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9059f1ee Branch: refs/heads/master Commit: 9059f1ee6ae13c8636c9b7fdbb708a349256fb8e Parents: 061e008 Author: Felix Cheung Authored: Mon May 14 19:20:25 2018 -0700 Committer: Felix Cheung Committed: Mon May 14 19:20:25 2018 -0700 -- R/pkg/R/client.R | 5 +++-- R/pkg/R/generics.R | 2 +- R/pkg/R/sparkR.R | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9059f1ee/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index 14a17c6..4c87f64 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -63,7 +63,7 @@ generateSparkSubmitArgs <- function(args, sparkHome, jars, sparkSubmitOpts, pack checkJavaVersion <- function() { javaBin <- "java" javaHome <- Sys.getenv("JAVA_HOME") - javaReqs <- utils::packageDescription(utils::packageName(), fields=c("SystemRequirements")) + javaReqs <- utils::packageDescription(utils::packageName(), fields = c("SystemRequirements")) sparkJavaVersion <- as.numeric(tail(strsplit(javaReqs, "[(=)]")[[1]], n = 1L)) if (javaHome != "") { javaBin <- file.path(javaHome, "bin", javaBin) @@ -90,7 +90,8 @@ checkJavaVersion <- function() { # Extract 8 from it to compare to sparkJavaVersion javaVersionNum <- as.integer(strsplit(javaVersionStr, "[.]")[[1L]][2]) if (javaVersionNum != sparkJavaVersion) { -stop(paste("Java version", sparkJavaVersion, "is required for this 
package; found version:", javaVersionStr)) +stop(paste("Java version", sparkJavaVersion, "is required for this package; found version:", + javaVersionStr)) } } http://git-wip-us.apache.org/repos/asf/spark/blob/9059f1ee/R/pkg/R/generics.R -- diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R index 61da30b..3ea1811 100644 --- a/R/pkg/R/generics.R +++ b/R/pkg/R/generics.R @@ -624,7 +624,7 @@ setGeneric("summarize", function(x, ...) { standardGeneric("summarize") }) #' @rdname summary setGeneric("summary", function(object, ...) { standardGeneric("summary") }) -setGeneric("toJSON", function(x) { standardGeneric("toJSON") }) +setGeneric("toJSON", function(x, ...) { standardGeneric("toJSON") }) setGeneric("toRDD", function(x) { standardGeneric("toRDD") }) http://git-wip-us.apache.org/repos/asf/spark/blob/9059f1ee/R/pkg/R/sparkR.R -- diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R index d6a2d08..f7c1663 100644 --- a/R/pkg/R/sparkR.R +++ b/R/pkg/R/sparkR.R @@ -194,7 +194,7 @@ sparkR.sparkContext <- function( # Don't use readString() so that we can provide a useful # error message if the R and Java versions are mismatched. -authSecretLen = readInt(f) +authSecretLen <- readInt(f) if (length(authSecretLen) == 0 || authSecretLen == 0) { stop("Unexpected EOF in JVM connection data. Mismatched versions?") } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r26905 - in /dev/spark/2.3.2-SNAPSHOT-2018_05_14_18_01-eb7b373-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Tue May 15 01:15:23 2018 New Revision: 26905 Log: Apache Spark 2.3.2-SNAPSHOT-2018_05_14_18_01-eb7b373 docs [This commit notification would consist of 1443 parts, which exceeds the limit of 50, so it was shortened to a summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/2] spark git commit: Preparing development version 2.3.2-SNAPSHOT
Preparing development version 2.3.2-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/eb7b3735 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/eb7b3735 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/eb7b3735 Branch: refs/heads/branch-2.3 Commit: eb7b37350108fca9d52d60a842c5920783a356da Parents: cc93bc9 Author: Marcelo Vanzin Authored: Mon May 14 17:57:20 2018 -0700 Committer: Marcelo Vanzin Committed: Mon May 14 17:57:20 2018 -0700 -- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 42 insertions(+), 42 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/eb7b3735/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION 
b/R/pkg/DESCRIPTION index 632bcb3..8df2635 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.3.1 +Version: 2.3.2 Title: R Frontend for Apache Spark Description: Provides an R Frontend for Apache Spark. Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), http://git-wip-us.apache.org/repos/asf/spark/blob/eb7b3735/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index d744c8b..02bf39b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.1 +2.3.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/eb7b3735/common/kvstore/pom.xml -- diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 3a41e16..646fdfb 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.1 +2.3.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/eb7b3735/common/network-common/pom.xml -- diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index f02108f..76c7dcf 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.1 +2.3.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/eb7b3735/common/network-shuffle/pom.xml -- diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 4430487..f2661fe 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -
[1/2] spark git commit: Preparing Spark release v2.3.1-rc1
Repository: spark Updated Branches: refs/heads/branch-2.3 6dfb51557 -> eb7b37350 Preparing Spark release v2.3.1-rc1 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/cc93bc95 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/cc93bc95 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/cc93bc95 Branch: refs/heads/branch-2.3 Commit: cc93bc9592fa52c1bf81b583e2bc4df5f4914146 Parents: 6dfb515 Author: Marcelo Vanzin Authored: Mon May 14 17:57:16 2018 -0700 Committer: Marcelo Vanzin Committed: Mon May 14 17:57:16 2018 -0700 -- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 2 +- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 40 files changed, 40 insertions(+), 40 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/cc93bc95/assembly/pom.xml -- 
diff --git a/assembly/pom.xml b/assembly/pom.xml index 5c5a8e9..d744c8b 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.1-SNAPSHOT +2.3.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cc93bc95/common/kvstore/pom.xml -- diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 2a625da..3a41e16 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.1-SNAPSHOT +2.3.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cc93bc95/common/network-common/pom.xml -- diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index adb1890..f02108f 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.1-SNAPSHOT +2.3.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cc93bc95/common/network-shuffle/pom.xml -- diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 4cdcfa2..4430487 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.1-SNAPSHOT +2.3.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/cc93bc95/common/network-yarn/pom.xml -- diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 494137f..167dc7a 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.1-SNAPSHOT +2.3.1 ../../pom.xml
[spark] Git Push Summary
Repository: spark Updated Tags: refs/tags/v2.3.1-rc1 [created] cc93bc959 - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r26904 - in /dev/spark/2.4.0-SNAPSHOT-2018_05_14_16_01-061e008-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Mon May 14 23:15:34 2018 New Revision: 26904 Log: Apache Spark 2.4.0-SNAPSHOT-2018_05_14_16_01-061e008 docs [This commit notification would consist of 1462 parts, which exceeds the limit of 50, so it was shortened to a summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-23852][SQL] Add withSQLConf(...) to test case
Repository: spark Updated Branches: refs/heads/branch-2.3 a8ee5706a -> 6dfb51557 [SPARK-23852][SQL] Add withSQLConf(...) to test case ## What changes were proposed in this pull request? Add a `withSQLConf(...)` wrapper to force Parquet filter pushdown for a test that relies on it. ## How was this patch tested? Test passes Author: Henry Robinson Closes #21323 from henryr/spark-23582. (cherry picked from commit 061e0084ce19c1384ba271a97a0aa1f87abe879d) Signed-off-by: Marcelo Vanzin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6dfb5155 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6dfb5155 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6dfb5155 Branch: refs/heads/branch-2.3 Commit: 6dfb515571b68a471509035287a46e431e48b73b Parents: a8ee570 Author: Henry Robinson Authored: Mon May 14 14:35:08 2018 -0700 Committer: Marcelo Vanzin Committed: Mon May 14 14:35:17 2018 -0700 -- .../datasources/parquet/ParquetFilterSuite.scala | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6dfb5155/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 79891af..f8d04b5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -604,13 +604,15 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex } test("SPARK-23852: Broken Parquet push-down for partially-written stats") { -// parquet-1217.parquet contains a single column with values -1, 0, 1, 2 and null. 
-// The row-group statistics include null counts, but not min and max values, which -// triggers PARQUET-1217. -val df = readResourceParquetFile("test-data/parquet-1217.parquet") +withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED.key -> "true") { + // parquet-1217.parquet contains a single column with values -1, 0, 1, 2 and null. + // The row-group statistics include null counts, but not min and max values, which + // triggers PARQUET-1217. + val df = readResourceParquetFile("test-data/parquet-1217.parquet") -// Will return 0 rows if PARQUET-1217 is not fixed. -assert(df.where("col > 0").count() === 2) + // Will return 0 rows if PARQUET-1217 is not fixed. + assert(df.where("col > 0").count() === 2) +} } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-23852][SQL] Add withSQLConf(...) to test case
Repository: spark Updated Branches: refs/heads/master 8cd83acf4 -> 061e0084c [SPARK-23852][SQL] Add withSQLConf(...) to test case ## What changes were proposed in this pull request? Add a `withSQLConf(...)` wrapper to force Parquet filter pushdown for a test that relies on it. ## How was this patch tested? Test passes Author: Henry Robinson Closes #21323 from henryr/spark-23582. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/061e0084 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/061e0084 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/061e0084 Branch: refs/heads/master Commit: 061e0084ce19c1384ba271a97a0aa1f87abe879d Parents: 8cd83ac Author: Henry Robinson Authored: Mon May 14 14:35:08 2018 -0700 Committer: Marcelo Vanzin Committed: Mon May 14 14:35:08 2018 -0700 -- .../datasources/parquet/ParquetFilterSuite.scala | 14 -- 1 file changed, 8 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/061e0084/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 4d0ecde..90da7eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -650,13 +650,15 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex } test("SPARK-23852: Broken Parquet push-down for partially-written stats") { -// parquet-1217.parquet contains a single column with values -1, 0, 1, 2 and null. -// The row-group statistics include null counts, but not min and max values, which -// triggers PARQUET-1217. 
-val df = readResourceParquetFile("test-data/parquet-1217.parquet") +withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED.key -> "true") { + // parquet-1217.parquet contains a single column with values -1, 0, 1, 2 and null. + // The row-group statistics include null counts, but not min and max values, which + // triggers PARQUET-1217. + val df = readResourceParquetFile("test-data/parquet-1217.parquet") -// Will return 0 rows if PARQUET-1217 is not fixed. -assert(df.where("col > 0").count() === 2) + // Will return 0 rows if PARQUET-1217 is not fixed. + assert(df.where("col > 0").count() === 2) +} } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r26901 - in /dev/spark/2.3.1-SNAPSHOT-2018_05_14_14_01-2f60df0-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Mon May 14 21:15:26 2018 New Revision: 26901 Log: Apache Spark 2.3.1-SNAPSHOT-2018_05_14_14_01-2f60df0 docs [This commit notification would consist of 1443 parts, which exceeds the limit of 50, so it was shortened to a summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-23852][SQL] Upgrade to Parquet 1.8.3
Repository: spark Updated Branches: refs/heads/branch-2.3 2f60df09d -> a8ee5706a [SPARK-23852][SQL] Upgrade to Parquet 1.8.3 ## What changes were proposed in this pull request? Upgrade Parquet dependency to 1.8.3 to avoid PARQUET-1217 ## How was this patch tested? Ran the included new test case. Author: Henry Robinson Closes #21302 from henryr/branch-2.3. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8ee5706 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8ee5706 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8ee5706 Branch: refs/heads/branch-2.3 Commit: a8ee5706ad96be3d6501471d05f7c3d61d3ca38e Parents: 2f60df0 Author: Henry Robinson Authored: Mon May 14 14:05:32 2018 -0700 Committer: Marcelo Vanzin Committed: Mon May 14 14:05:32 2018 -0700 -- dev/deps/spark-deps-hadoop-2.6 | 10 +- dev/deps/spark-deps-hadoop-2.7 | 10 +- pom.xml | 2 +- .../test/resources/test-data/parquet-1217.parquet| Bin 0 -> 321 bytes .../datasources/parquet/ParquetFilterSuite.scala | 10 ++ 5 files changed, 21 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a8ee5706/dev/deps/spark-deps-hadoop-2.6 -- diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index 577bf43..f4559a8 100644 --- a/dev/deps/spark-deps-hadoop-2.6 +++ b/dev/deps/spark-deps-hadoop-2.6 @@ -161,13 +161,13 @@ orc-mapreduce-1.4.3-nohive.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar paranamer-2.8.jar -parquet-column-1.8.2.jar -parquet-common-1.8.2.jar -parquet-encoding-1.8.2.jar +parquet-column-1.8.3.jar +parquet-common-1.8.3.jar +parquet-encoding-1.8.3.jar parquet-format-2.3.1.jar -parquet-hadoop-1.8.2.jar +parquet-hadoop-1.8.3.jar parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.8.2.jar +parquet-jackson-1.8.3.jar protobuf-java-2.5.0.jar py4j-0.10.7.jar pyrolite-4.13.jar http://git-wip-us.apache.org/repos/asf/spark/blob/a8ee5706/dev/deps/spark-deps-hadoop-2.7 -- diff --git 
a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index 304982e..c2df998 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -162,13 +162,13 @@ orc-mapreduce-1.4.3-nohive.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar paranamer-2.8.jar -parquet-column-1.8.2.jar -parquet-common-1.8.2.jar -parquet-encoding-1.8.2.jar +parquet-column-1.8.3.jar +parquet-common-1.8.3.jar +parquet-encoding-1.8.3.jar parquet-format-2.3.1.jar -parquet-hadoop-1.8.2.jar +parquet-hadoop-1.8.3.jar parquet-hadoop-bundle-1.6.0.jar -parquet-jackson-1.8.2.jar +parquet-jackson-1.8.3.jar protobuf-java-2.5.0.jar py4j-0.10.7.jar pyrolite-4.13.jar http://git-wip-us.apache.org/repos/asf/spark/blob/a8ee5706/pom.xml -- diff --git a/pom.xml b/pom.xml index 9c2d931..533c6b4 100644 --- a/pom.xml +++ b/pom.xml @@ -129,7 +129,7 @@ 1.2.1 10.12.1.1 -1.8.2 +1.8.3 1.4.3 nohive 1.6.0 http://git-wip-us.apache.org/repos/asf/spark/blob/a8ee5706/sql/core/src/test/resources/test-data/parquet-1217.parquet -- diff --git a/sql/core/src/test/resources/test-data/parquet-1217.parquet b/sql/core/src/test/resources/test-data/parquet-1217.parquet new file mode 100644 index 000..eb2dc4f Binary files /dev/null and b/sql/core/src/test/resources/test-data/parquet-1217.parquet differ http://git-wip-us.apache.org/repos/asf/spark/blob/a8ee5706/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 3380195..79891af 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -602,6 +602,16 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with 
SharedSQLContex } } } + + test("SPARK-23852: Broken Parquet push-down for partially-written stats") { +// parquet-1217.parquet contains a single column with values -1, 0, 1, 2 and null. +// The row-group statistics include null counts, but not min and max values, which +// triggers P
spark git commit: [SPARK-24027][SQL] Support MapType with StringType for keys as the root type by from_json
Repository: spark Updated Branches: refs/heads/master 075d678c8 -> 8cd83acf4 [SPARK-24027][SQL] Support MapType with StringType for keys as the root type by from_json ## What changes were proposed in this pull request? Currently, the from_json function supports only StructType or ArrayType as the root type. This PR additionally allows specifying MapType(StringType, DataType) as the root type. For example: ```scala import org.apache.spark.sql.types._ val schema = MapType(StringType, IntegerType) val in = Seq("""{"a": 1, "b": 2, "c": 3}""").toDS() in.select(from_json($"value", schema, Map[String, String]())).collect() ``` ``` res1: Array[org.apache.spark.sql.Row] = Array([Map(a -> 1, b -> 2, c -> 3)]) ``` ## How was this patch tested? It was checked by new tests for the map type with integer type and struct type as value types. Round-trip tests such as from_json(to_json) and to_json(from_json) for MapType were also added. Author: Maxim Gekk Author: Maxim Gekk Closes #21108 from MaxGekk/from_json-map-type. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8cd83acf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8cd83acf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8cd83acf Branch: refs/heads/master Commit: 8cd83acf4075d369bfcf9e703760d4946ef15f00 Parents: 075d678 Author: Maxim Gekk Authored: Mon May 14 14:05:42 2018 -0700 Committer: gatorsmile Committed: Mon May 14 14:05:42 2018 -0700 -- python/pyspark/sql/functions.py | 10 ++- .../catalyst/expressions/jsonExpressions.scala | 10 ++- .../spark/sql/catalyst/json/JacksonParser.scala | 18 +- .../scala/org/apache/spark/sql/functions.scala | 29 - .../apache/spark/sql/JsonFunctionsSuite.scala | 66 5 files changed, 113 insertions(+), 20 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8cd83acf/python/pyspark/sql/functions.py -- diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index b62748e..6866c1c 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -2095,12 +2095,13 @@ def json_tuple(col, *fields): return Column(jc) +@ignore_unicode_prefix @since(2.1) def from_json(col, schema, options={}): """ -Parses a column containing a JSON string into a :class:`StructType` or :class:`ArrayType` -of :class:`StructType`\\s with the specified schema. Returns `null`, in the case of an -unparseable string. +Parses a column containing a JSON string into a :class:`MapType` with :class:`StringType` +as keys type, :class:`StructType` or :class:`ArrayType` of :class:`StructType`\\s with +the specified schema. Returns `null`, in the case of an unparseable string. :param col: string column in json format :param schema: a StructType or ArrayType of StructType to use when parsing the json column. 
@@ -2117,6 +2118,9 @@ def from_json(col, schema, options={}): [Row(json=Row(a=1))] >>> df.select(from_json(df.value, "a INT").alias("json")).collect() [Row(json=Row(a=1))] +>>> schema = MapType(StringType(), IntegerType()) +>>> df.select(from_json(df.value, schema).alias("json")).collect() +[Row(json={u'a': 1})] >>> data = [(1, '''[{"a": 1}]''')] >>> schema = ArrayType(StructType([StructField("a", IntegerType())])) >>> df = spark.createDataFrame(data, ("key", "value")) http://git-wip-us.apache.org/repos/asf/spark/blob/8cd83acf/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index 34161f0..04a4eb0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -548,7 +548,7 @@ case class JsonToStructs( forceNullableSchema = SQLConf.get.getConf(SQLConf.FROM_JSON_FORCE_NULLABLE_SCHEMA)) override def checkInputDataTypes(): TypeCheckResult = nullableSchema match { -case _: StructType | ArrayType(_: StructType, _) => +case _: StructType | ArrayType(_: StructType, _) | _: MapType => super.checkInputDataTypes() case _ => TypeCheckResult.TypeCheckFailure( s"Input schema ${nullableSchema.simpleString} must be a struct or an array of structs.") @@ -558,6 +558,7 @@ case class JsonToStructs( lazy val rowSchema = nullableSchema match { case st: StructType =>
spark git commit: [SPARK-24155][ML] Instrumentation improvements for clustering
Repository: spark Updated Branches: refs/heads/master c26f67325 -> 075d678c8 [SPARK-24155][ML] Instrumentation improvements for clustering ## What changes were proposed in this pull request? Changed the instrumentation for all of the clustering methods. ## How was this patch tested? N/A Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Lu WANG Closes #21218 from ludatabricks/SPARK-23686-1. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/075d678c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/075d678c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/075d678c Branch: refs/heads/master Commit: 075d678c8844614910b50abca07282bde31ef7e0 Parents: c26f673 Author: Lu WANG Authored: Mon May 14 13:35:54 2018 -0700 Committer: Xiangrui Meng Committed: Mon May 14 13:35:54 2018 -0700 -- .../org/apache/spark/ml/clustering/BisectingKMeans.scala | 7 +-- .../org/apache/spark/ml/clustering/GaussianMixture.scala | 5 - .../main/scala/org/apache/spark/ml/clustering/KMeans.scala| 4 +++- 3 files changed, 12 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/075d678c/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index 438e53b..1ad4e09 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -261,8 +261,9 @@ class BisectingKMeans @Since("2.0.0") ( transformSchema(dataset.schema, logging = true) val rdd = DatasetUtils.columnToOldVector(dataset, getFeaturesCol) -val instr = Instrumentation.create(this, rdd) -instr.logParams(featuresCol, predictionCol, k, maxIter, seed, minDivisibleClusterSize) +val instr = Instrumentation.create(this, 
dataset) +instr.logParams(featuresCol, predictionCol, k, maxIter, seed, + minDivisibleClusterSize, distanceMeasure) val bkm = new MLlibBisectingKMeans() .setK($(k)) @@ -275,6 +276,8 @@ class BisectingKMeans @Since("2.0.0") ( val summary = new BisectingKMeansSummary( model.transform(dataset), $(predictionCol), $(featuresCol), $(k)) model.setSummary(Some(summary)) +// TODO: need to extend logNamedValue to support Array +instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]")) instr.logSuccess(model) model } http://git-wip-us.apache.org/repos/asf/spark/blob/075d678c/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index 88d618c..3091bb5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -352,7 +352,7 @@ class GaussianMixture @Since("2.0.0") ( s"than ${GaussianMixture.MAX_NUM_FEATURES} features because the size of the covariance" + s" matrix is quadratic in the number of features.") -val instr = Instrumentation.create(this, instances) +val instr = Instrumentation.create(this, dataset) instr.logParams(featuresCol, predictionCol, probabilityCol, k, maxIter, seed, tol) instr.logNumFeatures(numFeatures) @@ -425,6 +425,9 @@ class GaussianMixture @Since("2.0.0") ( val summary = new GaussianMixtureSummary(model.transform(dataset), $(predictionCol), $(probabilityCol), $(featuresCol), $(k), logLikelihood) model.setSummary(Some(summary)) +instr.logNamedValue("logLikelihood", logLikelihood) +// TODO: need to extend logNamedValue to support Array +instr.logNamedValue("clusterSizes", summary.clusterSizes.mkString("[", ",", "]")) instr.logSuccess(model) model } 
http://git-wip-us.apache.org/repos/asf/spark/blob/075d678c/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index 97f246f..e72d7f9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -342,7 +342,7 @@ class KMeans @Since("1.5.0") ( instances.persist(StorageLevel.ME
svn commit: r26899 - in /dev/spark/2.4.0-SNAPSHOT-2018_05_14_12_03-c26f673-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Mon May 14 19:18:09 2018 New Revision: 26899 Log: Apache Spark 2.4.0-SNAPSHOT-2018_05_14_12_03-c26f673 docs [This commit notification would consist of 1462 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-24246][SQL] Improve AnalysisException by setting the cause when it's available
Repository: spark Updated Branches: refs/heads/master 1430fa80e -> c26f67325 [SPARK-24246][SQL] Improve AnalysisException by setting the cause when it's available ## What changes were proposed in this pull request? If there is an exception, it's better to set it as the cause of AnalysisException since the exception may contain useful debug information. ## How was this patch tested? Jenkins Author: Shixiong Zhu Closes #21297 from zsxwing/SPARK-24246. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c26f6732 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c26f6732 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c26f6732 Branch: refs/heads/master Commit: c26f673252c2cbbccf8c395ba6d4ab80c098d60e Parents: 1430fa8 Author: Shixiong Zhu Authored: Mon May 14 11:37:57 2018 -0700 Committer: gatorsmile Committed: Mon May 14 11:37:57 2018 -0700 -- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 6 +++--- .../spark/sql/catalyst/analysis/ResolveInlineTables.scala | 2 +- .../scala/org/apache/spark/sql/catalyst/analysis/package.scala | 5 + .../org/apache/spark/sql/execution/datasources/rules.scala | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c26f6732/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index dfdcdbc..3eaa9ec 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -676,13 +676,13 @@ class Analyzer( try { catalog.lookupRelation(tableIdentWithDb) } catch { -case _: NoSuchTableException => - u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}") +case 
e: NoSuchTableException => + u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}", e) // If the database is defined and that database is not found, throw an AnalysisException. // Note that if the database is not defined, it is possible we are looking up a temp view. case e: NoSuchDatabaseException => u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}, the " + -s"database ${e.db} doesn't exist.") +s"database ${e.db} doesn't exist.", e) } } http://git-wip-us.apache.org/repos/asf/spark/blob/c26f6732/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala index 4eb6e64..31ba9d7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala @@ -105,7 +105,7 @@ case class ResolveInlineTables(conf: SQLConf) extends Rule[LogicalPlan] with Cas castedExpr.eval() } catch { case NonFatal(ex) => -table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}") +table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}", ex) } }) } http://git-wip-us.apache.org/repos/asf/spark/blob/c26f6732/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala index 7731336..354a3fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala @@ -41,6 +41,11 @@ package object analysis { def failAnalysis(msg: 
String): Nothing = { throw new AnalysisException(msg, t.origin.line, t.origin.startPosition) } + +/** Fails the analysis at the point where a specific tree node was parsed. */ +def failAnalysis(msg: String, cause: Throwable): Nothing = { + throw new AnalysisException(msg, t.origin.line, t.origin.startPosition, cause = Some(cause)) +} } /** Catc
spark git commit: [SPARK-24246][SQL] Improve AnalysisException by setting the cause when it's available
Repository: spark Updated Branches: refs/heads/branch-2.3 88003f02c -> 2f60df09d [SPARK-24246][SQL] Improve AnalysisException by setting the cause when it's available ## What changes were proposed in this pull request? If there is an exception, it's better to set it as the cause of AnalysisException since the exception may contain useful debug information. ## How was this patch tested? Jenkins Author: Shixiong Zhu Closes #21297 from zsxwing/SPARK-24246. (cherry picked from commit c26f673252c2cbbccf8c395ba6d4ab80c098d60e) Signed-off-by: gatorsmile Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2f60df09 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2f60df09 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2f60df09 Branch: refs/heads/branch-2.3 Commit: 2f60df09dc1bb65da254e00abe8463122e8c77cf Parents: 88003f0 Author: Shixiong Zhu Authored: Mon May 14 11:37:57 2018 -0700 Committer: gatorsmile Committed: Mon May 14 11:38:09 2018 -0700 -- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 6 +++--- .../spark/sql/catalyst/analysis/ResolveInlineTables.scala | 2 +- .../scala/org/apache/spark/sql/catalyst/analysis/package.scala | 5 + .../org/apache/spark/sql/execution/datasources/rules.scala | 2 +- 4 files changed, 10 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2f60df09/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 9cc928c..8597d83 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -661,13 +661,13 @@ class Analyzer( try { catalog.lookupRelation(tableIdentWithDb) } catch { -case _: 
NoSuchTableException => - u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}") +case e: NoSuchTableException => + u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}", e) // If the database is defined and that database is not found, throw an AnalysisException. // Note that if the database is not defined, it is possible we are looking up a temp view. case e: NoSuchDatabaseException => u.failAnalysis(s"Table or view not found: ${tableIdentWithDb.unquotedString}, the " + -s"database ${e.db} doesn't exist.") +s"database ${e.db} doesn't exist.", e) } } http://git-wip-us.apache.org/repos/asf/spark/blob/2f60df09/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala index f2df3e1..71ed754 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveInlineTables.scala @@ -103,7 +103,7 @@ case class ResolveInlineTables(conf: SQLConf) extends Rule[LogicalPlan] with Cas castedExpr.eval() } catch { case NonFatal(ex) => -table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}") +table.failAnalysis(s"failed to evaluate expression ${e.sql}: ${ex.getMessage}", ex) } }) } http://git-wip-us.apache.org/repos/asf/spark/blob/2f60df09/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala index 7731336..354a3fa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/package.scala @@ -41,6 +41,11 @@ package object analysis { def failAnalysis(msg: String): Nothing = { throw new AnalysisException(msg, t.origin.line, t.origin.startPosition) } + +/** Fails the analysis at the point where a specific tree node was parsed. */ +def failAnalysis(msg: String, cause: Throwable): Nothing = { + throw new Anal
spark git commit: [SPARK-24263][R] SparkR java check breaks with openjdk
Repository: spark Updated Branches: refs/heads/branch-2.3 867d948a6 -> 88003f02c [SPARK-24263][R] SparkR java check breaks with openjdk ## What changes were proposed in this pull request? Change text to grep for. ## How was this patch tested? manual test Author: Felix Cheung Closes #21314 from felixcheung/openjdkver. (cherry picked from commit 1430fa80e37762e31cc5adc74cd609c215d84b6e) Signed-off-by: Marcelo Vanzin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/88003f02 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/88003f02 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/88003f02 Branch: refs/heads/branch-2.3 Commit: 88003f02c11bec9df42d225841b82dc748828940 Parents: 867d948 Author: Felix Cheung Authored: Mon May 14 10:49:12 2018 -0700 Committer: Marcelo Vanzin Committed: Mon May 14 10:49:24 2018 -0700 -- R/pkg/R/client.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/88003f02/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index e9295e0..14a17c6 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -82,7 +82,7 @@ checkJavaVersion <- function() { }) javaVersionFilter <- Filter( function(x) { -grepl("java version", x) +grepl(" version", x) }, javaVersionOut) javaVersionStr <- strsplit(javaVersionFilter[[1]], "[\"]")[[1L]][2] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-24263][R] SparkR java check breaks with openjdk
Repository: spark Updated Branches: refs/heads/master b6c50d782 -> 1430fa80e [SPARK-24263][R] SparkR java check breaks with openjdk ## What changes were proposed in this pull request? Change text to grep for. ## How was this patch tested? manual test Author: Felix Cheung Closes #21314 from felixcheung/openjdkver. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1430fa80 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1430fa80 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1430fa80 Branch: refs/heads/master Commit: 1430fa80e37762e31cc5adc74cd609c215d84b6e Parents: b6c50d7 Author: Felix Cheung Authored: Mon May 14 10:49:12 2018 -0700 Committer: Marcelo Vanzin Committed: Mon May 14 10:49:12 2018 -0700 -- R/pkg/R/client.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1430fa80/R/pkg/R/client.R -- diff --git a/R/pkg/R/client.R b/R/pkg/R/client.R index e9295e0..14a17c6 100644 --- a/R/pkg/R/client.R +++ b/R/pkg/R/client.R @@ -82,7 +82,7 @@ checkJavaVersion <- function() { }) javaVersionFilter <- Filter( function(x) { -grepl("java version", x) +grepl(" version", x) }, javaVersionOut) javaVersionStr <- strsplit(javaVersionFilter[[1]], "[\"]")[[1L]][2] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org