spark git commit: [SPARK-20437][R] R wrappers for rollup and cube
Repository: spark Updated Branches: refs/heads/master 57e1da394 -> df58a95a3 [SPARK-20437][R] R wrappers for rollup and cube ## What changes were proposed in this pull request? - Add `rollup` and `cube` methods and corresponding generics. - Add short description to the vignette. ## How was this patch tested? - Existing unit tests. - Additional unit tests covering new features. - `check-cran.sh`. Author: zero323Closes #17728 from zero323/SPARK-20437. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/df58a95a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/df58a95a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/df58a95a Branch: refs/heads/master Commit: df58a95a33b739462dbe84e098839af2a8643d45 Parents: 57e1da3 Author: zero323 Authored: Tue Apr 25 22:00:45 2017 -0700 Committer: Felix Cheung Committed: Tue Apr 25 22:00:45 2017 -0700 -- R/pkg/NAMESPACE | 2 + R/pkg/R/DataFrame.R | 73 +- R/pkg/R/generics.R| 8 ++ R/pkg/inst/tests/testthat/test_sparkSQL.R | 102 + R/pkg/vignettes/sparkr-vignettes.Rmd | 15 docs/sparkr.md| 30 6 files changed, 229 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/df58a95a/R/pkg/NAMESPACE -- diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE index 95d5cc6..2800461 100644 --- a/R/pkg/NAMESPACE +++ b/R/pkg/NAMESPACE @@ -101,6 +101,7 @@ exportMethods("arrange", "createOrReplaceTempView", "crossJoin", "crosstab", + "cube", "dapply", "dapplyCollect", "describe", @@ -143,6 +144,7 @@ exportMethods("arrange", "registerTempTable", "rename", "repartition", + "rollup", "sample", "sample_frac", "sampleBy", http://git-wip-us.apache.org/repos/asf/spark/blob/df58a95a/R/pkg/R/DataFrame.R -- diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R index 88a138f..cd6f03a 100644 --- a/R/pkg/R/DataFrame.R +++ b/R/pkg/R/DataFrame.R @@ -1321,7 +1321,7 @@ setMethod("toRDD", #' Groups the SparkDataFrame using the specified columns, so we can run aggregation on them. #' #' @param x a SparkDataFrame. -#' @param ... variable(s) (character names(s) or Column(s)) to group on. +#' @param ... character name(s) or Column(s) to group on. #' @return A GroupedData. #' @family SparkDataFrame functions #' @aliases groupBy,SparkDataFrame-method @@ -1337,6 +1337,7 @@ setMethod("toRDD", #' agg(groupBy(df, "department", "gender"), salary="avg", "age" -> "max") #' } #' @note groupBy since 1.4.0 +#' @seealso \link{agg}, \link{cube}, \link{rollup} setMethod("groupBy", signature(x = "SparkDataFrame"), function(x, ...) { @@ -3642,3 +3643,73 @@ setMethod("checkpoint", df <- callJMethod(x@sdf, "checkpoint", as.logical(eager)) dataFrame(df) }) + +#' cube +#' +#' Create a multi-dimensional cube for the SparkDataFrame using the specified columns. +#' +#' If grouping expression is missing \code{cube} creates a single global aggregate and is equivalent to +#' direct application of \link{agg}. +#' +#' @param x a SparkDataFrame. +#' @param ... character name(s) or Column(s) to group on. +#' @return A GroupedData. +#' @family SparkDataFrame functions +#' @aliases cube,SparkDataFrame-method +#' @rdname cube +#' @name cube +#' @export +#' @examples +#' \dontrun{ +#' df <- createDataFrame(mtcars) +#' mean(cube(df, "cyl", "gear", "am"), "mpg") +#' +#' # Following calls are equivalent +#' agg(cube(carsDF), mean(carsDF$mpg)) +#' agg(carsDF, mean(carsDF$mpg)) +#' } +#' @note cube since 2.3.0 +#' @seealso \link{agg}, \link{groupBy}, \link{rollup} +setMethod("cube", + signature(x = "SparkDataFrame"), + function(x, ...) 
{ +cols <- list(...) +jcol <- lapply(cols, function(x) if (class(x) == "Column") x@jc else column(x)@jc) +sgd <- callJMethod(x@sdf, "cube", jcol) +groupedData(sgd) + }) + +#' rollup +#' +#' Create a multi-dimensional rollup for the SparkDataFrame using the specified columns. +#' +#' If grouping expression is missing \code{rollup} creates a single global aggregate and is equivalent to +#' direct application of \link{agg}. +#' +#' @param x a SparkDataFrame. +#' @param ... character name(s) or Column(s) to group on. +#' @return
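A minimal Scala sketch (not part of this patch; the data and column names are made up) of the JVM-side Dataset methods that the new SparkR `cube()` and `rollup()` wrappers reach through `callJMethod(x@sdf, ...)`:

```scala
// Sketch only: Dataset.rollup/cube as called by the new SparkR wrappers.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.avg

object GroupingSetsSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("grouping-sets-sketch").master("local[*]").getOrCreate()
    import spark.implicits._

    // Made-up mtcars-like data.
    val cars = Seq((6, 4, 21.0), (6, 3, 18.1), (8, 3, 14.3), (4, 4, 26.0)).toDF("cyl", "gear", "mpg")

    // rollup: subtotals for (cyl, gear), (cyl), and the grand total.
    cars.rollup("cyl", "gear").agg(avg("mpg")).show()

    // cube: subtotals for every combination of the grouping columns,
    // including (gear) alone, plus the grand total.
    cars.cube("cyl", "gear").agg(avg("mpg")).show()

    // With no grouping columns, both collapse to a single global aggregate,
    // i.e. the same as cars.agg(avg("mpg")), matching the roxygen note above.
    cars.cube().agg(avg("mpg")).show()

    spark.stop()
  }
}
```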
spark git commit: [SPARK-16548][SQL] Inconsistent error handling in JSON parsing SQL functions
Repository: spark Updated Branches: refs/heads/master caf392025 -> 57e1da394 [SPARK-16548][SQL] Inconsistent error handling in JSON parsing SQL functions ## What changes were proposed in this pull request? change to using Jackson's `com.fasterxml.jackson.core.JsonFactory` public JsonParser createParser(String content) ## How was this patch tested? existing unit tests Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Eric WassermanCloses #17693 from ewasserman/SPARK-20314. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/57e1da39 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/57e1da39 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/57e1da39 Branch: refs/heads/master Commit: 57e1da39464131329318b723caa54df9f55fa54f Parents: caf3920 Author: Eric Wasserman Authored: Wed Apr 26 11:42:43 2017 +0800 Committer: Wenchen Fan Committed: Wed Apr 26 11:42:43 2017 +0800 -- .../sql/catalyst/expressions/jsonExpressions.scala | 12 +--- .../expressions/JsonExpressionsSuite.scala | 17 + 2 files changed, 26 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/57e1da39/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala index df4d406..9fb0ea6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/jsonExpressions.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.expressions -import java.io.{ByteArrayOutputStream, CharArrayWriter, StringWriter} +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, CharArrayWriter, InputStreamReader, StringWriter} import scala.util.parsing.combinator.RegexParsers @@ -149,7 +149,10 @@ case class GetJsonObject(json: Expression, path: Expression) if (parsed.isDefined) { try { -Utils.tryWithResource(jsonFactory.createParser(jsonStr.getBytes)) { parser => +/* We know the bytes are UTF-8 encoded. Pass a Reader to avoid having Jackson + detect character encoding which could fail for some malformed strings */ +Utils.tryWithResource(jsonFactory.createParser(new InputStreamReader( +new ByteArrayInputStream(jsonStr.getBytes), "UTF-8"))) { parser => val output = new ByteArrayOutputStream() val matched = Utils.tryWithResource( jsonFactory.createGenerator(output, JsonEncoding.UTF8)) { generator => @@ -393,7 +396,10 @@ case class JsonTuple(children: Seq[Expression]) } try { - Utils.tryWithResource(jsonFactory.createParser(json.getBytes)) { + /* We know the bytes are UTF-8 encoded. 
Pass a Reader to avoid having Jackson + detect character encoding which could fail for some malformed strings */ + Utils.tryWithResource(jsonFactory.createParser(new InputStreamReader( + new ByteArrayInputStream(json.getBytes), "UTF-8"))) { parser => parseRow(parser, input) } } catch { http://git-wip-us.apache.org/repos/asf/spark/blob/57e1da39/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala index c5b7223..4402ad4 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/JsonExpressionsSuite.scala @@ -39,6 +39,10 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { |"fb:testid":"1234"} |""".stripMargin + /* invalid json with leading nulls would trigger java.io.CharConversionException + in Jackson's JsonFactory.createParser(byte[]) due to RFC-4627 encoding detection */ + val badJson = "\0\0\0A\1AAA" + test("$.store.bicycle") { checkEvaluation( GetJsonObject(Literal(json), Literal("$.store.bicycle")), @@ -224,6 +228,13 @@ class JsonExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { null) } + test("SPARK-16548: character conversion") { +checkEvaluation( +
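A self-contained sketch of the behaviour this change relies on, assuming only jackson-core on the classpath (this is not the Spark code itself): building the `JsonParser` from a `Reader` with an explicit charset sidesteps Jackson's byte-level encoding auto-detection, which is what throws `java.io.CharConversionException` on malformed input such as the leading NULs used by the new `badJson` test value:

```scala
// Standalone sketch of the idea behind the fix (not the Spark code itself).
import java.io.{ByteArrayInputStream, IOException, InputStreamReader}
import com.fasterxml.jackson.core.JsonFactory

object JsonParserEncodingSketch {
  def main(args: Array[String]): Unit = {
    val factory = new JsonFactory()
    // Same shape as the badJson added to JsonExpressionsSuite: leading NULs
    // can make Jackson's byte-order/encoding detection fail before parsing starts.
    val badJson = "\u0000\u0000\u0000A\u0001AAA"

    // Byte-array path (the old code): encoding auto-detection may fail with
    // java.io.CharConversionException for this input.
    try {
      val parser = factory.createParser(badJson.getBytes("UTF-8"))
      try parser.nextToken() finally parser.close()
    } catch {
      case e: IOException => println(s"byte-array path: ${e.getClass.getSimpleName}")
    }

    // Reader path (what the patch switches to): the charset is fixed up front,
    // so the same input surfaces as an ordinary, catchable parse error.
    try {
      val parser = factory.createParser(new InputStreamReader(
        new ByteArrayInputStream(badJson.getBytes("UTF-8")), "UTF-8"))
      try parser.nextToken() finally parser.close()
    } catch {
      case e: IOException => println(s"reader path: ${e.getClass.getSimpleName}")
    }
  }
}
```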
spark git commit: [SPARK-20439][SQL][BACKPORT-2.1] Fix Catalog API listTables and getTable when failed to fetch table metadata
Repository: spark Updated Branches: refs/heads/branch-2.1 8460b0905 -> 6696ad0e8 [SPARK-20439][SQL][BACKPORT-2.1] Fix Catalog API listTables and getTable when failed to fetch table metadata ### What changes were proposed in this pull request? This PR is to backport https://github.com/apache/spark/pull/17730 to Spark 2.1 --- -- `spark.catalog.listTables` and `spark.catalog.getTable` do not work if we are unable to retrieve table metadata for any reason (e.g., the table serde class is not accessible or the table type is not accepted by Spark SQL). After this PR, the APIs still return the corresponding Table, but without the description and tableType. ### How was this patch tested? Added a test case. Author: Xiao Li. Closes #17760 from gatorsmile/backport-17730. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6696ad0e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6696ad0e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6696ad0e Branch: refs/heads/branch-2.1 Commit: 6696ad0e8ce196a27e2908108f6e7eb7661affc4 Parents: 8460b09 Author: Xiao Li Authored: Wed Apr 26 11:39:10 2017 +0800 Committer: Wenchen Fan Committed: Wed Apr 26 11:39:10 2017 +0800 -- .../apache/spark/sql/internal/CatalogImpl.scala | 28  .../spark/sql/hive/execution/HiveDDLSuite.scala | 8 ++ 2 files changed, 31 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6696ad0e/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index 9d0b214..c3c3513 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -19,6 +19,7 @@ package org.apache.spark.sql.internal import scala.collection.JavaConverters._ import scala.reflect.runtime.universe.TypeTag +import scala.util.control.NonFatal import org.apache.spark.annotation.Experimental import org.apache.spark.sql._ @@ -99,14 +100,27 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { CatalogImpl.makeDataset(tables, sparkSession) } + /** + * Returns a Table for the given table/view or temporary view. + * + * Note that this function requires the table already exists in the Catalog.
+ * + * If the table metadata retrieval failed due to any reason (e.g., table serde class + * is not accessible or the table type is not accepted by Spark SQL), this function + * still returns the corresponding Table without the description and tableType) + */ private def makeTable(tableIdent: TableIdentifier): Table = { -val metadata = sessionCatalog.getTempViewOrPermanentTableMetadata(tableIdent) +val metadata = try { + Some(sessionCatalog.getTempViewOrPermanentTableMetadata(tableIdent)) +} catch { + case NonFatal(_) => None +} val isTemp = sessionCatalog.isTemporaryTable(tableIdent) new Table( name = tableIdent.table, - database = metadata.identifier.database.orNull, - description = metadata.comment.orNull, - tableType = if (isTemp) "TEMPORARY" else metadata.tableType.name, + database = metadata.map(_.identifier.database).getOrElse(tableIdent.database).orNull, + description = metadata.map(_.comment.orNull).orNull, + tableType = if (isTemp) "TEMPORARY" else metadata.map(_.tableType.name).orNull, isTemporary = isTemp) } @@ -197,7 +211,11 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { * `AnalysisException` when no `Table` can be found. */ override def getTable(dbName: String, tableName: String): Table = { -makeTable(TableIdentifier(tableName, Option(dbName))) +if (tableExists(dbName, tableName)) { + makeTable(TableIdentifier(tableName, Option(dbName))) +} else { + throw new AnalysisException(s"Table or view '$tableName' not found in database '$dbName'") +} } /** http://git-wip-us.apache.org/repos/asf/spark/blob/6696ad0e/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala -- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala index 3b9437da..037ab47 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveDDLSuite.scala +++
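A usage sketch of the resulting behaviour (the database and table names are hypothetical, and a Hive-enabled build is assumed so that an inaccessible serde is even possible):

```scala
// Usage sketch only; "salesdb" and "broken_serde_table" are hypothetical names.
import org.apache.spark.sql.SparkSession

object CatalogListingSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("catalog-listing-sketch")
      .master("local[*]")
      .enableHiveSupport() // needed for a table whose serde class may be missing
      .getOrCreate()

    // Before this patch a single unreadable table failed the whole listing;
    // now the table is still listed, just with description/tableType left empty.
    spark.catalog.listTables("salesdb").show(truncate = false)

    // getTable now checks existence first, so a missing table yields an
    // AnalysisException with a clear message instead of a metadata error.
    val t = spark.catalog.getTable("salesdb", "broken_serde_table")
    println(s"name=${t.name}, tableType=${t.tableType}, description=${t.description}")

    spark.stop()
  }
}
```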
spark git commit: [SPARK-18127] Add hooks and extension points to Spark
Repository: spark Updated Branches: refs/heads/branch-2.2 f971ce5dd -> f0de60079 [SPARK-18127] Add hooks and extension points to Spark ## What changes were proposed in this pull request? This patch adds support for customizing the spark session by injecting user-defined custom extensions. This allows a user to add custom analyzer rules/checks, optimizer rules, planning strategies or even a customized parser. ## How was this patch tested? Unit Tests in SparkSessionExtensionSuite Author: Sameer AgarwalCloses #17724 from sameeragarwal/session-extensions. (cherry picked from commit caf392025ce21d701b503112060fa016d5eabe04) Signed-off-by: Xiao Li Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f0de6007 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f0de6007 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f0de6007 Branch: refs/heads/branch-2.2 Commit: f0de600797ff4883927d0c70732675fd8629e239 Parents: f971ce5 Author: Sameer Agarwal Authored: Tue Apr 25 17:05:20 2017 -0700 Committer: Xiao Li Committed: Tue Apr 25 17:05:41 2017 -0700 -- .../spark/sql/catalyst/parser/ParseDriver.scala | 9 +- .../sql/catalyst/parser/ParserInterface.scala | 35 +++- .../spark/sql/internal/StaticSQLConf.scala | 6 + .../org/apache/spark/sql/SparkSession.scala | 45 - .../spark/sql/SparkSessionExtensions.scala | 171 +++ .../sql/internal/BaseSessionStateBuilder.scala | 33 +++- .../spark/sql/SparkSessionExtensionSuite.scala | 144 7 files changed, 418 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f0de6007/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 80ab75c..dcccbd0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -34,8 +34,7 @@ import org.apache.spark.sql.types.{DataType, StructType} abstract class AbstractSqlParser extends ParserInterface with Logging { /** Creates/Resolves DataType for a given SQL string. */ - def parseDataType(sqlText: String): DataType = parse(sqlText) { parser => -// TODO add this to the parser interface. + override def parseDataType(sqlText: String): DataType = parse(sqlText) { parser => astBuilder.visitSingleDataType(parser.singleDataType()) } @@ -50,8 +49,10 @@ abstract class AbstractSqlParser extends ParserInterface with Logging { } /** Creates FunctionIdentifier for a given SQL string. 
*/ - def parseFunctionIdentifier(sqlText: String): FunctionIdentifier = parse(sqlText) { parser => -astBuilder.visitSingleFunctionIdentifier(parser.singleFunctionIdentifier()) + override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier = { +parse(sqlText) { parser => + astBuilder.visitSingleFunctionIdentifier(parser.singleFunctionIdentifier()) +} } /** http://git-wip-us.apache.org/repos/asf/spark/blob/f0de6007/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala index db3598b..75240d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala @@ -17,30 +17,51 @@ package org.apache.spark.sql.catalyst.parser +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{DataType, StructType} /** * Interface for a parser. */ +@DeveloperApi trait ParserInterface { - /** Creates LogicalPlan for a given SQL string. */ + /** + * Parse a string to a [[LogicalPlan]]. + */ + @throws[ParseException]("Text cannot be parsed to a LogicalPlan") def parsePlan(sqlText: String): LogicalPlan - /** Creates Expression for a given SQL string. */ + /** + * Parse a string to an
spark git commit: [SPARK-18127] Add hooks and extension points to Spark
Repository: spark Updated Branches: refs/heads/master 0a7f5f279 -> caf392025 [SPARK-18127] Add hooks and extension points to Spark ## What changes were proposed in this pull request? This patch adds support for customizing the spark session by injecting user-defined custom extensions. This allows a user to add custom analyzer rules/checks, optimizer rules, planning strategies or even a customized parser. ## How was this patch tested? Unit Tests in SparkSessionExtensionSuite Author: Sameer AgarwalCloses #17724 from sameeragarwal/session-extensions. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/caf39202 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/caf39202 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/caf39202 Branch: refs/heads/master Commit: caf392025ce21d701b503112060fa016d5eabe04 Parents: 0a7f5f2 Author: Sameer Agarwal Authored: Tue Apr 25 17:05:20 2017 -0700 Committer: Xiao Li Committed: Tue Apr 25 17:05:20 2017 -0700 -- .../spark/sql/catalyst/parser/ParseDriver.scala | 9 +- .../sql/catalyst/parser/ParserInterface.scala | 35 +++- .../spark/sql/internal/StaticSQLConf.scala | 6 + .../org/apache/spark/sql/SparkSession.scala | 45 - .../spark/sql/SparkSessionExtensions.scala | 171 +++ .../sql/internal/BaseSessionStateBuilder.scala | 33 +++- .../spark/sql/SparkSessionExtensionSuite.scala | 144 7 files changed, 418 insertions(+), 25 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/caf39202/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala index 80ab75c..dcccbd0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParseDriver.scala @@ -34,8 +34,7 @@ import org.apache.spark.sql.types.{DataType, StructType} abstract class AbstractSqlParser extends ParserInterface with Logging { /** Creates/Resolves DataType for a given SQL string. */ - def parseDataType(sqlText: String): DataType = parse(sqlText) { parser => -// TODO add this to the parser interface. + override def parseDataType(sqlText: String): DataType = parse(sqlText) { parser => astBuilder.visitSingleDataType(parser.singleDataType()) } @@ -50,8 +49,10 @@ abstract class AbstractSqlParser extends ParserInterface with Logging { } /** Creates FunctionIdentifier for a given SQL string. 
*/ - def parseFunctionIdentifier(sqlText: String): FunctionIdentifier = parse(sqlText) { parser => -astBuilder.visitSingleFunctionIdentifier(parser.singleFunctionIdentifier()) + override def parseFunctionIdentifier(sqlText: String): FunctionIdentifier = { +parse(sqlText) { parser => + astBuilder.visitSingleFunctionIdentifier(parser.singleFunctionIdentifier()) +} } /** http://git-wip-us.apache.org/repos/asf/spark/blob/caf39202/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala index db3598b..75240d2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/ParserInterface.scala @@ -17,30 +17,51 @@ package org.apache.spark.sql.catalyst.parser +import org.apache.spark.annotation.DeveloperApi import org.apache.spark.sql.catalyst.{FunctionIdentifier, TableIdentifier} import org.apache.spark.sql.catalyst.expressions.Expression import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{DataType, StructType} /** * Interface for a parser. */ +@DeveloperApi trait ParserInterface { - /** Creates LogicalPlan for a given SQL string. */ + /** + * Parse a string to a [[LogicalPlan]]. + */ + @throws[ParseException]("Text cannot be parsed to a LogicalPlan") def parsePlan(sqlText: String): LogicalPlan - /** Creates Expression for a given SQL string. */ + /** + * Parse a string to an [[Expression]]. + */ + @throws[ParseException]("Text cannot be parsed to an Expression") def parseExpression(sqlText: String):
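A minimal sketch of the new extension points, assuming the `withExtensions` builder hook and the `injectOptimizerRule` method this patch introduces; the rule itself is a made-up no-op:

```scala
// Sketch only: a made-up no-op optimizer rule injected through the new hooks.
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule

case class NoopOptimizerRule(session: SparkSession) extends Rule[LogicalPlan] {
  // A real rule would pattern-match on the plan and rewrite it; this one only
  // observes the plan and returns it unchanged.
  override def apply(plan: LogicalPlan): LogicalPlan = plan
}

object ExtensionsSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("extensions-sketch")
      .master("local[*]")
      .withExtensions { extensions =>
        extensions.injectOptimizerRule(session => NoopOptimizerRule(session))
      }
      .getOrCreate()

    // The injected rule now participates in the optimization of every query.
    spark.range(10).selectExpr("id * 2 AS doubled").collect()
    spark.stop()
  }
}
```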
[2/2] spark git commit: Preparing development version 2.1.2-SNAPSHOT
Preparing development version 2.1.2-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8460b090 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8460b090 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8460b090 Branch: refs/heads/branch-2.1 Commit: 8460b09054c9aa488df4fd1e7461a75b7a646e4b Parents: 267aca5 Author: Patrick WendellAuthored: Tue Apr 25 16:28:26 2017 -0700 Committer: Patrick Wendell Committed: Tue Apr 25 16:28:26 2017 -0700 -- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/java8-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mesos/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- 39 files changed, 40 insertions(+), 40 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8460b090/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 1ceda7b..2d461ca 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.1.1 +Version: 2.1.2 Title: R Frontend for Apache Spark Description: The SparkR package provides an R Frontend for Apache Spark. 
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), http://git-wip-us.apache.org/repos/asf/spark/blob/8460b090/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index cc290c0..6e092ef 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.1.1 +2.1.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/8460b090/common/network-common/pom.xml -- diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index ccf4b27..77a4b64 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.1.1 +2.1.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/8460b090/common/network-shuffle/pom.xml -- diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 98a2324..1a2d85a 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.1.1 +2.1.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/8460b090/common/network-yarn/pom.xml -- diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index dc1ad14..7a57e89 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.1.1 +
[spark] Git Push Summary
Repository: spark Updated Tags: refs/tags/v2.1.1-rc4 [created] 267aca5bd
[1/2] spark git commit: Preparing Spark release v2.1.1-rc4
Repository: spark Updated Branches: refs/heads/branch-2.1 359382c03 -> 8460b0905 Preparing Spark release v2.1.1-rc4 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/267aca5b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/267aca5b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/267aca5b Branch: refs/heads/branch-2.1 Commit: 267aca5bd5042303a718d10635bc0d1a1596853f Parents: 359382c Author: Patrick WendellAuthored: Tue Apr 25 16:28:22 2017 -0700 Committer: Patrick Wendell Committed: Tue Apr 25 16:28:22 2017 -0700 -- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/java8-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- launcher/pom.xml | 2 +- mesos/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- yarn/pom.xml | 2 +- 39 files changed, 40 insertions(+), 40 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/267aca5b/R/pkg/DESCRIPTION -- diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 2d461ca..1ceda7b 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.1.2 +Version: 2.1.1 Title: R Frontend for Apache Spark Description: The SparkR package provides an R Frontend for Apache Spark. 
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), http://git-wip-us.apache.org/repos/asf/spark/blob/267aca5b/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 6e092ef..cc290c0 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.1.2-SNAPSHOT +2.1.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/267aca5b/common/network-common/pom.xml -- diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 77a4b64..ccf4b27 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.1.2-SNAPSHOT +2.1.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/267aca5b/common/network-shuffle/pom.xml -- diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 1a2d85a..98a2324 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.1.2-SNAPSHOT +2.1.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/267aca5b/common/network-yarn/pom.xml -- diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 7a57e89..dc1ad14 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@
spark git commit: [SPARK-20239][CORE][2.1-BACKPORT] Improve HistoryServer's ACL mechanism
Repository: spark Updated Branches: refs/heads/branch-2.1 2d47e1aaf -> 359382c03 [SPARK-20239][CORE][2.1-BACKPORT] Improve HistoryServer's ACL mechanism Current SHS (Spark History Server) has two different ACLs: * ACL of base URL, it is controlled by "spark.acls.enabled" or "spark.ui.acls.enabled", and with this enabled, only user configured with "spark.admin.acls" (or group) or "spark.ui.view.acls" (or group), or the user who started SHS could list all the applications, otherwise none of them can be listed. This will also affect REST APIs which listing the summary of all apps and one app. * Per application ACL. This is controlled by "spark.history.ui.acls.enabled". With this enabled only history admin user and user/group who ran this app can access the details of this app. With this two ACLs, we may encounter several unexpected behaviors: 1. if base URL's ACL (`spark.acls.enable`) is enabled but user A has no view permission. User "A" cannot see the app list but could still access details of it's own app. 2. if ACLs of base URL (`spark.acls.enable`) is disabled, then user "A" could download any application's event log, even it is not run by user "A". 3. The changes of Live UI's ACL will affect History UI's ACL which share the same conf file. The unexpected behaviors is mainly because we have two different ACLs, ideally we should have only one to manage all. So to improve SHS's ACL mechanism, here in this PR proposed to: 1. Disable "spark.acls.enable" and only use "spark.history.ui.acls.enable" for history server. 2. Check permission for event-log download REST API. With this PR: 1. Admin user could see/download the list of all applications, as well as application details. 2. Normal user could see the list of all applications, but can only download and check the details of applications accessible to him. New UTs are added, also verified in real cluster. CC tgravescs vanzin please help to review, this PR changes the semantics you did previously. Thanks a lot. Author: jerryshaoCloses #17755 from jerryshao/SPARK-20239-2.1-backport. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/359382c0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/359382c0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/359382c0 Branch: refs/heads/branch-2.1 Commit: 359382c038d5836e95ee3ca871f3d1da5bc08148 Parents: 2d47e1a Author: jerryshao Authored: Tue Apr 25 15:21:12 2017 -0700 Committer: Marcelo Vanzin Committed: Tue Apr 25 15:21:12 2017 -0700 -- .../history/ApplicationHistoryProvider.scala| 4 ++-- .../spark/deploy/history/HistoryServer.scala| 20 +++- .../spark/status/api/v1/ApiRootResource.scala | 18 +++--- .../deploy/history/HistoryServerSuite.scala | 12 +++- 4 files changed, 43 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/359382c0/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala index d7d8280..6d8758a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala @@ -86,7 +86,7 @@ private[history] abstract class ApplicationHistoryProvider { * @return Count of application event logs that are currently under process */ def getEventLogsUnderProcess(): Int = { -return 0; +0 } /** @@ -95,7 +95,7 @@ private[history] abstract class ApplicationHistoryProvider { * @return 0 if this is undefined or unsupported, otherwise the last updated time in millis */ def getLastUpdatedTime(): Long = { -return 0; +0 } /** http://git-wip-us.apache.org/repos/asf/spark/blob/359382c0/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 7e21fa6..b02992a 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -269,7 +269,7 @@ object HistoryServer extends Logging { Utils.initDaemon(log) new HistoryServerArguments(conf, argStrings) initSecurity() -val securityManager = new
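A configuration sketch of the simplified model (the admin user name is made up; the keys are the ones named in the description above, and how they are supplied to the history server is deployment-specific):

```scala
// Configuration sketch; "alice" is a made-up admin, and in a real deployment
// these keys would be set in the history server's configuration rather than
// built programmatically.
import org.apache.spark.SparkConf

object HistoryServerAclSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      // The only switch the SHS now consults for per-application access checks;
      // spark.acls.enable / spark.ui.acls.enable from the live UI no longer apply.
      .set("spark.history.ui.acls.enable", "true")
      // Admins (and the user running the SHS) can view and download every
      // application's details and event logs.
      .set("spark.admin.acls", "alice")
    // Everyone can still list applications; viewing details or downloading the
    // event logs of a given application requires being its owner, an admin,
    // or listed in that application's own view ACLs.
    conf.getAll.foreach { case (k, v) => println(s"$k=$v") }
  }
}
```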
spark git commit: [SPARK-20239][CORE][2.1-BACKPORT] Improve HistoryServer's ACL mechanism
Repository: spark Updated Branches: refs/heads/branch-2.0 ddf6dd88a -> 068500a4a [SPARK-20239][CORE][2.1-BACKPORT] Improve HistoryServer's ACL mechanism Current SHS (Spark History Server) has two different ACLs: * ACL of base URL, it is controlled by "spark.acls.enabled" or "spark.ui.acls.enabled", and with this enabled, only user configured with "spark.admin.acls" (or group) or "spark.ui.view.acls" (or group), or the user who started SHS could list all the applications, otherwise none of them can be listed. This will also affect REST APIs which listing the summary of all apps and one app. * Per application ACL. This is controlled by "spark.history.ui.acls.enabled". With this enabled only history admin user and user/group who ran this app can access the details of this app. With this two ACLs, we may encounter several unexpected behaviors: 1. if base URL's ACL (`spark.acls.enable`) is enabled but user A has no view permission. User "A" cannot see the app list but could still access details of it's own app. 2. if ACLs of base URL (`spark.acls.enable`) is disabled, then user "A" could download any application's event log, even it is not run by user "A". 3. The changes of Live UI's ACL will affect History UI's ACL which share the same conf file. The unexpected behaviors is mainly because we have two different ACLs, ideally we should have only one to manage all. So to improve SHS's ACL mechanism, here in this PR proposed to: 1. Disable "spark.acls.enable" and only use "spark.history.ui.acls.enable" for history server. 2. Check permission for event-log download REST API. With this PR: 1. Admin user could see/download the list of all applications, as well as application details. 2. Normal user could see the list of all applications, but can only download and check the details of applications accessible to him. New UTs are added, also verified in real cluster. CC tgravescs vanzin please help to review, this PR changes the semantics you did previously. Thanks a lot. Author: jerryshaoCloses #17755 from jerryshao/SPARK-20239-2.1-backport. 
(cherry picked from commit 359382c038d5836e95ee3ca871f3d1da5bc08148) Signed-off-by: Marcelo Vanzin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/068500a4 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/068500a4 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/068500a4 Branch: refs/heads/branch-2.0 Commit: 068500a4a67d67112dcd012820388ca99df9a011 Parents: ddf6dd8 Author: jerryshao Authored: Tue Apr 25 15:21:12 2017 -0700 Committer: Marcelo Vanzin Committed: Tue Apr 25 15:21:24 2017 -0700 -- .../history/ApplicationHistoryProvider.scala| 4 ++-- .../spark/deploy/history/HistoryServer.scala| 20 +++- .../spark/status/api/v1/ApiRootResource.scala | 18 +++--- .../deploy/history/HistoryServerSuite.scala | 12 +++- 4 files changed, 43 insertions(+), 11 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/068500a4/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala index f3ea541..bc9fa70 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/ApplicationHistoryProvider.scala @@ -84,7 +84,7 @@ private[history] abstract class ApplicationHistoryProvider { * @return Count of application event logs that are currently under process */ def getEventLogsUnderProcess(): Int = { -return 0; +0 } /** @@ -93,7 +93,7 @@ private[history] abstract class ApplicationHistoryProvider { * @return 0 if this is undefined or unsupported, otherwise the last updated time in millis */ def getLastUpdatedTime(): Long = { -return 0; +0 } /** http://git-wip-us.apache.org/repos/asf/spark/blob/068500a4/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala -- diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 996c19e..44ce495 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -261,7 +261,7 @@ object HistoryServer extends Logging { Utils.initDaemon(log)
spark git commit: [SPARK-5484][GRAPHX] Periodically do checkpoint in Pregel
Repository: spark Updated Branches: refs/heads/branch-2.2 55834a898 -> f971ce5dd [SPARK-5484][GRAPHX] Periodically do checkpoint in Pregel ## What changes were proposed in this pull request? Pregel-based iterative algorithms with more than ~50 iterations begin to slow down and eventually fail with a StackOverflowError due to Spark's lack of support for long lineage chains. This PR causes Pregel to checkpoint the graph periodically if the checkpoint directory is set. This PR moves PeriodicGraphCheckpointer.scala from mllib to graphx, moves PeriodicRDDCheckpointer.scala, PeriodicCheckpointer.scala from mllib to core ## How was this patch tested? unit tests, manual tests (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: dingAuthor: dding3 Author: Michael Allman Closes #15125 from dding3/cp2_pregel. (cherry picked from commit 0a7f5f2798b6e8b2ba15e8b3aa07d5953ad1c695) Signed-off-by: Felix Cheung Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f971ce5d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f971ce5d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f971ce5d Branch: refs/heads/branch-2.2 Commit: f971ce5dd0788fe7f5d2ca820b9ea3db72033ddc Parents: 55834a8 Author: ding Authored: Tue Apr 25 11:20:32 2017 -0700 Committer: Felix Cheung Committed: Tue Apr 25 11:20:52 2017 -0700 -- .../main/scala/org/apache/spark/rdd/RDD.scala | 4 +- .../rdd/util/PeriodicRDDCheckpointer.scala | 98 ++ .../spark/util/PeriodicCheckpointer.scala | 193 ++ .../org/apache/spark/rdd/SortingSuite.scala | 2 +- .../util/PeriodicRDDCheckpointerSuite.scala | 175 + docs/configuration.md | 14 ++ docs/graphx-programming-guide.md| 9 +- .../scala/org/apache/spark/graphx/Pregel.scala | 25 ++- .../graphx/util/PeriodicGraphCheckpointer.scala | 105 ++ .../util/PeriodicGraphCheckpointerSuite.scala | 194 +++ .../org/apache/spark/ml/clustering/LDA.scala| 3 +- .../ml/tree/impl/GradientBoostedTrees.scala | 2 +- .../spark/mllib/clustering/LDAOptimizer.scala | 2 +- .../spark/mllib/impl/PeriodicCheckpointer.scala | 183 - .../mllib/impl/PeriodicGraphCheckpointer.scala | 102 -- .../mllib/impl/PeriodicRDDCheckpointer.scala| 97 -- .../impl/PeriodicGraphCheckpointerSuite.scala | 189 -- .../impl/PeriodicRDDCheckpointerSuite.scala | 175 - 18 files changed, 812 insertions(+), 760 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/f971ce5d/core/src/main/scala/org/apache/spark/rdd/RDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index e524675..63a87e7 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -41,7 +41,7 @@ import org.apache.spark.partial.GroupedCountEvaluator import org.apache.spark.partial.PartialResult import org.apache.spark.storage.{RDDBlockId, StorageLevel} import org.apache.spark.util.{BoundedPriorityQueue, Utils} -import org.apache.spark.util.collection.OpenHashMap +import org.apache.spark.util.collection.{OpenHashMap, Utils => collectionUtils} import org.apache.spark.util.random.{BernoulliCellSampler, BernoulliSampler, PoissonSampler, SamplingUtils} @@ -1420,7 +1420,7 @@ abstract class RDD[T: ClassTag]( val mapRDDs = mapPartitions { items => // Priority keeps the largest elements, so let's reverse the ordering. 
val queue = new BoundedPriorityQueue[T](num)(ord.reverse) -queue ++= util.collection.Utils.takeOrdered(items, num)(ord) +queue ++= collectionUtils.takeOrdered(items, num)(ord) Iterator.single(queue) } if (mapRDDs.partitions.length == 0) { http://git-wip-us.apache.org/repos/asf/spark/blob/f971ce5d/core/src/main/scala/org/apache/spark/rdd/util/PeriodicRDDCheckpointer.scala -- diff --git a/core/src/main/scala/org/apache/spark/rdd/util/PeriodicRDDCheckpointer.scala b/core/src/main/scala/org/apache/spark/rdd/util/PeriodicRDDCheckpointer.scala new file mode 100644 index 000..ab72add --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rdd/util/PeriodicRDDCheckpointer.scala @@ -0,0 +1,98 @@
spark git commit: [SPARK-5484][GRAPHX] Periodically do checkpoint in Pregel
Repository: spark Updated Branches: refs/heads/master 67eef47ac -> 0a7f5f279 [SPARK-5484][GRAPHX] Periodically do checkpoint in Pregel ## What changes were proposed in this pull request? Pregel-based iterative algorithms with more than ~50 iterations begin to slow down and eventually fail with a StackOverflowError due to Spark's lack of support for long lineage chains. This PR causes Pregel to checkpoint the graph periodically if the checkpoint directory is set. This PR moves PeriodicGraphCheckpointer.scala from mllib to graphx, moves PeriodicRDDCheckpointer.scala, PeriodicCheckpointer.scala from mllib to core ## How was this patch tested? unit tests, manual tests (Please explain how this patch was tested. E.g. unit tests, integration tests, manual tests) (If this patch involves UI changes, please attach a screenshot; otherwise, remove this) Author: dingAuthor: dding3 Author: Michael Allman Closes #15125 from dding3/cp2_pregel. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a7f5f27 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a7f5f27 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a7f5f27 Branch: refs/heads/master Commit: 0a7f5f2798b6e8b2ba15e8b3aa07d5953ad1c695 Parents: 67eef47 Author: ding Authored: Tue Apr 25 11:20:32 2017 -0700 Committer: Felix Cheung Committed: Tue Apr 25 11:20:32 2017 -0700 -- .../main/scala/org/apache/spark/rdd/RDD.scala | 4 +- .../rdd/util/PeriodicRDDCheckpointer.scala | 98 ++ .../spark/util/PeriodicCheckpointer.scala | 193 ++ .../org/apache/spark/rdd/SortingSuite.scala | 2 +- .../util/PeriodicRDDCheckpointerSuite.scala | 175 + docs/configuration.md | 14 ++ docs/graphx-programming-guide.md| 9 +- .../scala/org/apache/spark/graphx/Pregel.scala | 25 ++- .../graphx/util/PeriodicGraphCheckpointer.scala | 105 ++ .../util/PeriodicGraphCheckpointerSuite.scala | 194 +++ .../org/apache/spark/ml/clustering/LDA.scala| 3 +- .../ml/tree/impl/GradientBoostedTrees.scala | 2 +- .../spark/mllib/clustering/LDAOptimizer.scala | 2 +- .../spark/mllib/impl/PeriodicCheckpointer.scala | 183 - .../mllib/impl/PeriodicGraphCheckpointer.scala | 102 -- .../mllib/impl/PeriodicRDDCheckpointer.scala| 97 -- .../impl/PeriodicGraphCheckpointerSuite.scala | 189 -- .../impl/PeriodicRDDCheckpointerSuite.scala | 175 - 18 files changed, 812 insertions(+), 760 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0a7f5f27/core/src/main/scala/org/apache/spark/rdd/RDD.scala -- diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index e524675..63a87e7 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -41,7 +41,7 @@ import org.apache.spark.partial.GroupedCountEvaluator import org.apache.spark.partial.PartialResult import org.apache.spark.storage.{RDDBlockId, StorageLevel} import org.apache.spark.util.{BoundedPriorityQueue, Utils} -import org.apache.spark.util.collection.OpenHashMap +import org.apache.spark.util.collection.{OpenHashMap, Utils => collectionUtils} import org.apache.spark.util.random.{BernoulliCellSampler, BernoulliSampler, PoissonSampler, SamplingUtils} @@ -1420,7 +1420,7 @@ abstract class RDD[T: ClassTag]( val mapRDDs = mapPartitions { items => // Priority keeps the largest elements, so let's reverse the ordering. 
val queue = new BoundedPriorityQueue[T](num)(ord.reverse) -queue ++= util.collection.Utils.takeOrdered(items, num)(ord) +queue ++= collectionUtils.takeOrdered(items, num)(ord) Iterator.single(queue) } if (mapRDDs.partitions.length == 0) { http://git-wip-us.apache.org/repos/asf/spark/blob/0a7f5f27/core/src/main/scala/org/apache/spark/rdd/util/PeriodicRDDCheckpointer.scala -- diff --git a/core/src/main/scala/org/apache/spark/rdd/util/PeriodicRDDCheckpointer.scala b/core/src/main/scala/org/apache/spark/rdd/util/PeriodicRDDCheckpointer.scala new file mode 100644 index 000..ab72add --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rdd/util/PeriodicRDDCheckpointer.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file
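A usage sketch of the new behaviour; the checkpoint-interval key is assumed from the configuration.md entry this patch adds, and the edge-list path is hypothetical:

```scala
// Usage sketch; the checkpoint interval key is assumed here (see the docs diff),
// and the input path is made up.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.graphx.GraphLoader

object PregelCheckpointSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("pregel-checkpoint-sketch")
      .setMaster("local[*]")
      .set("spark.graphx.pregel.checkpointInterval", "10") // assumed key; see the docs diff above

    val sc = new SparkContext(conf)
    // Periodic checkpointing only kicks in when a checkpoint directory is set.
    sc.setCheckpointDir("/tmp/spark-checkpoints")

    // Any Pregel-based algorithm benefits: every N iterations the graph is
    // checkpointed, truncating the lineage chain that otherwise grows until
    // a StackOverflowError after roughly 50 iterations.
    val graph = GraphLoader.edgeListFile(sc, "/path/to/edges.txt")
    val components = graph.connectedComponents()
    println(s"vertices labelled: ${components.vertices.count()}")

    sc.stop()
  }
}
```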
spark git commit: [SPARK-20449][ML] Upgrade breeze version to 0.13.1
Repository: spark Updated Branches: refs/heads/branch-2.2 e2591c6d7 -> 55834a898 [SPARK-20449][ML] Upgrade breeze version to 0.13.1 ## What changes were proposed in this pull request? Upgrade breeze version to 0.13.1, which fixed some critical bugs of L-BFGS-B. ## How was this patch tested? Existing unit tests. Author: Yanbo LiangCloses #17746 from yanboliang/spark-20449. (cherry picked from commit 67eef47acfd26f1f0be3e8ef10453514f3655f62) Signed-off-by: DB Tsai Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/55834a89 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/55834a89 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/55834a89 Branch: refs/heads/branch-2.2 Commit: 55834a898547b00bb8de1891fd061651f941aa0b Parents: e2591c6 Author: Yanbo Liang Authored: Tue Apr 25 17:10:41 2017 + Committer: DB Tsai Committed: Tue Apr 25 17:11:06 2017 + -- LICENSE | 1 + .../tests/testthat/test_mllib_classification.R| 10 +- dev/deps/spark-deps-hadoop-2.6| 12 +++- dev/deps/spark-deps-hadoop-2.7| 12 +++- .../regression/GeneralizedLinearRegression.scala | 4 ++-- .../apache/spark/mllib/clustering/LDAModel.scala | 14 -- .../spark/mllib/optimization/LBFGSSuite.scala | 4 ++-- pom.xml | 2 +- python/pyspark/ml/classification.py | 18 -- 9 files changed, 37 insertions(+), 40 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/55834a89/LICENSE -- diff --git a/LICENSE b/LICENSE index 7950dd6..c21032a 100644 --- a/LICENSE +++ b/LICENSE @@ -297,3 +297,4 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (MIT License) RowsGroup (http://datatables.net/license/mit) (MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html) (MIT License) modernizr (https://github.com/Modernizr/Modernizr/blob/master/LICENSE) + (MIT License) machinist (https://github.com/typelevel/machinist) http://git-wip-us.apache.org/repos/asf/spark/blob/55834a89/R/pkg/inst/tests/testthat/test_mllib_classification.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R index 459254d..af7cbdc 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_classification.R +++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R @@ -288,18 +288,18 @@ test_that("spark.mlp", { c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9)) mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction")) expect_equal(head(mlpPredictions$prediction, 10), - c("1.0", "1.0", "1.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0")) + c("1.0", "1.0", "2.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0")) model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2, initialWeights = c(0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 9.0, 9.0)) mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction")) expect_equal(head(mlpPredictions$prediction, 10), - c("1.0", "1.0", "1.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0")) + c("1.0", "1.0", "2.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0")) model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2) mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction")) expect_equal(head(mlpPredictions$prediction, 10), - c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "0.0", "2.0", "1.0", "0.0")) + c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "0.0", "0.0", "1.0", "0.0")) # Test formula works well df <- 
suppressWarnings(createDataFrame(iris)) @@ -310,8 +310,8 @@ test_that("spark.mlp", { expect_equal(summary$numOfOutputs, 3) expect_equal(summary$layers, c(4, 3)) expect_equal(length(summary$weights), 15) - expect_equal(head(summary$weights, 5), list(-1.1957257, -5.2693685, 7.4489734, -6.3751413, - -10.2376130), tolerance = 1e-6) + expect_equal(head(summary$weights, 5), list(-0.5793153, -4.652961, 6.216155, -6.649478, + -10.51147), tolerance = 1e-3) }) test_that("spark.naiveBayes", { http://git-wip-us.apache.org/repos/asf/spark/blob/55834a89/dev/deps/spark-deps-hadoop-2.6 -- diff
spark git commit: [SPARK-20449][ML] Upgrade breeze version to 0.13.1
Repository: spark Updated Branches: refs/heads/master 387565cf1 -> 67eef47ac [SPARK-20449][ML] Upgrade breeze version to 0.13.1 ## What changes were proposed in this pull request? Upgrade breeze version to 0.13.1, which fixed some critical bugs of L-BFGS-B. ## How was this patch tested? Existing unit tests. Author: Yanbo LiangCloses #17746 from yanboliang/spark-20449. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/67eef47a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/67eef47a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/67eef47a Branch: refs/heads/master Commit: 67eef47acfd26f1f0be3e8ef10453514f3655f62 Parents: 387565c Author: Yanbo Liang Authored: Tue Apr 25 17:10:41 2017 + Committer: DB Tsai Committed: Tue Apr 25 17:10:41 2017 + -- LICENSE | 1 + .../tests/testthat/test_mllib_classification.R| 10 +- dev/deps/spark-deps-hadoop-2.6| 12 +++- dev/deps/spark-deps-hadoop-2.7| 12 +++- .../regression/GeneralizedLinearRegression.scala | 4 ++-- .../apache/spark/mllib/clustering/LDAModel.scala | 14 -- .../spark/mllib/optimization/LBFGSSuite.scala | 4 ++-- pom.xml | 2 +- python/pyspark/ml/classification.py | 18 -- 9 files changed, 37 insertions(+), 40 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/67eef47a/LICENSE -- diff --git a/LICENSE b/LICENSE index 7950dd6..c21032a 100644 --- a/LICENSE +++ b/LICENSE @@ -297,3 +297,4 @@ The text of each license is also included at licenses/LICENSE-[project].txt. (MIT License) RowsGroup (http://datatables.net/license/mit) (MIT License) jsonFormatter (http://www.jqueryscript.net/other/jQuery-Plugin-For-Pretty-JSON-Formatting-jsonFormatter.html) (MIT License) modernizr (https://github.com/Modernizr/Modernizr/blob/master/LICENSE) + (MIT License) machinist (https://github.com/typelevel/machinist) http://git-wip-us.apache.org/repos/asf/spark/blob/67eef47a/R/pkg/inst/tests/testthat/test_mllib_classification.R -- diff --git a/R/pkg/inst/tests/testthat/test_mllib_classification.R b/R/pkg/inst/tests/testthat/test_mllib_classification.R index 459254d..af7cbdc 100644 --- a/R/pkg/inst/tests/testthat/test_mllib_classification.R +++ b/R/pkg/inst/tests/testthat/test_mllib_classification.R @@ -288,18 +288,18 @@ test_that("spark.mlp", { c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9)) mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction")) expect_equal(head(mlpPredictions$prediction, 10), - c("1.0", "1.0", "1.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0")) + c("1.0", "1.0", "2.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0")) model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2, initialWeights = c(0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 9.0, 9.0)) mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction")) expect_equal(head(mlpPredictions$prediction, 10), - c("1.0", "1.0", "1.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0")) + c("1.0", "1.0", "2.0", "1.0", "2.0", "1.0", "2.0", "2.0", "1.0", "0.0")) model <- spark.mlp(df, label ~ features, layers = c(4, 3), maxIter = 2) mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction")) expect_equal(head(mlpPredictions$prediction, 10), - c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "0.0", "2.0", "1.0", "0.0")) + c("1.0", "1.0", "1.0", "1.0", "0.0", "1.0", "0.0", "0.0", "1.0", "0.0")) # Test formula works well df <- suppressWarnings(createDataFrame(iris)) @@ -310,8 +310,8 @@ test_that("spark.mlp", { 
expect_equal(summary$numOfOutputs, 3) expect_equal(summary$layers, c(4, 3)) expect_equal(length(summary$weights), 15) - expect_equal(head(summary$weights, 5), list(-1.1957257, -5.2693685, 7.4489734, -6.3751413, - -10.2376130), tolerance = 1e-6) + expect_equal(head(summary$weights, 5), list(-0.5793153, -4.652961, 6.216155, -6.649478, + -10.51147), tolerance = 1e-3) }) test_that("spark.naiveBayes", { http://git-wip-us.apache.org/repos/asf/spark/blob/67eef47a/dev/deps/spark-deps-hadoop-2.6 -- diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index 73dc1f9..9287bd4 100644 ---
spark git commit: [SPARK-18901][FOLLOWUP][ML] Require in LR LogisticAggregator is redundant
Repository: spark Updated Branches: refs/heads/branch-2.2 b62ebd91b -> e2591c6d7 [SPARK-18901][FOLLOWUP][ML] Require in LR LogisticAggregator is redundant ## What changes were proposed in this pull request? This is a follow-up PR of #17478. ## How was this patch tested? Existing tests Author: wangmiao1981Closes #17754 from wangmiao1981/followup. (cherry picked from commit 387565cf14b490810f9479ff3adbf776e2edecdc) Signed-off-by: Yanbo Liang Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e2591c6d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e2591c6d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e2591c6d Branch: refs/heads/branch-2.2 Commit: e2591c6d74081e9edad2e8982c0125a4f1d21437 Parents: b62ebd9 Author: wangmiao1981 Authored: Tue Apr 25 16:30:36 2017 +0800 Committer: Yanbo Liang Committed: Tue Apr 25 16:30:53 2017 +0800 -- .../scala/org/apache/spark/ml/classification/LinearSVC.scala| 5 ++--- .../scala/org/apache/spark/ml/regression/LinearRegression.scala | 5 - 2 files changed, 2 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/e2591c6d/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index f76b14e..7507c75 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -458,9 +458,7 @@ private class LinearSVCAggregator( */ def add(instance: Instance): this.type = { instance match { case Instance(label, weight, features) => - require(weight >= 0.0, s"instance weight, $weight has to be >= 0.0") - require(numFeatures == features.size, s"Dimensions mismatch when adding new instance." + -s" Expecting $numFeatures but got ${features.size}.") + if (weight == 0.0) return this val localFeaturesStd = bcFeaturesStd.value val localCoefficients = coefficientsArray @@ -512,6 +510,7 @@ private class LinearSVCAggregator( * @return This LinearSVCAggregator object. */ def merge(other: LinearSVCAggregator): this.type = { + if (other.weightSum != 0.0) { weightSum += other.weightSum lossSum += other.lossSum http://git-wip-us.apache.org/repos/asf/spark/blob/e2591c6d/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index f7e3c8f..eaad549 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -971,9 +971,6 @@ private class LeastSquaresAggregator( */ def add(instance: Instance): this.type = { instance match { case Instance(label, weight, features) => - require(dim == features.size, s"Dimensions mismatch when adding new sample." + -s" Expecting $dim but got ${features.size}.") - require(weight >= 0.0, s"instance weight, $weight has to be >= 0.0") if (weight == 0.0) return this @@ -1005,8 +1002,6 @@ private class LeastSquaresAggregator( * @return This LeastSquaresAggregator object. */ def merge(other: LeastSquaresAggregator): this.type = { -require(dim == other.dim, s"Dimensions mismatch when merging with another " + - s"LeastSquaresAggregator. 
Expecting $dim but got ${other.dim}.") if (other.weightSum != 0) { totalCnt += other.totalCnt
spark git commit: [SPARK-18901][FOLLOWUP][ML] Require in LR LogisticAggregator is redundant
Repository: spark Updated Branches: refs/heads/master 0bc7a9021 -> 387565cf1 [SPARK-18901][FOLLOWUP][ML] Require in LR LogisticAggregator is redundant ## What changes were proposed in this pull request? This is a follow-up PR of #17478. ## How was this patch tested? Existing tests Author: wangmiao1981Closes #17754 from wangmiao1981/followup. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/387565cf Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/387565cf Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/387565cf Branch: refs/heads/master Commit: 387565cf14b490810f9479ff3adbf776e2edecdc Parents: 0bc7a90 Author: wangmiao1981 Authored: Tue Apr 25 16:30:36 2017 +0800 Committer: Yanbo Liang Committed: Tue Apr 25 16:30:36 2017 +0800 -- .../scala/org/apache/spark/ml/classification/LinearSVC.scala| 5 ++--- .../scala/org/apache/spark/ml/regression/LinearRegression.scala | 5 - 2 files changed, 2 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/387565cf/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index f76b14e..7507c75 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -458,9 +458,7 @@ private class LinearSVCAggregator( */ def add(instance: Instance): this.type = { instance match { case Instance(label, weight, features) => - require(weight >= 0.0, s"instance weight, $weight has to be >= 0.0") - require(numFeatures == features.size, s"Dimensions mismatch when adding new instance." + -s" Expecting $numFeatures but got ${features.size}.") + if (weight == 0.0) return this val localFeaturesStd = bcFeaturesStd.value val localCoefficients = coefficientsArray @@ -512,6 +510,7 @@ private class LinearSVCAggregator( * @return This LinearSVCAggregator object. */ def merge(other: LinearSVCAggregator): this.type = { + if (other.weightSum != 0.0) { weightSum += other.weightSum lossSum += other.lossSum http://git-wip-us.apache.org/repos/asf/spark/blob/387565cf/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index f7e3c8f..eaad549 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -971,9 +971,6 @@ private class LeastSquaresAggregator( */ def add(instance: Instance): this.type = { instance match { case Instance(label, weight, features) => - require(dim == features.size, s"Dimensions mismatch when adding new sample." + -s" Expecting $dim but got ${features.size}.") - require(weight >= 0.0, s"instance weight, $weight has to be >= 0.0") if (weight == 0.0) return this @@ -1005,8 +1002,6 @@ private class LeastSquaresAggregator( * @return This LeastSquaresAggregator object. */ def merge(other: LeastSquaresAggregator): this.type = { -require(dim == other.dim, s"Dimensions mismatch when merging with another " + - s"LeastSquaresAggregator. 
Expecting $dim but got ${other.dim}.") if (other.weightSum != 0) { totalCnt += other.totalCnt
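The two commits above apply one pattern to both aggregators: the per-row `require` checks are dropped from the hot `add()` path, and `merge()` skips empty partial aggregators by guarding on `weightSum` rather than asserting matching dimensions. A minimal, self-contained sketch of that pattern is below; the class and field names are illustrative, not the actual `LinearSVCAggregator` or `LeastSquaresAggregator`, and it assumes weights and feature dimensions are validated once before instances reach the aggregator.

```
// Illustrative sketch only: validation is assumed to happen upstream,
// so the aggregator itself no longer re-checks every row.
case class Instance(label: Double, weight: Double, features: Array[Double])

class SimpleAggregator extends Serializable {
  private var weightSum = 0.0
  private var lossSum = 0.0

  // Hot path: no per-row require() on weight or features.size;
  // zero-weight rows are skipped outright, mirroring the change above.
  def add(instance: Instance): this.type = {
    if (instance.weight == 0.0) return this
    weightSum += instance.weight
    lossSum += instance.weight * instance.label * instance.label // placeholder loss
    this
  }

  // Merging an empty partial aggregator is a no-op, so guard on weightSum
  // instead of asserting that dimensions match.
  def merge(other: SimpleAggregator): this.type = {
    if (other.weightSum != 0.0) {
      weightSum += other.weightSum
      lossSum += other.lossSum
    }
    this
  }

  def loss: Double = if (weightSum == 0.0) 0.0 else lossSum / weightSum
}
```

In the actual aggregators the same guard also keeps `merge()` from touching the gradient buffers of empty partitions, which appears to be the motivation for replacing the dimension assertion rather than simply dropping it.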
spark git commit: [SPARK-20404][CORE] Using Option(name) instead of Some(name)
Repository: spark Updated Branches: refs/heads/branch-2.2 c18de9c04 -> b62ebd91b [SPARK-20404][CORE] Using Option(name) instead of Some(name) Using Option(name) instead of Some(name) to prevent runtime failures when using accumulators created like the following ``` sparkContext.accumulator(0, null) ``` Author: Sergey ZhemzhitskyCloses #17740 from szhem/SPARK-20404-null-acc-names. (cherry picked from commit 0bc7a90210aad9025c1e1bdc99f8e723c1bf0fbf) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b62ebd91 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b62ebd91 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b62ebd91 Branch: refs/heads/branch-2.2 Commit: b62ebd91bb2c64e1ecef0f2d97db91f5ce32743b Parents: c18de9c Author: Sergey Zhemzhitsky Authored: Tue Apr 25 09:18:36 2017 +0100 Committer: Sean Owen Committed: Tue Apr 25 09:18:44 2017 +0100 -- core/src/main/scala/org/apache/spark/SparkContext.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b62ebd91/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 99efc48..0ec1bdd 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1350,7 +1350,7 @@ class SparkContext(config: SparkConf) extends Logging { @deprecated("use AccumulatorV2", "2.0.0") def accumulator[T](initialValue: T, name: String)(implicit param: AccumulatorParam[T]) : Accumulator[T] = { -val acc = new Accumulator(initialValue, param, Some(name)) +val acc = new Accumulator(initialValue, param, Option(name)) cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc)) acc } @@ -1379,7 +1379,7 @@ class SparkContext(config: SparkConf) extends Logging { @deprecated("use AccumulatorV2", "2.0.0") def accumulable[R, T](initialValue: R, name: String)(implicit param: AccumulableParam[R, T]) : Accumulable[R, T] = { -val acc = new Accumulable(initialValue, param, Some(name)) +val acc = new Accumulable(initialValue, param, Option(name)) cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc)) acc } @@ -1414,7 +1414,7 @@ class SparkContext(config: SparkConf) extends Logging { * @note Accumulators must be registered before use, or it will throw exception. */ def register(acc: AccumulatorV2[_, _], name: String): Unit = { -acc.register(this, name = Some(name)) +acc.register(this, name = Option(name)) } /** - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20404][CORE] Using Option(name) instead of Some(name)
Repository: spark Updated Branches: refs/heads/branch-2.1 65990fc57 -> 2d47e1aaf [SPARK-20404][CORE] Using Option(name) instead of Some(name) Using Option(name) instead of Some(name) to prevent runtime failures when using accumulators created like the following ``` sparkContext.accumulator(0, null) ``` Author: Sergey ZhemzhitskyCloses #17740 from szhem/SPARK-20404-null-acc-names. (cherry picked from commit 0bc7a90210aad9025c1e1bdc99f8e723c1bf0fbf) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2d47e1aa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2d47e1aa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2d47e1aa Branch: refs/heads/branch-2.1 Commit: 2d47e1aaf93fa13c0407d5c0dcca0f7c898e5b94 Parents: 65990fc Author: Sergey Zhemzhitsky Authored: Tue Apr 25 09:18:36 2017 +0100 Committer: Sean Owen Committed: Tue Apr 25 09:18:53 2017 +0100 -- core/src/main/scala/org/apache/spark/SparkContext.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2d47e1aa/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 5ae9db7..6e24656 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1275,7 +1275,7 @@ class SparkContext(config: SparkConf) extends Logging { @deprecated("use AccumulatorV2", "2.0.0") def accumulator[T](initialValue: T, name: String)(implicit param: AccumulatorParam[T]) : Accumulator[T] = { -val acc = new Accumulator(initialValue, param, Some(name)) +val acc = new Accumulator(initialValue, param, Option(name)) cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc)) acc } @@ -1304,7 +1304,7 @@ class SparkContext(config: SparkConf) extends Logging { @deprecated("use AccumulatorV2", "2.0.0") def accumulable[R, T](initialValue: R, name: String)(implicit param: AccumulableParam[R, T]) : Accumulable[R, T] = { -val acc = new Accumulable(initialValue, param, Some(name)) +val acc = new Accumulable(initialValue, param, Option(name)) cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc)) acc } @@ -1339,7 +1339,7 @@ class SparkContext(config: SparkConf) extends Logging { * @note Accumulators must be registered before use, or it will throw exception. */ def register(acc: AccumulatorV2[_, _], name: String): Unit = { -acc.register(this, name = Some(name)) +acc.register(this, name = Option(name)) } /** - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20404][CORE] Using Option(name) instead of Some(name)
Repository: spark Updated Branches: refs/heads/master c8f121951 -> 0bc7a9021 [SPARK-20404][CORE] Using Option(name) instead of Some(name) Using Option(name) instead of Some(name) to prevent runtime failures when using accumulators created like the following ``` sparkContext.accumulator(0, null) ``` Author: Sergey Zhemzhitsky. Closes #17740 from szhem/SPARK-20404-null-acc-names. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0bc7a902 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0bc7a902 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0bc7a902 Branch: refs/heads/master Commit: 0bc7a90210aad9025c1e1bdc99f8e723c1bf0fbf Parents: c8f1219 Author: Sergey Zhemzhitsky Authored: Tue Apr 25 09:18:36 2017 +0100 Committer: Sean Owen Committed: Tue Apr 25 09:18:36 2017 +0100 -- core/src/main/scala/org/apache/spark/SparkContext.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0bc7a902/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 99efc48..0ec1bdd 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -1350,7 +1350,7 @@ class SparkContext(config: SparkConf) extends Logging { @deprecated("use AccumulatorV2", "2.0.0") def accumulator[T](initialValue: T, name: String)(implicit param: AccumulatorParam[T]) : Accumulator[T] = { -val acc = new Accumulator(initialValue, param, Some(name)) +val acc = new Accumulator(initialValue, param, Option(name)) cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc)) acc } @@ -1379,7 +1379,7 @@ class SparkContext(config: SparkConf) extends Logging { @deprecated("use AccumulatorV2", "2.0.0") def accumulable[R, T](initialValue: R, name: String)(implicit param: AccumulableParam[R, T]) : Accumulable[R, T] = { -val acc = new Accumulable(initialValue, param, Some(name)) +val acc = new Accumulable(initialValue, param, Option(name)) cleaner.foreach(_.registerAccumulatorForCleanup(acc.newAcc)) acc } @@ -1414,7 +1414,7 @@ class SparkContext(config: SparkConf) extends Logging { * @note Accumulators must be registered before use, or it will throw exception. */ def register(acc: AccumulatorV2[_, _], name: String): Unit = { -acc.register(this, name = Some(name)) +acc.register(this, name = Option(name)) } /**
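The reason the swap works: `Option(x)` null-checks its argument and returns `None` for `null`, whereas `Some(x)` wraps whatever it is given, including `null`. A stand-alone sketch of the difference (plain Scala, not Spark code):

```
object OptionVsSome {
  def main(args: Array[String]): Unit = {
    val name: String = null            // e.g. sparkContext.accumulator(0, null)

    val some: Option[String] = Some(name)
    println(some)                      // Some(null): the null is wrapped as-is
    // some.map(_.length)              // would throw NullPointerException

    val opt: Option[String] = Option(name)
    println(opt)                       // None: the null is normalized away
    println(opt.map(_.length))         // None, no NPE
  }
}
```

With `Option(name)`, a caller that passes `null` simply gets an unnamed accumulator instead of a `Some(null)` that fails later when the name is read.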
spark git commit: [SPARK-20455][DOCS] Fix Broken Docker IT Docs
Repository: spark Updated Branches: refs/heads/branch-2.1 427966597 -> 65990fc57 [SPARK-20455][DOCS] Fix Broken Docker IT Docs ## What changes were proposed in this pull request? Just added the Maven `test` goal. ## How was this patch tested? No test needed, just a trivial documentation fix. Author: Armin Braun. Closes #17756 from original-brownbear/SPARK-20455. (cherry picked from commit c8f1219510f469935aa9ff0b1c92cfe20372377c) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/65990fc5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/65990fc5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/65990fc5 Branch: refs/heads/branch-2.1 Commit: 65990fc5708b35cf53b3582c146a4de5ece1da3c Parents: 4279665 Author: Armin Braun Authored: Tue Apr 25 09:13:50 2017 +0100 Committer: Sean Owen Committed: Tue Apr 25 09:14:10 2017 +0100 -- docs/building-spark.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/65990fc5/docs/building-spark.md -- diff --git a/docs/building-spark.md b/docs/building-spark.md index 0945095..33ff80e 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -302,7 +302,7 @@ Once installed, the `docker` service needs to be started, if not already running On Linux, this can be done by `sudo service docker start`. ./build/mvn install -DskipTests -./build/mvn -Pdocker-integration-tests -pl :spark-docker-integration-tests_2.11 +./build/mvn test -Pdocker-integration-tests -pl :spark-docker-integration-tests_2.11 or
spark git commit: [SPARK-20455][DOCS] Fix Broken Docker IT Docs
Repository: spark Updated Branches: refs/heads/branch-2.2 fb59a1954 -> c18de9c04 [SPARK-20455][DOCS] Fix Broken Docker IT Docs ## What changes were proposed in this pull request? Just added the Maven `test` goal. ## How was this patch tested? No test needed, just a trivial documentation fix. Author: Armin Braun. Closes #17756 from original-brownbear/SPARK-20455. (cherry picked from commit c8f1219510f469935aa9ff0b1c92cfe20372377c) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c18de9c0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c18de9c0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c18de9c0 Branch: refs/heads/branch-2.2 Commit: c18de9c045aaf7d17113f87a6b2146811b4af0eb Parents: fb59a19 Author: Armin Braun Authored: Tue Apr 25 09:13:50 2017 +0100 Committer: Sean Owen Committed: Tue Apr 25 09:13:58 2017 +0100 -- docs/building-spark.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c18de9c0/docs/building-spark.md -- diff --git a/docs/building-spark.md b/docs/building-spark.md index e99b70f..0f551bc 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -232,7 +232,7 @@ Once installed, the `docker` service needs to be started, if not already running On Linux, this can be done by `sudo service docker start`. ./build/mvn install -DskipTests -./build/mvn -Pdocker-integration-tests -pl :spark-docker-integration-tests_2.11 +./build/mvn test -Pdocker-integration-tests -pl :spark-docker-integration-tests_2.11 or
spark git commit: [SPARK-20455][DOCS] Fix Broken Docker IT Docs
Repository: spark Updated Branches: refs/heads/master 31345fde8 -> c8f121951 [SPARK-20455][DOCS] Fix Broken Docker IT Docs ## What changes were proposed in this pull request? Just added the Maven `test` goal. ## How was this patch tested? No test needed, just a trivial documentation fix. Author: Armin Braun. Closes #17756 from original-brownbear/SPARK-20455. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8f12195 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8f12195 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8f12195 Branch: refs/heads/master Commit: c8f1219510f469935aa9ff0b1c92cfe20372377c Parents: 31345fd Author: Armin Braun Authored: Tue Apr 25 09:13:50 2017 +0100 Committer: Sean Owen Committed: Tue Apr 25 09:13:50 2017 +0100 -- docs/building-spark.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c8f12195/docs/building-spark.md -- diff --git a/docs/building-spark.md b/docs/building-spark.md index e99b70f..0f551bc 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -232,7 +232,7 @@ Once installed, the `docker` service needs to be started, if not already running On Linux, this can be done by `sudo service docker start`. ./build/mvn install -DskipTests -./build/mvn -Pdocker-integration-tests -pl :spark-docker-integration-tests_2.11 +./build/mvn test -Pdocker-integration-tests -pl :spark-docker-integration-tests_2.11 or
[5/5] spark-website git commit: fix conflict
fix conflict Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/09046892 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/09046892 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/09046892 Branch: refs/heads/asf-site Commit: 09046892bf9702dbb48c2b325e05e7c4091164ea Parents: 3c96a50 Author: Stan ZhaiAuthored: Tue Apr 25 15:18:47 2017 +0800 Committer: Stan Zhai Committed: Tue Apr 25 15:18:47 2017 +0800 -- site/developer-tools.html | 49 +- 1 file changed, 1 insertion(+), 48 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/09046892/site/developer-tools.html -- diff --git a/site/developer-tools.html b/site/developer-tools.html index 680e089..d6c4169 100644 --- a/site/developer-tools.html +++ b/site/developer-tools.html @@ -304,7 +304,6 @@ $ build/mvn package -DskipTests -pl core build/mvn test -DwildcardSuites=none -Dtest=org.apache.spark.streaming.JavaAPISuite test -<<< HEAD ScalaTest Issues If the following error occurs when running ScalaTest @@ -337,7 +336,7 @@ your pull request to change testing behavior. This includes: [test-maven] - signals to test the pull request using maven [test-hadoop2.7] - signals to test using Sparks Hadoop 2.7 profile -=== + Binary compatibility To ensure binary compatibility, Spark uses https://github.com/typesafehub/migration-manager;>MiMa. @@ -386,7 +385,6 @@ JIRA number of the issue youre working on as well as its title. updating your pull request. Usually, the problems reported by MiMa are self-explanatory and revolve around missing members (methods or fields) that you will have to add back in order to maintain binary compatibility. ->>> aa1c66e424e024cb2e9f962aae8952bb4ad75cb5 Checking Out Pull Requests @@ -428,51 +426,6 @@ $ build/mvn -DskipTests install $ build/mvn dependency:tree -<<< HEAD -=== - -Running Build Targets For Individual Projects - -$ # sbt -$ build/sbt package -$ # Maven -$ build/mvn package -DskipTests -pl assembly - - -ScalaTest Issues - -If the following error occurs when running ScalaTest - -An internal error occurred during: "Launching XYZSuite.scala". -java.lang.NullPointerException - -It is due to an incorrect Scala library in the classpath. To fix it: - - - Right click on project - Select Build Path | Configure Build Path - Add Library | Scala Library - Remove scala-library-2.10.4.jar - lib_managed\jars - - -In the event of Could not find resource path for Web UI: org/apache/spark/ui/static, -its due to a classpath issue (some classes were probably not compiled). To fix this, it is -sufficient to run a test from the command line: - -build/sbt "test-only org.apache.spark.rdd.SortingSuite" - - -Running Different Test Permutations on Jenkins - -When running tests for a pull request on Jenkins, you can add special phrases to the title of -your pull request to change testing behavior. This includes: - - - [test-maven] - signals to test the pull request using maven - [test-hadoop2.7] - signals to test using Sparks Hadoop 2.7 profile - - ->>> aa1c66e424e024cb2e9f962aae8952bb4ad75cb5 Organizing Imports You can use a https://plugins.jetbrains.com/plugin/7350;>IntelliJ Imports Organizer - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[3/5] spark-website git commit: simplify maven build
simplify maven build Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/9902531e Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/9902531e Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/9902531e Branch: refs/heads/asf-site Commit: 9902531e626ea28f6ab01688091339faaf09389a Parents: 4e45856 Author: Stan ZhaiAuthored: Fri Mar 17 12:33:57 2017 +0800 Committer: Stan Zhai Committed: Fri Mar 17 12:33:57 2017 +0800 -- developer-tools.md| 2 +- site/developer-tools.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/9902531e/developer-tools.md -- diff --git a/developer-tools.md b/developer-tools.md index e712e7d..0723115 100644 --- a/developer-tools.md +++ b/developer-tools.md @@ -62,7 +62,7 @@ $ # or you can build the spark-core module with sbt directly using: $ build/sbt core/package $ # Maven -$ build/mvn package -DskipTests -pl :spark-core_2.11 +$ build/mvn package -DskipTests -pl core ``` http://git-wip-us.apache.org/repos/asf/spark-website/blob/9902531e/site/developer-tools.html -- diff --git a/site/developer-tools.html b/site/developer-tools.html index b46d664..62793ef 100644 --- a/site/developer-tools.html +++ b/site/developer-tools.html @@ -245,7 +245,7 @@ $ # or you can build the spark-core module with sbt directly using: $ build/sbt core/package $ # Maven -$ build/mvn package -DskipTests -pl :spark-core_2.11 +$ build/mvn package -DskipTests -pl core - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[2/5] spark-website git commit: adjust the content structure to make it more reasonable
adjust the content structure to make it more reasonable Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/4e458563 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/4e458563 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/4e458563 Branch: refs/heads/asf-site Commit: 4e458563361e07e4cfb8286fd0c64a948185271a Parents: 05c9946 Author: Stan ZhaiAuthored: Fri Mar 10 00:45:48 2017 +0800 Committer: Stan Zhai Committed: Fri Mar 10 00:45:48 2017 +0800 -- developer-tools.md| 97 + site/developer-tools.html | 98 +- 2 files changed, 97 insertions(+), 98 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/4e458563/developer-tools.md -- diff --git a/developer-tools.md b/developer-tools.md index e012c8e..e712e7d 100644 --- a/developer-tools.md +++ b/developer-tools.md @@ -48,6 +48,23 @@ builds. This process will auto-start after the first time `build/mvn` is called shut down at any time by running `build/zinc-/bin/zinc -shutdown` and will automatically restart whenever `build/mvn` is called. +Building submodules individually + +For instance, you can build the Spark Core module using: + +``` +$ # sbt +$ build/sbt +> project core +> package + +$ # or you can build the spark-core module with sbt directly using: +$ build/sbt core/package + +$ # Maven +$ build/mvn package -DskipTests -pl :spark-core_2.11 +``` + Running Individual Tests @@ -95,7 +112,6 @@ $ build/sbt "core/testOnly *DAGSchedulerSuite -- -z SPARK-12345" For more about how to run individual tests with sbt, see the [sbt documentation](http://www.scala-sbt.org/0.13/docs/Testing.html). - Testing with Maven With Maven, you can use the `-DwildcardSuites` flag to run individual Scala tests: @@ -112,6 +128,37 @@ To run individual Java tests, you can use the `-Dtest` flag: build/mvn test -DwildcardSuites=none -Dtest=org.apache.spark.streaming.JavaAPISuite test ``` +ScalaTest Issues + +If the following error occurs when running ScalaTest + +``` +An internal error occurred during: "Launching XYZSuite.scala". +java.lang.NullPointerException +``` +It is due to an incorrect Scala library in the classpath. To fix it: + +- Right click on project +- Select `Build Path | Configure Build Path` +- `Add Library | Scala Library` +- Remove `scala-library-2.10.4.jar - lib_managed\jars` + +In the event of "Could not find resource path for Web UI: org/apache/spark/ui/static", +it's due to a classpath issue (some classes were probably not compiled). To fix this, it +sufficient to run a test from the command line: + +``` +build/sbt "test-only org.apache.spark.rdd.SortingSuite" +``` + +Running Different Test Permutations on Jenkins + +When running tests for a pull request on Jenkins, you can add special phrases to the title of +your pull request to change testing behavior. This includes: + +- `[test-maven]` - signals to test the pull request using maven +- `[test-hadoop2.7]` - signals to test using Spark's Hadoop 2.7 profile + Checking Out Pull Requests Git provides a mechanism for fetching remote pull requests into your own local repository. 
@@ -156,54 +203,6 @@ $ build/mvn -DskipTests install $ build/mvn dependency:tree ``` -Building submodules individually - -For instance, you can build the Spark Core module using: - -``` -$ # sbt -$ build/sbt -> project core -> package - -$ # or you can build the spark-core module with sbt directly using: -$ build/sbt core/package - -$ # Maven -$ build/mvn package -DskipTests -pl :spark-core_2.11 -``` - -ScalaTest Issues - -If the following error occurs when running ScalaTest - -``` -An internal error occurred during: "Launching XYZSuite.scala". -java.lang.NullPointerException -``` -It is due to an incorrect Scala library in the classpath. To fix it: - -- Right click on project -- Select `Build Path | Configure Build Path` -- `Add Library | Scala Library` -- Remove `scala-library-2.10.4.jar - lib_managed\jars` - -In the event of "Could not find resource path for Web UI: org/apache/spark/ui/static", -it's due to a classpath issue (some classes were probably not compiled). To fix this, it -sufficient to run a test from the command line: - -``` -build/sbt "test-only org.apache.spark.rdd.SortingSuite" -``` - -Running Different Test Permutations on Jenkins - -When running tests for a pull request on Jenkins, you can add special phrases to the title of -your pull request to change testing behavior. This includes: - -- `[test-maven]` - signals to test the pull request using maven -- `[test-hadoop2.7]` - signals to test using Spark's
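For context on the test-selection flags shown in the developer-tools diff above (`-DwildcardSuites`, `-Dtest`, and sbt's `testOnly ... -- -z`): they filter on ScalaTest suite names and test names. Below is a minimal, hypothetical suite showing what those filters match against; Spark's real suites extend its internal `SparkFunSuite`, but plain `org.scalatest.FunSuite` is used here to keep the example self-contained.

```
import org.scalatest.FunSuite

// Hypothetical suite: "-DwildcardSuites" and sbt's "testOnly" match on the
// fully qualified class name, while "-z SPARK-12345" matches on the test name.
class SortingSuite extends FunSuite {

  test("SPARK-12345: sorting a small array returns ascending order") {
    assert(Array(3, 1, 2).sorted.sameElements(Array(1, 2, 3)))
  }

  test("sorting an empty array yields an empty array") {
    assert(Array.empty[Int].sorted.isEmpty)
  }
}
```

With such a suite on the classpath, a command of the form `build/sbt "core/testOnly *SortingSuite -- -z SPARK-12345"` would run only the first test.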
[4/5] spark-website git commit: Merge branch 'asf-site' of https://github.com/apache/spark-website into add-sbt-package
Merge branch 'asf-site' of https://github.com/apache/spark-website into add-sbt-package Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/3c96a509 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/3c96a509 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/3c96a509 Branch: refs/heads/asf-site Commit: 3c96a509cb1e00df716d8f38eb5b214d8778e45d Parents: 9902531 aa1c66e Author: Stan ZhaiAuthored: Tue Apr 25 15:17:57 2017 +0800 Committer: Stan Zhai Committed: Tue Apr 25 15:17:57 2017 +0800 -- _layouts/global.html| 1 + community.md| 6 + contributing.md | 4 + developer-tools.md | 57 +++- improvement-proposals.md| 91 ++ ...3-31-spark-summit-june-2017-agenda-posted.md | 15 + site/committers.html| 7 +- site/community.html | 13 +- site/contributing.html | 11 +- site/developer-tools.html | 106 ++- site/documentation.html | 7 +- site/downloads.html | 7 +- site/examples.html | 7 +- site/faq.html | 7 +- site/graphx/index.html | 7 +- site/improvement-proposals.html | 295 +++ site/index.html | 7 +- site/mailing-lists.html | 7 +- site/mllib/index.html | 7 +- site/news/amp-camp-2013-registration-ope.html | 7 +- .../news/announcing-the-first-spark-summit.html | 7 +- .../news/fourth-spark-screencast-published.html | 7 +- site/news/index.html| 16 +- site/news/nsdi-paper.html | 7 +- site/news/one-month-to-spark-summit-2015.html | 7 +- .../proposals-open-for-spark-summit-east.html | 7 +- ...registration-open-for-spark-summit-east.html | 7 +- .../news/run-spark-and-shark-on-amazon-emr.html | 7 +- site/news/spark-0-6-1-and-0-5-2-released.html | 7 +- site/news/spark-0-6-2-released.html | 7 +- site/news/spark-0-7-0-released.html | 7 +- site/news/spark-0-7-2-released.html | 7 +- site/news/spark-0-7-3-released.html | 7 +- site/news/spark-0-8-0-released.html | 7 +- site/news/spark-0-8-1-released.html | 7 +- site/news/spark-0-9-0-released.html | 7 +- site/news/spark-0-9-1-released.html | 7 +- site/news/spark-0-9-2-released.html | 7 +- site/news/spark-1-0-0-released.html | 7 +- site/news/spark-1-0-1-released.html | 7 +- site/news/spark-1-0-2-released.html | 7 +- site/news/spark-1-1-0-released.html | 7 +- site/news/spark-1-1-1-released.html | 7 +- site/news/spark-1-2-0-released.html | 7 +- site/news/spark-1-2-1-released.html | 7 +- site/news/spark-1-2-2-released.html | 7 +- site/news/spark-1-3-0-released.html | 7 +- site/news/spark-1-4-0-released.html | 7 +- site/news/spark-1-4-1-released.html | 7 +- site/news/spark-1-5-0-released.html | 7 +- site/news/spark-1-5-1-released.html | 7 +- site/news/spark-1-5-2-released.html | 7 +- site/news/spark-1-6-0-released.html | 7 +- site/news/spark-1-6-1-released.html | 7 +- site/news/spark-1-6-2-released.html | 7 +- site/news/spark-1-6-3-released.html | 7 +- site/news/spark-2-0-0-released.html | 7 +- site/news/spark-2-0-1-released.html | 7 +- site/news/spark-2-0-2-released.html | 7 +- site/news/spark-2-1-0-released.html | 7 +- site/news/spark-2.0.0-preview.html | 7 +- .../spark-accepted-into-apache-incubator.html | 7 +- site/news/spark-and-shark-in-the-news.html | 7 +- site/news/spark-becomes-tlp.html| 7 +- site/news/spark-featured-in-wired.html | 7 +- .../spark-mailing-lists-moving-to-apache.html | 7 +- site/news/spark-meetups.html| 7 +- site/news/spark-screencasts-published.html | 7 +- site/news/spark-summit-2013-is-a-wrap.html | 7 +- site/news/spark-summit-2014-videos-posted.html | 7 +- site/news/spark-summit-2015-videos-posted.html | 7 +- 
site/news/spark-summit-agenda-posted.html | 7 +- .../spark-summit-east-2015-videos-posted.html | 7 +-
[1/5] spark-website git commit: fix an error in the descriptions of `Build Targets For Individual Projects`
Repository: spark-website Updated Branches: refs/heads/asf-site aa1c66e42 -> 09046892b fix an error in the descriptions of `Build Targets For Individual Projects` Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/05c99469 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/05c99469 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/05c99469 Branch: refs/heads/asf-site Commit: 05c99469b15a6039df855d8619972c4db1a3e663 Parents: c1b9ad3 Author: Stan ZhaiAuthored: Fri Mar 10 00:33:52 2017 +0800 Committer: Stan Zhai Committed: Fri Mar 10 00:33:52 2017 +0800 -- developer-tools.md| 16 site/developer-tools.html | 16 2 files changed, 24 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/05c99469/developer-tools.md -- diff --git a/developer-tools.md b/developer-tools.md index 88f3f36..e012c8e 100644 --- a/developer-tools.md +++ b/developer-tools.md @@ -48,6 +48,7 @@ builds. This process will auto-start after the first time `build/mvn` is called shut down at any time by running `build/zinc-/bin/zinc -shutdown` and will automatically restart whenever `build/mvn` is called. + Running Individual Tests When developing locally, it's often convenient to run a single test or a few tests, rather than running the entire test suite. @@ -155,14 +156,21 @@ $ build/mvn -DskipTests install $ build/mvn dependency:tree ``` - -Running Build Targets For Individual Projects +Building submodules individually + +For instance, you can build the Spark Core module using: ``` $ # sbt -$ build/sbt package +$ build/sbt +> project core +> package + +$ # or you can build the spark-core module with sbt directly using: +$ build/sbt core/package + $ # Maven -$ build/mvn package -DskipTests -pl assembly +$ build/mvn package -DskipTests -pl :spark-core_2.11 ``` ScalaTest Issues http://git-wip-us.apache.org/repos/asf/spark-website/blob/05c99469/site/developer-tools.html -- diff --git a/site/developer-tools.html b/site/developer-tools.html index 615adea..1cbe7bb 100644 --- a/site/developer-tools.html +++ b/site/developer-tools.html @@ -232,6 +232,7 @@ builds. This process will auto-start after the first time build/mvn shut down at any time by running build/zinc-version/bin/zinc -shutdown and will automatically restart whenever build/mvn is called. + Running Individual Tests When developing locally, its often convenient to run a single test or a few tests, rather than running the entire test suite. @@ -326,13 +327,20 @@ $ build/mvn -DskipTests install $ build/mvn dependency:tree - -Running Build Targets For Individual Projects +Building submodules individually + +For instance, you can build the Spark Core module using: $ # sbt -$ build/sbt package +$ build/sbt + project core + package + +$ # or you can build the spark-core module with sbt directly using: +$ build/sbt core/package + $ # Maven -$ build/mvn package -DskipTests -pl assembly +$ build/mvn package -DskipTests -pl :spark-core_2.11 ScalaTest Issues - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org